merge with internal master

Marcin Junczys-Dowmunt 2020-08-18 16:10:14 -07:00
commit 089fb48e6c
57 changed files with 3630 additions and 1406 deletions

View File

@ -192,6 +192,13 @@ else(MSVC)
set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
endif(MSVC)
# with gcc 7.0 and above we need to mark fallthrough in switch case statements
# that can be done in comments for backcompat, but CCACHE removes comments.
# -C makes gcc keep comments.
if(USE_CCACHE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -C")
endif()
###############################################################################
# Downloading SentencePiece if requested and set to compile with it.
# Requires all the dependencies imposed by SentencePiece
@ -205,6 +212,7 @@ if(USE_ONNX)
message(STATUS "Enabling experimental ONNX support")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ONNX")
set(EXT_LIBS ${EXT_LIBS} protobuf)
include_directories(${Protobuf_INCLUDE_DIRS})
endif()
# Find packages
@ -400,6 +408,13 @@ if(COMPILE_SERVER)
message(STATUS "Found OpenSSL")
include_directories(${OPENSSL_INCLUDE_DIR})
set(EXT_LIBS ${EXT_LIBS} ${OPENSSL_CRYPTO_LIBRARY})
if(MSVC AND USE_STATIC_LIBS)
# "If you link with static OpenSSL libraries then you're expected to additionally link your
# application with WS2_32.LIB, GDI32.LIB, ADVAPI32.LIB, CRYPT32.LIB and USER32.LIB"
# See https://github.com/openssl/openssl/blob/OpenSSL_1_1_1d/NOTES.WIN#L127
# Linking with crypt32.lib seems to be enough.
set(EXT_LIBS ${EXT_LIBS} crypt32.lib)
endif()
set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system)
else(OpenSSL_FOUND)
message(WARNING "Cannot find OpenSSL library. Not compiling server.")

View File

@ -2,20 +2,25 @@
"environments": [
{
// Dependencies can be found automatically if you use vcpkg as your library manager.
// In this case, please set the VCPKG_ROOT variable to the directory that contains the vcpkg.exe
// In this case, please set the VCPKG_ROOT variable to the directory that contains the
// vcpkg.exe. If you used CheckDeps.bat to install dependencies, by default this should be an
// absolute path to marian-dev\vs\deps\vcpkg.
// If you prefer to manage the dependencies yourself, please fill in the other variables.
"VCPKG_ROOT": "D:\\Perso\\Dev\\vcpkg",
"VCPKG_ROOT": "C:\\path\\to\\marian-dev\\vs\\deps\\vcpkg",
// The MKL library can be automatically found by CMake. However, if you installed it in a
// custom directory, please set the MKLROOT to this directory path.
// Default is c:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl on
// Windows, or /opt/intel/mkl on Linux
"MKLROOT": "",
// Boost and OpenSSL are required if you compile with COMPILE_SERVER=TRUE
"BOOST_INCLUDEDIR": "",
"BOOST_LIBRARYDIR": "",
"ZLIB_ROOT": "",
"OPENSSL_ROOT_DIR": "",
// The MKL library can be automatically found by CMake. However, if you installed it in a custom
// directory, please set the MKLROOT to this directory path.
// Default is c:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl on Windows, or
// /opt/intel/mkl on Linux
"MKLROOT": ""
// Protobuf is required if you compile with USE_SENTENCEPIECE=TRUE
"Protobuf_SRC_ROOT_FOLDER": ""
}
],
"configurations": [
@ -32,18 +37,21 @@
"variables": [
{ "name": "CMAKE_TOOLCHAIN_FILE", "value": "${env.VCPKG_ROOT}\\scripts\\buildsystems\\vcpkg.cmake" },
{ "name": "VCPKG_TARGET_TRIPLET", "value": "x64-windows-static" },
{ "name": "OPENSSL_USE_STATIC_LIBS:BOOL", "value": "TRUE" },
{ "name": "OPENSSL_MSVC_STATIC_RT:BOOL", "value": "TRUE" },
{ "name": "COMPILE_SERVER:BOOL", "value": "TRUE" },
{ "name": "COMPILE_EXAMPLES:BOOL", "value": "FALSE" },
{ "name": "COMPILE_TESTS:BOOL", "value": "FALSE" },
{ "name": "COMPILE_CPU:BOOL", "value": "TRUE" },
{ "name": "COMPILE_CUDA:BOOL", "value": "TRUE" },
{ "name": "USE_CUDNN:BOOL", "value": "TRUE" },
{ "name": "USE_MPI:BOOL", "value": "FALSE" }
{ "name": "COMPILE_CUDA:BOOL", "value": "TRUE" },
{ "name": "COMPILE_CPU:BOOL", "value": "TRUE" },
{ "name": "COMPILE_EXAMPLES:BOOL", "value": "FALSE" },
{ "name": "COMPILE_SERVER:BOOL", "value": "TRUE" },
{ "name": "COMPILE_TESTS:BOOL", "value": "FALSE" },
{ "name": "USE_CUDNN:BOOL", "value": "FALSE" },
{ "name": "USE_FBGEMM:BOOL", "value": "TRUE" },
{ "name": "USE_MPI:BOOL", "value": "FALSE" },
{ "name": "USE_SENTENCEPIECE:BOOL", "value": "TRUE" },
{ "name": "USE_STATIC_LIBS:BOOL", "value": "TRUE" }
]
},
{
@ -59,19 +67,22 @@
"variables": [
{ "name": "CMAKE_TOOLCHAIN_FILE", "value": "${env.VCPKG_ROOT}\\scripts\\buildsystems\\vcpkg.cmake" },
{ "name": "VCPKG_TARGET_TRIPLET", "value": "x64-windows-static" },
{ "name": "OPENSSL_USE_STATIC_LIBS:BOOL", "value": "TRUE" },
{ "name": "OPENSSL_MSVC_STATIC_RT:BOOL", "value": "TRUE" },
{ "name": "COMPILE_SERVER:BOOL", "value": "TRUE" },
{ "name": "COMPILE_EXAMPLES:BOOL", "value": "FALSE" },
{ "name": "COMPILE_TESTS:BOOL", "value": "FALSE" },
{ "name": "COMPILE_CPU:BOOL", "value": "TRUE" },
{ "name": "COMPILE_CUDA:BOOL", "value": "TRUE" },
{ "name": "USE_CUDNN:BOOL", "value": "TRUE" },
{ "name": "USE_MPI:BOOL", "value": "FALSE" }
{ "name": "COMPILE_CUDA:BOOL", "value": "TRUE" },
{ "name": "COMPILE_CPU:BOOL", "value": "TRUE" },
{ "name": "COMPILE_EXAMPLES:BOOL", "value": "FALSE" },
{ "name": "COMPILE_SERVER:BOOL", "value": "TRUE" },
{ "name": "COMPILE_TESTS:BOOL", "value": "TRUE" },
{ "name": "USE_CUDNN:BOOL", "value": "FALSE" },
{ "name": "USE_FBGEMM:BOOL", "value": "TRUE" },
{ "name": "USE_MPI:BOOL", "value": "FALSE" },
{ "name": "USE_SENTENCEPIECE:BOOL", "value": "TRUE" },
{ "name": "USE_STATIC_LIBS:BOOL", "value": "TRUE" }
]
}
]
}
}

View File

@ -1,2 +1 @@
v1.9.33
v1.9.35

@ -1 +1 @@
Subproject commit 864ea3c605305a6c0623e3df91b85afe13d37a46
Subproject commit 0f8cabf13ec362d50544d33490024e00c3a763be

View File

@ -0,0 +1,85 @@
import numpy as np
import sys
import yaml
import argparse
import torch
parser = argparse.ArgumentParser(description='Convert LASER model to Marian weight file.')
parser.add_argument('--laser', help='Path to LASER PyTorch model', required=True)
parser.add_argument('--marian', help='Output path for Marian weight file', required=True)
args = parser.parse_args()
laser = torch.load(args.laser)
config = dict()
config["type"] = "laser"
config["input-types"] = ["sequence"]
config["dim-vocabs"] = [laser["params"]["num_embeddings"]]
config["version"] = "laser2marian.py conversion"
config["enc-depth"] = laser["params"]["num_layers"]
config["enc-cell"] = "lstm"
config["dim-emb"] = laser["params"]["embed_dim"]
config["dim-rnn"] = laser["params"]["hidden_size"]
yaml.dump(laser["dictionary"], open(args.marian + ".vocab.yml", "w"))
marianModel = dict()
def transposeOrder(mat):
matT = np.transpose(mat) # just a view with changed row order
return matT.flatten(order="C").reshape(matT.shape) # force row order change and reshape
def convert(pd, srcs, trg, transpose=True, bias=False, lstm=False):
num = pd[srcs[0]].detach().numpy()
for i in range(1, len(srcs)):
num += pd[srcs[i]].detach().numpy()
out = num
if bias:
num = np.atleast_2d(num)
else:
if transpose:
num = transposeOrder(num) # transpose with row order change
if lstm: # different order in pytorch than marian
stateDim = int(num.shape[-1] / 4)
i = np.copy(num[:, 0*stateDim:1*stateDim])
f = np.copy(num[:, 1*stateDim:2*stateDim])
num[:, 0*stateDim:1*stateDim] = f
num[:, 1*stateDim:2*stateDim] = i
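# PyTorch packs the LSTM gates as (input, forget, cell, output); the swap above moves the
# forget-gate block in front of the input-gate block to match the layout Marian expects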
marianModel[trg] = num
for k in laser:
print(k)
for k in laser["model"]:
print(k, laser["model"][k].shape)
convert(laser["model"], ["embed_tokens.weight"], "encoder_Wemb", transpose=False)
for i in range(laser["params"]["num_layers"]):
convert(laser["model"], [f"lstm.weight_ih_l{i}"], f"encoder_lstm_l{i}_W", lstm=True)
convert(laser["model"], [f"lstm.weight_hh_l{i}"], f"encoder_lstm_l{i}_U", lstm=True)
convert(laser["model"], [f"lstm.bias_ih_l{i}", f"lstm.bias_hh_l{i}"], f"encoder_lstm_l{i}_b", bias=True, lstm=True) # needs to be summed!
convert(laser["model"], [f"lstm.weight_ih_l{i}_reverse"], f"encoder_lstm_l{i}_reverse_W", lstm=True)
convert(laser["model"], [f"lstm.weight_hh_l{i}_reverse"], f"encoder_lstm_l{i}_reverse_U", lstm=True)
convert(laser["model"], [f"lstm.bias_ih_l{i}_reverse", f"lstm.bias_hh_l{i}_reverse"], f"encoder_lstm_l{i}_reverse_b", bias=True, lstm=True) # needs to be summed!
for m in marianModel:
print(m, marianModel[m].shape)
configYamlStr = yaml.dump(config, default_flow_style=False)
desc = list(configYamlStr)
npDesc = np.chararray((len(desc),))
npDesc[:] = desc
npDesc.dtype = np.int8
marianModel["special:model.yml"] = npDesc
print("\nMarian config:")
print(configYamlStr)
print("Saving Marian model to %s" % (args.marian,))
np.savez(args.marian, **marianModel)
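A minimal sketch for sanity-checking the converted file; the output name "laser.npz" is an assumed example (the script writes to whatever path --marian was given):
import numpy as np
import yaml
converted = np.load("laser.npz")  # hypothetical output path passed via --marian
for name in converted.files:
    print(name, converted[name].shape)
# the Marian config is stored as raw int8 bytes under the special key written above
recovered = yaml.safe_load(converted["special:model.yml"].tobytes().decode("utf-8"))
print(recovered["type"], recovered["enc-depth"])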

View File

@ -1,81 +0,0 @@
import onnxruntime as ort
import numpy as np
import onnx
import os, sys, time
os.environ['OMP_NUM_THREADS'] = '1'
sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 1
sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
def get_function(path, output_vars):
print("Reading ONNX function from", path)
#model = onnx.load(path)
#print("Done", flush=True)
#print(model)
ort_sess = ort.InferenceSession(path, sess_options)
output_defs = ort_sess.get_outputs()
for input in ort_sess.get_inputs():
print(" input: ", input.name, input.shape, input.type)
for output in output_defs:
print(" output: ", output.name, output.shape, output.type)
def invoke_model(**kwargs):
def to_numpy(val):
arr = np.array(val)
if arr.dtype == np.double:
arr = arr.astype(np.float32)
elif arr.dtype == np.int64:
arr = arr.astype(np.int32)
return arr
kwargs = { name: to_numpy(val) for name, val in kwargs.items() }
output_vals = ort_sess.run(None, kwargs)
output_dict = { output_def.name : output_val for output_val, output_def in zip(output_vals, output_defs) }
return [output_dict[output_var] for output_var in output_vars]
return invoke_model
id2word = { id : word.rstrip() for id, word in enumerate(open('c:/work/marian-dev/local/model/vocab_v1.wl', encoding='utf-8').readlines()) }
word2id = { word : id for id, word in id2word.items() }
unk_id = word2id["<unk>"]
model_path_prefix = "c:/work/marian-dev/local/model/model.npz.best-ce-mean-words-debug-sin-uniq-notrans-nounk"
encode_source = get_function(model_path_prefix + '.encode_source.onnx',
['encoder_context_0'])
decode_first = get_function(model_path_prefix + '.decode_first.onnx',
['first_logits', 'first_decoder_state_0', 'first_decoder_state_1', 'first_decoder_state_2', 'first_decoder_state_3', 'first_decoder_state_4', 'first_decoder_state_5'])
decode_next = get_function(model_path_prefix + '.decode_next.onnx',
['next_logits', 'next_decoder_state_0', 'next_decoder_state_1', 'next_decoder_state_2', 'next_decoder_state_3', 'next_decoder_state_4', 'next_decoder_state_5'])
def greedy_decode(data_0):
if len(data_0) == 1: # special handling for the empty sentence, like Marian
return data_0
data_0_mask = [[[1.]]] * len(data_0)
data_0_index_range = [[[float(t)]] for t in range(len(data_0))]
#print(data_0, data_0_mask, data_0_index_range)
max_len = len(data_0) * 3
Y = []
encoder_context_0, *_ = encode_source(data_0=data_0, data_0_mask=data_0_mask, data_0_posrange=data_0_index_range)
logp, *out_decoder_states = decode_first(data_1_posrange=[[[float(0)]]],
encoder_context_0=encoder_context_0, data_0_mask=data_0_mask)
logp[:,:,:,unk_id] = -1e8 # suppress <unk>, like Marian
Y.append(np.argmax(logp[0][0]))
while Y[-1] != 0 and len(Y) < max_len:
logp, *out_decoder_states = decode_next(prev_word=[Y[-1]], data_1_posrange=[[[float(len(Y))]]],
encoder_context_0=encoder_context_0, data_0_mask=data_0_mask,
decoder_state_0=out_decoder_states[0], decoder_state_1=out_decoder_states[1],
decoder_state_2=out_decoder_states[2], decoder_state_3=out_decoder_states[3],
decoder_state_4=out_decoder_states[4], decoder_state_5=out_decoder_states[5])
logp[:,:,:,unk_id] = -1e8
Y.append(np.argmax(logp[0][0]))
return Y
start_time = time.time()
with open("C:/work/marian-dev/local/model/predictions.out-onnx-debug-sin-notrans-first100-d.tok", 'wt', encoding='utf-8') as out_f:
for line in open("C:/work/marian-dev/local/model/predictions.in-first100.tok", encoding='utf-8').readlines():
data = [word2id.get(w, unk_id) for w in (line.rstrip() + " </s>").split(' ') if w]
Y = greedy_decode(data)
print("input: ", ' '.join(id2word[x] for x in data))
print("output:", ' '.join(id2word[y] for y in Y))
print(' '.join(id2word[y] for y in Y[:-1]), file=out_f, flush=True) # strip </s> for output to file
print("--- %s seconds ---" % (time.time() - start_time))

View File

@ -0,0 +1,276 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Library for converting certain types of Marian models to a standalone ONNX model.
Because Marian and ONNX use very different philosophies, a conversion is not possible
for every Marian model. Specifically, we currently do not support recurrent
networks in the encoder, and we can only decode with greedy search (not beam search).
This works by running a Marian decode for 2 output steps, and capturing three pieces of
Marian's internal graph that correspond to the encoder, the first decoding steps, and the
second decoding step. The graph of the second decoding step can be applied repeatedly in
order to decode a variable-length sequence.
The three pieces are then composed with a greedy-search implementation, which is realized
directly via ONNX operators. This is facilitated by the onnx_fx library. As of this writing,
onnx_fx is still in an experimental stage and is not yet included in release branches of
the onnxconverter-common distribution. Hence, you must use the latest master branch, not
the release.
The code below assumes that the onnxconverter_common repo is cloned next to the marian-dev
repo, and that you use the standard CMake build process on Linux. If not, please make sure
that the onnxconverter-common repo is included in PYTHONPATH, and you may need to pass the
binary path of Marian to export_marian_model_components() explicitly.
Prerequisites:
```
pip install onnxruntime
git clone https://github.com/microsoft/onnxconverter-common.git
```
You will also need to compile Marian with -DUSE_ONNX=ON.
Known issue: If the number of decoder layers is not 6, you need to manually adjust one
line of code in loop_body() below.
"""
import os, sys, inspect, subprocess
from typing import List, Dict, Optional, Callable
# get the Marian root path
_marian_root_path = os.path.dirname(inspect.getfile(inspect.currentframe())) + "/../.."
# we assume onnxconverter-common to be available next to the marian-dev repo; you may need to adjust this
sys.path.append(_marian_root_path + "/../onnxconverter-common")
from onnxconverter_common.onnx_fx import Graph
from onnxconverter_common.onnx_fx import GraphFunctionType as _Ty
from onnxconverter_common import optimize_onnx_graph
import onnxruntime as _ort
from onnxruntime import quantization
def _ort_apply_model(model, inputs): # ORT execution is a callback so that Graph itself does not need to depend on ORT
sess = _ort.InferenceSession(model.SerializeToString())
return sess.run(None, inputs)
Graph.inference_runtime = _ort_apply_model
Graph.opset = 11
def _optimize_graph_in_place(graph: Graph):
# @TODO: This should really be methods on onnx_fx.Graph.
g = graph._oxml.graph
g_opt = optimize_onnx_graph(
onnx_nodes=g.node, # the onnx node list in onnx model.
nchw_inputs=None, # the name list of the inputs needed to be transposed as NCHW
inputs=g.input, # the model input
outputs=g.output, # the model output
initializers=g.initializer, # the model initializers
stop_initializers=None, # 'stop' optimization on these initializers
model_value_info=g.value_info, # the model value_info
model_name=g.name, # the internal name of model
target_opset=graph.opset)
graph._oxml.graph.CopyFrom(g_opt)
def export_marian_model_components(marian_model_path: str, marian_vocab_paths: List[str],
marian_executable_path: Optional[str]=None) -> Dict[str,Graph]:
"""
Export the Marian graph to a set of models.
Args:
marian_model_path: path to Marian model to convert
marian_vocab_paths: paths of vocab files (normally, this requires 2 entries, which may be identical)
marian_executable_path: path to Marian executable; will default to THIS_SCRIPT_PATH/../../build/marian
Returns:
Dict of onnx_fx.Graph instances corresponding to pieces of the Marian model.
"""
assert isinstance(marian_vocab_paths, list), "marian_vocab_paths must be a list of paths"
# default marian executable is found relative to location of this script (Linux/CMake only)
if marian_executable_path is None:
marian_executable_path = _marian_root_path + "/build/marian"
# partial models are written to /tmp
output_path_stem = "/tmp/" + os.path.basename(marian_model_path)
# exporting is done via invoking Marian via its command-line interface; models are written to tmp files
command = marian_executable_path
args = [
"convert",
"--from", marian_model_path,
"--vocabs", *marian_vocab_paths,
"--to", output_path_stem,
"--export-as", "onnx-encode"
]
subprocess.run([command] + args, check=True)
# load the tmp files into onnx_fx.Graph objects
graph_names = ["encode_source", "decode_first", "decode_next"] # Marian generates graphs with these names
output_paths = [output_path_stem + "." + graph_name + ".onnx" for graph_name in graph_names] # form pathnames under which Marian wrote the files
res = { graph_name: Graph.load(output_path) for graph_name, output_path in zip(graph_names, output_paths) }
# optimize the partial models in place, as Marian may not have used the most optimal way of expressing all operations
for graph_name in res.keys():
_optimize_graph_in_place(res[graph_name])
# clean up after ourselves
for output_path in output_paths:
os.unlink(output_path)
return res
def quantize_models_in_place(partial_models: Dict[str,Graph], to_bits: int=8):
"""
Quantize the partial models in place.
Args:
partial_models: models returned from export_marian_model_components()
to_bits: number of bits to quantize to, currently only supports 8
"""
for graph_name in partial_models.keys(): # quantize each partial model
partial_models[graph_name]._oxml = quantization.quantize(
partial_models[graph_name]._oxml,
nbits=to_bits,
quantization_mode=quantization.QuantizationMode.IntegerOps,
symmetric_weight=True,
force_fusions=True)
def compose_model_components_with_greedy_search(partial_models: Dict[str,Graph], num_decoder_layers: int):
"""
Create an ONNX model that implements greedy search over the exported Marian pieces.
Args:
partial_models: models returned from export_marian_model_components()
num_decoder_layers: must be specified, since it cannot be inferred from the model files presently (e.g. 6)
Returns:
ONNX model that can be called as
result_ids = greedy_search_fn(np.array(source_ids, dtype=np.int64), np.array([target_eos_id], dtype=np.int64))[0]
"""
decoder_state_dim = num_decoder_layers * 2 # each decoder has two state variables
# load our partial functions
# ONNX graph inputs and outputs are named but not ordered. Therefore, we must define the parameter order here.
def define_parameter_order(graph, inputs, outputs):
tmppath = "/tmp/tmpmodel.onnx"
graph.save(tmppath) # unfortunately, Graph.load() cannot load from another Graph, so use a tmp file
graph = Graph.load(tmppath, inputs=inputs, outputs=outputs)
os.unlink(tmppath)
return graph
encode_source = define_parameter_order(partial_models["encode_source"],
inputs=['data_0', 'data_0_mask', 'data_0_posrange'], # define the order of arguments
outputs=['encoder_context_0'])
decode_first = define_parameter_order(partial_models["decode_first"],
inputs=['data_1_posrange', 'encoder_context_0', 'data_0_mask'],
outputs=['first_logits'] +
[f"first_decoder_state_{i}" for i in range(decoder_state_dim)])
decode_next = define_parameter_order(partial_models["decode_next"],
inputs=['prev_word', 'data_1_posrange', 'encoder_context_0', 'data_0_mask'] +
[f"decoder_state_{i}" for i in range(decoder_state_dim)],
outputs=['next_logits'] +
[f"next_decoder_state_{i}" for i in range(decoder_state_dim)])
# create an ONNX graph that implements full greedy search
# The greedy search is implemented via the @onnx_fx.Graph.trace decorator, which allows us to
# author the greedy search in Python, similar to @CNTK.Function and PyTorch trace-based jit.
# The decorator executes greedy_search() below on a dummy input in order to generate an ONNX graph
# via invoking operators from the onnx.fx library.
# The partial functions exported from Marian are invoked (=inlined) by this.
# The result is a full ONNX graph that implements greedy search using the Marian model.
@Graph.trace(
input_types=[_Ty.I(shape=['N']), _Ty.I([1])],
output_types=[_Ty.I(shape=['T'])],
outputs="Y")
def greedy_search(X, eos_id):
"""
Args:
X: sequence of input tokens, including EOS symbol, as integer indices into the input vocabulary
eos_id: id of the EOS symbol in the output vocabulary
"""
ox = X.ox
data_0 = X
data_0_shape = data_0.shape()
data_0_mask = ox.constant_of_shape(data_0_shape, value=1.0)
seq_len = data_0_shape[-1]
data_0_index_range = ox.range([ox.constant(value=0), seq_len, ox.constant(value=1)]).cast(to=ox.float)
data_0_index_range = ox.unsqueeze(data_0_index_range, axes=[1, 2])
max_len = seq_len * 3
encoder_context_0 = encode_source(data_0=data_0, data_0_mask=data_0_mask,
data_0_posrange=data_0_index_range)
y_len_0 = ox.constant(value=0.0)
logp, *out_decoder_states = decode_first(data_1_posrange=y_len_0,
encoder_context_0=encoder_context_0, data_0_mask=data_0_mask)
y_t = logp[0, 0, 0].argmax(axis=-1, keepdims=True) # note: rank-1 tensor, not a scalar
eos_token = eos_id + 0
test_y_t = (y_t != eos_token)
@Graph.trace(outputs=['ty_t', 'y_t_o', *(f'ods_{i}' for i in range(decoder_state_dim)), 'y_t_o2'],
output_types=[_Ty.b, _Ty.i] + [_Ty.f] * decoder_state_dim + [_Ty.i],
input_types=[_Ty.I([1]), _Ty.b, _Ty.i] + [_Ty.f] * decoder_state_dim)
def loop_body(iteration_count, condition, # these are not actually used inside
y_t,
out_decoder_states_0, out_decoder_states_1, out_decoder_states_2, out_decoder_states_3, out_decoder_states_4, out_decoder_states_5,
out_decoder_states_6, out_decoder_states_7, out_decoder_states_8, out_decoder_states_9, out_decoder_states_10, out_decoder_states_11):
# @BUGBUG: Currently, we do not support variable number of arguments to the callable.
# @TODO: We have the information from the type signature in Graph.trace(), so this should be possible.
assert decoder_state_dim == 12, "Currently, decoder layers other than 6 require a manual code change"
out_decoder_states = [out_decoder_states_0, out_decoder_states_1, out_decoder_states_2, out_decoder_states_3, out_decoder_states_4, out_decoder_states_5,
out_decoder_states_6, out_decoder_states_7, out_decoder_states_8, out_decoder_states_9, out_decoder_states_10, out_decoder_states_11]
"""
Loop body follows the requirements of ONNX Loop:
"The graph run each iteration.
It has 2+N inputs: (iteration_num, condition, loop carried dependencies...).
It has 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...).
Each scan_output is created by concatenating the value of the specified output value at the end of each iteration of the loop.
It is an error if the dimensions or data type of these scan_outputs change across loop iterations."
Inputs:
iteration_num (not used by our function)
test_y_t: condition (not used as an input)
y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies
Outputs:
test_y_t: condition, return True if there is more to decode
y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies (same as in the Inputs section)
y_t: K=1 outputs
"""
pos = iteration_count + 1
data_1_posrange = pos.cast(to=1).unsqueeze(axes=[0, 1, 2])
logp, *out_decoder_states = decode_next(
prev_word=y_t, data_1_posrange=data_1_posrange,
encoder_context_0=encoder_context_0, data_0_mask=data_0_mask,
**{f"decoder_state_{i}": out_decoder_states[i] for i in range(len(out_decoder_states))})
y_t = logp[0, 0, 0].argmax(axis=-1, keepdims=True)
test_y_t = (y_t != eos_token)
return [test_y_t, y_t] + out_decoder_states + [y_t]
# "Final N loop carried dependency values then K scan_outputs"
ret_vals = ox.loop(max_len, test_y_t, loop_body,
inputs=[y_t] + out_decoder_states,
outputs=['gy_t_o', *[f"gods_{i}" for i in range(len(out_decoder_states))], 'greedy_out'])
y = ret_vals[-1] # scan_output
# we must prepend the very first token
Y = ox.concat([ox.unsqueeze(y_t), y], axis=0) # note: y_t are rank-1 tensors, not scalars (ORT concat fails with scalars)
return ox.squeeze(Y, axes=[1])
greedy_search.to_model() # this triggers the model tracing (which is lazy)
# optimize the final model as well
# @BUGBUG: This leads to a malformed or hanging model.
#_optimize_graph_in_place(greedy_search)
return greedy_search
def apply_model(greedy_search_fn: Graph, source_ids: List[int], target_eos_id: int) -> List[int]:
"""
Apply model to an input sequence, e.g. run translation.
This function is meant for quick testing, and as an example of how to invoke the final graph.
Args:
greedy_search_fn: ONNX model created with compose_model_components_with_greedy_search()
source_ids: list of source tokens, as indices into the source vocabulary, ending in the EOS symbol
target_eos_id: id of EOS symbol in target vocabulary
Returns:
Result as list of ids into target vocabulary
"""
import numpy as np
Y = greedy_search_fn(
np.array(source_ids, dtype=np.int64),
np.array([target_eos_id], dtype=np.int64))[0]
return Y

View File

@ -0,0 +1,47 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Example program demonstrating how to convert a Marian model using the marian_to_onnx library
to a self-contained ONNX model that implements greedy search.
"""
import os, sys
import marian_to_onnx as mo
# The following variables would normally be command-line arguments.
# We use constants here to keep it simple. They reflect an example use. You must adjust these.
my_dir = os.path.expanduser("~/young/wngt 2019/")
marian_npz = my_dir + "model.base.npz" # path to the Marian model to convert
num_decoder_layers = 6 # number of decoder layers
marian_vocs = [my_dir + "en-de.wl"] * 2 # path to the vocabularies for source and target
onnx_model_path = my_dir + "model.base.opt.onnx" # resulting model gets written here
quantize_to_bits = 8 # None for no quantization
# export Marian model as multiple ONNX models
partial_models = mo.export_marian_model_components(marian_npz, marian_vocs)
# quantize if desired
if quantize_to_bits:
mo.quantize_models_in_place(partial_models, to_bits=quantize_to_bits)
# use the ONNX models in a greedy-search
# The result is a fully self-contained model that implements greedy search.
onnx_model = mo.compose_model_components_with_greedy_search(partial_models, num_decoder_layers)
# save as ONNX file
onnx_model.save(onnx_model_path)
# run a test sentence
w2is = [{ word.rstrip(): id for id, word in enumerate(open(voc_path, "r").readlines()) } for voc_path in marian_vocs]
i2ws = [{ id: tok for tok, id in w2i.items() } for w2i in w2is]
src_tokens = "▁Republican ▁leaders ▁justifie d ▁their ▁policy ▁by ▁the ▁need ▁to ▁combat ▁electoral ▁fraud ▁.".split()
src_ids = [w2is[0][tok] for tok in src_tokens]
print(src_tokens)
print(src_ids)
Y = mo.apply_model(greedy_search_fn=onnx_model,
source_ids=src_ids + [w2is[0]["</s>"]],
target_eos_id=w2is[1]["</s>"])
print(Y.shape, Y)
tgt_tokens = [i2ws[1][y] for y in Y]
print(" ".join(tgt_tokens))

View File

@ -18,9 +18,10 @@
#pragma warning(disable : 4100 4125 4127 4244 4267 4512 4456 4510 4610 4800)
#endif
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-variable" // note: GCC <6.0 ignores this when inside push/pop
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#define AuxillaryParseTableField AuxiliaryParseTableField // in protobuf 3.12, the generated source has a spelling error

View File

@ -13,7 +13,7 @@
#error incompatible with your Protocol Buffer headers. Please update
#error your headers.
#endif
#if 3012000 < PROTOBUF_MIN_PROTOC_VERSION
#if 3012003 < PROTOBUF_MIN_PROTOC_VERSION
#error This file was generated by an older version of protoc which is
#error incompatible with your Protocol Buffer headers. Please
#error regenerate this file with a newer version of protoc.

View File

@ -70,6 +70,7 @@ add_library(marian STATIC
layers/generic.cpp
layers/loss.cpp
layers/weight.cpp
layers/lsh.cpp
rnn/cells.cpp
rnn/attention.cpp
@ -82,7 +83,9 @@ add_library(marian STATIC
models/transformer_stub.cpp
rescorer/score_collector.cpp
embedder/vector_collector.cpp
translator/beam_search.cpp
translator/history.cpp
translator/output_collector.cpp
translator/output_printer.cpp
@ -100,17 +103,15 @@ add_library(marian STATIC
# this is only compiled to catch build errors, but not linked
microsoft/quicksand.cpp
microsoft/cosmos.cpp
$<TARGET_OBJECTS:libyaml-cpp>
$<TARGET_OBJECTS:SQLiteCpp>
$<TARGET_OBJECTS:pathie-cpp>
$<TARGET_OBJECTS:zlib>
$<TARGET_OBJECTS:faiss>
)
if(BLAS_FOUND)
target_sources(marian PRIVATE ${CMAKE_CURRENT_LIST_DIR}/layers/lsh.cpp $<TARGET_OBJECTS:faiss>)
endif()
target_compile_options(marian PUBLIC ${ALL_WARNINGS})
# Generate git_revision.h to reflect current git revision information
@ -225,7 +226,7 @@ if(COMPILE_SERVER)
set_target_properties(marian_server PROPERTIES OUTPUT_NAME marian-server)
if(MSVC)
# Disable warnings from the SimpleWebSocketServer library needed for compilation of marian-server
target_compile_options(marian_server PUBLIC ${ALL_WARNINGS})
target_compile_options(marian_server PUBLIC ${ALL_WARNINGS} /wd4267 /wd4244 /wd4456 /wd4458)
else(MSVC)
# -Wno-suggest-override disables warnings from Boost 1.69+
target_compile_options(marian_server PUBLIC ${ALL_WARNINGS} -Wno-suggest-override)
@ -238,6 +239,7 @@ if(APPLE) # This is a dependency of pathie but I can't seem to link it into that
endif()
foreach(exec ${EXECUTABLES})
# @TODO: consider adding MKL and other libs to the library rather than the executables if at all possible
target_link_libraries(${exec} marian ${EXT_LIBS} ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
if(CUDA_FOUND)
target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})

View File

@ -70,15 +70,17 @@ int main(int argc, char** argv) {
// added a flag indicating whether the weights need to be packed or not
graph->packAndSave(modelTo, configStr.str(), /* --gemm-type */ saveGemmType, Type::float32);
}
#ifdef USE_ONNX
else if (exportAs == "onnx-encode") {
#ifdef USE_ONNX
auto graph = New<ExpressionGraphONNXExporter>();
load(graph);
auto modelOptions = New<Options>(config)->with("vocabs", vocabPaths, "inference", true);
graph->exportToONNX(modelTo, modelOptions, vocabPaths);
}
#else
ABORT("--export-as onnx-encode requires Marian to be built with USE_ONNX=ON");
#endif // USE_ONNX
}
else
ABORT("Unknown --export-as value: {}", exportAs);

View File

@ -0,0 +1,14 @@
#include "marian.h"
#include "models/model_task.h"
#include "embedder/embedder.h"
#include "common/timer.h"
int main(int argc, char** argv) {
using namespace marian;
auto options = parseOptions(argc, argv, cli::mode::embedding);
New<Embed<Embedder>>(options)->run();
return 0;
}

View File

@ -11,6 +11,7 @@
// train
// decode
// score
// embed
// vocab
// convert
// Currently, marian_server is not supported, since it is a special use case with lots of extra dependencies.
@ -24,6 +25,9 @@
#define main mainScorer
#include "marian_scorer.cpp"
#undef main
#define main mainEmbedder
#include "marian_embedder.cpp"
#undef main
#define main mainVocab
#include "marian_vocab.cpp"
#undef main
@ -44,9 +48,10 @@ int main(int argc, char** argv) {
if(cmd == "train") return mainTrainer(argc, argv);
else if(cmd == "decode") return mainDecoder(argc, argv);
else if (cmd == "score") return mainScorer(argc, argv);
else if (cmd == "embed") return mainEmbedder(argc, argv);
else if (cmd == "vocab") return mainVocab(argc, argv);
else if (cmd == "convert") return mainConv(argc, argv);
std::cerr << "Command must be train, decode, score, vocab, or convert." << std::endl;
std::cerr << "Command must be train, decode, score, embed, vocab, or convert." << std::endl;
exit(1);
} else
return mainTrainer(argc, argv);

View File

@ -22,8 +22,8 @@ namespace marian {
*
* @see CLIWrapper::alias()
*
* The order of alias definitions *does* matter: options from later aliases override earlier
* regardless of its order in the command line or config file.
* The order of alias definitions *does* matter: options from an alias defined later override
* options defined in earlier aliases regardless of their order in the command line or config file.
*/
void ConfigParser::addAliases(cli::CLIWrapper& cli) {
cli.alias("fp16", "true", [&](YAML::Node& config) {

View File

@ -72,30 +72,45 @@ void Config::initialize(ConfigParser const& cp) {
}
}
// guess --tsv-fields (the number of streams) if not set
// guess --tsv-fields, i.e. the number of fields in a TSV input, if not set
if(get<bool>("tsv") && get<size_t>("tsv-fields") == 0) {
size_t tsvFields = 0;
if(loaded) {
// model.npz has properly set vocab dimensions in special:model.yml,
// so we may use them to determine the number of streams
for(auto dim : get<std::vector<size_t>>("dim-vocabs"))
if(dim != 0) // language models have a fake extra vocab
++tsvFields;
// For translation there is no target stream
if((mode == cli::mode::translation || mode == cli::mode::server) && tsvFields > 1)
--tsvFields;
} else {
// TODO: This is very brittle, find a better solution
// If parameters from model.npz special:model.yml were not loaded,
// guess the number of inputs and outputs based on the model type name.
auto modelType = get<std::string>("type");
tsvFields = 1;
if(modelType.find("multi-", 0) != std::string::npos) // is a dual-source model
tsvFields += 1;
if(mode == cli::mode::training || mode == cli::mode::scoring)
if(modelType.rfind("lm", 0) != 0) // unless it is a language model
// use the length of --input-types if given
auto inputTypes = get<std::vector<std::string>>("input-types");
if(!inputTypes.empty()) {
tsvFields = inputTypes.size();
} else {
if(loaded) {
// model.npz has properly set vocab dimensions in special:model.yml,
// so we may use them to determine the number of streams
for(auto dim : get<std::vector<size_t>>("dim-vocabs"))
if(dim != 0) // language models have a fake extra vocab
++tsvFields;
// For translation there is no target stream
if((mode == cli::mode::translation || mode == cli::mode::server) && tsvFields > 1)
--tsvFields;
} else {
// If parameters from model.npz special:model.yml were not loaded,
// guess the number of inputs and outputs based on the model type name.
// TODO: This is very brittle, find a better solution
auto modelType = get<std::string>("type");
tsvFields = 1;
if(modelType.find("multi-", 0) != std::string::npos) // is a dual-source model
tsvFields += 1;
if(mode == cli::mode::training || mode == cli::mode::scoring)
if(modelType.rfind("lm", 0) != 0) // unless it is a language model
tsvFields += 1;
}
// count fields with guided-alignment or data-weighting too
if(mode == cli::mode::training) {
if(has("guided-alignment") && get<std::string>("guided-alignment") != "none")
tsvFields += 1;
if(has("data-weighting") && !get<std::string>("data-weighting").empty())
tsvFields += 1;
}
}
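// e.g. a single-source model trained with --guided-alignment and without --input-types is
// guessed as 3 fields (source, target, alignment)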
config_["tsv-fields"] = tsvFields;

View File

@ -91,6 +91,9 @@ ConfigParser::ConfigParser(cli::mode mode)
case cli::mode::scoring:
addOptionsScoring(cli_);
break;
case cli::mode::embedding:
addOptionsEmbedding(cli_);
break;
default:
ABORT("wrong CLI mode");
break;
@ -218,7 +221,8 @@ void ConfigParser::addOptionsModel(cli::CLIWrapper& cli) {
"Train right-to-left model");
cli.add<std::vector<std::string>>("--input-types",
"Provide type of input data if different than 'sequence'. "
"Possible values: sequence, class. You need to provide one type per input.",
"Possible values: sequence, class, alignment, weight. "
"You need to provide one type per input file (if --train-sets) or per TSV field (if --tsv).",
{});
cli.add<bool>("--best-deep",
"Use Edinburgh deep RNN configuration (s2s)");
@ -235,6 +239,8 @@ void ConfigParser::addOptionsModel(cli::CLIWrapper& cli) {
8);
cli.add<bool>("--transformer-no-projection",
"Omit linear projection after multi-head attention (transformer)");
cli.add<bool>("--transformer-pool",
"Pool encoder states instead of using cross attention (selects first encoder state, best used with special token)");
cli.add<int>("--transformer-dim-ffn",
"Size of position-wise feed-forward network (transformer)",
2048);
@ -462,7 +468,8 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
"Auto-adjusted to --mini-batch-words-ref if given.",
0.f)->implicit_val("1e-4");
cli.add<std::string>("--guided-alignment",
"Path to a file with word alignments. Use guided alignment to guide attention or 'none'",
"Path to a file with word alignments. Use guided alignment to guide attention or 'none'. "
"If --tsv it specifies the index of a TSV field that contains the alignments (0-based)",
"none");
cli.add<std::string>("--guided-alignment-cost",
"Cost type for guided alignment: ce (cross-entropy), mse (mean square error), mult (multiplication)",
@ -471,7 +478,8 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
"Weight for guided alignment cost",
0.1);
cli.add<std::string>("--data-weighting",
"Path to a file with sentence or word weights");
"Path to a file with sentence or word weights. "
"If --tsv it specifies the index of a TSV field that contains the weights (0-based)");
cli.add<std::string>("--data-weighting-type",
"Processing level for data weighting: sentence, word",
"sentence");
@ -707,6 +715,45 @@ void ConfigParser::addOptionsScoring(cli::CLIWrapper& cli) {
// clang-format on
}
void ConfigParser::addOptionsEmbedding(cli::CLIWrapper& cli) {
auto previous_group = cli.switchGroup("Scorer options");
// clang-format off
cli.add<bool>("--no-reload",
"Do not load existing model specified in --model arg");
// TODO: move options like vocabs and train-sets to a separate procedure as they are defined twice
cli.add<std::vector<std::string>>("--train-sets,-t",
"Paths to corpora to be scored: source target");
cli.add<std::string>("--output,-o",
"Path to output file, stdout by default",
"stdout");
cli.add<std::vector<std::string>>("--vocabs,-v",
"Paths to vocabulary files have to correspond to --train-sets. "
"If this parameter is not supplied we look for vocabulary files source.{yml,json} and target.{yml,json}. "
"If these files do not exists they are created");
cli.add<bool>("--compute-similarity",
"Expect two inputs and compute cosine similarity instead of outputting embedding vector");
cli.add<bool>("--binary",
"Output vectors as binary floats");
addSuboptionsInputLength(cli);
addSuboptionsTSV(cli);
addSuboptionsDevices(cli);
addSuboptionsBatching(cli);
cli.add<bool>("--optimize",
"Optimize speed aggressively sacrificing memory or precision");
cli.add<bool>("--fp16",
"Shortcut for mixed precision inference with float16, corresponds to: --precision float16");
cli.add<std::vector<std::string>>("--precision",
"Mixed precision for inference, set parameter type in expression graph",
{"float32"});
cli.switchGroup(previous_group);
// clang-format on
}
void ConfigParser::addSuboptionsDevices(cli::CLIWrapper& cli) {
// clang-format off
cli.add<std::vector<std::string>>("--devices,-d",
@ -804,7 +851,7 @@ void ConfigParser::addSuboptionsTSV(cli::CLIWrapper& cli) {
cli.add<bool>("--tsv",
"Tab-separated input");
cli.add<size_t>("--tsv-fields",
"Number of fields in the TSV input, guessed based on the model type");
"Number of fields in the TSV input. By default, it is guessed based on the model type");
// clang-format on
}
@ -904,6 +951,7 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate){
// remove extra config files from the config to avoid redundancy
config_.remove("config");
// dump config and exit
if(!get<std::string>("dump-config").empty() && get<std::string>("dump-config") != "false") {
auto dumpMode = get<std::string>("dump-config");
config_.remove("dump-config");
@ -917,6 +965,43 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate){
exit(0);
}
// For TSV input, it is possible to use --input-types to determine fields that contain alignments
// or weights. In such case, the position of 'alignment' input type in --input-types determines
// the index of a TSV field that contains word alignments, and respectively, the position of
// 'weight' in --input-types determines the index of a TSV field that contains weights.
// Marian will abort if both --guided-alignment and 'alignment' in --input-types are specified
// (or --data-weighting and 'weight').
//
// Note: this may modify the config, so it is safer to do it after --dump-config.
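// For example, --tsv --input-types sequence alignment sequence marks TSV field 1 as word
// alignments, which has the same effect as setting --guided-alignment 1.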
if(mode_ == cli::mode::training || get<bool>("tsv")) {
auto inputTypes = get<std::vector<std::string>>("input-types");
if(!inputTypes.empty()) {
bool seenAligns = false;
bool seenWeight = false;
YAML::Node config;
for(size_t i = 0; i < inputTypes.size(); ++i) {
if(inputTypes[i] == "alignment") {
ABORT_IF(seenAligns, "You can specify 'alignment' only once in input-types");
ABORT_IF(has("guided-alignment") && get<std::string>("guided-alignment") != "none",
"You must use either guided-alignment or 'alignment' in input-types");
config["guided-alignment"] = std::to_string(i);
seenAligns = true;
}
if(inputTypes[i] == "weight") {
ABORT_IF(seenWeight, "You can specify 'weight' only once in input-types");
ABORT_IF(has("data-weighting") && !get<std::string>("data-weighting").empty(),
"You must use either data-weighting or 'weight' in input-types");
config["data-weighting"] = std::to_string(i);
seenWeight = true;
}
}
if(!config.IsNull())
cli_.updateConfig(config,
cli::OptionPriority::CommandLine,
"Extracting 'alignment' and 'weight' types from input-types failed.");
}
}
cli_.parseAliases();
auto opts = New<Options>();
opts->merge(Config(*this).get());

View File

@ -14,7 +14,7 @@
namespace marian {
namespace cli {
enum struct mode { training, translation, scoring, server };
enum struct mode { training, translation, scoring, server, embedding };
} // namespace cli
/**
@ -129,6 +129,7 @@ private:
void addOptionsValidation(cli::CLIWrapper&);
void addOptionsTranslation(cli::CLIWrapper&);
void addOptionsScoring(cli::CLIWrapper&);
void addOptionsEmbedding(cli::CLIWrapper&);
void addAliases(cli::CLIWrapper&);

View File

@ -27,6 +27,10 @@ void ConfigValidator::validateOptions(cli::mode mode) const {
validateOptionsParallelData();
validateOptionsScoring();
break;
case cli::mode::embedding:
validateOptionsParallelData();
validateOptionsScoring();
break;
case cli::mode::training:
validateOptionsParallelData();
validateOptionsTraining();

View File

@ -1,8 +1,10 @@
#include "common/file_stream.h"
#include "common/utils.h"
#include <streambuf>
#include <string>
#include <vector>
#include <cstdio>
#ifdef _MSC_VER
#include <io.h>
#include <windows.h>
@ -18,23 +20,44 @@ namespace io {
///////////////////////////////////////////////////////////////////////////////////////////////
InputFileStream::InputFileStream(const std::string &file)
: std::istream(NULL), file_(file) {
ABORT_IF(!marian::filesystem::exists(file_), "File '{}' does not exist", file);
: std::istream(NULL) {
// the special syntax "command |" starts command in a sh shell and reads out its result
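// (e.g. an InputFileStream constructed with "gunzip -c corpus.gz |" reads the decompressed output of that command)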
if (marian::utils::endsWith(file, "|")) {
#ifdef __unix__
auto command = file.substr(0, file.size() - 1);
// open as a pipe
pipe_ = popen(command.c_str(), "r");
ABORT_IF(!pipe_, "Command failed to execute ({}): {}", errno, command);
// there is no official way to construct a filebuf from a FILE* or fd, so we use /proc/{pid}/fd/{fd}
// For now, this only works on Linux. There are similar workarounds for Windows.
file_ = "/proc/" + std::to_string(getpid()) + "/fd/" + std::to_string(fileno(pipe_));
#else
ABORT("Pipe syntax not supported in this build of Marian: {}", file);
#endif
}
else
file_ = file;
streamBuf1_.reset(new std::filebuf());
auto ret = static_cast<std::filebuf*>(streamBuf1_.get())->open(file.c_str(), std::ios::in | std::ios::binary);
ABORT_IF(!ret, "File cannot be opened", file);
auto ret = static_cast<std::filebuf*>(streamBuf1_.get())->open(file_.string().c_str(), std::ios::in | std::ios::binary);
ABORT_IF(!ret, "Error opening file ({}): {}", errno, file_.string());
ABORT_IF(ret != streamBuf1_.get(), "Return value is not equal to streambuf pointer, that is weird");
if(file_.extension() == marian::filesystem::Path(".gz")) {
streamBuf2_.reset(new zstr::istreambuf(streamBuf1_.get()));
this->init(streamBuf2_.get());
} else {
this->init(streamBuf1_.get());
// insert .gz decompression
if(marian::utils::endsWith(file, ".gz")) {
streamBuf2_ = std::move(streamBuf1_);
streamBuf1_.reset(new zstr::istreambuf(streamBuf2_.get()));
}
// initialize the underlying istream
this->init(streamBuf1_.get());
}
InputFileStream::~InputFileStream() {}
InputFileStream::~InputFileStream() {
#ifdef __unix__ // (pipe syntax is only supported on UNIX-like OS)
if (pipe_)
pclose(pipe_); // non-NULL if pipe syntax was used
#endif
}
bool InputFileStream::empty() {
return this->peek() == std::ifstream::traits_type::eof();

View File

@ -49,8 +49,9 @@ public:
protected:
marian::filesystem::Path file_;
std::unique_ptr<std::streambuf> streamBuf1_;
std::unique_ptr<std::streambuf> streamBuf2_;
std::unique_ptr<std::streambuf> streamBuf1_; // main streambuf
std::unique_ptr<std::streambuf> streamBuf2_; // in case of a .gz file
FILE* pipe_{}; // in case of pipe syntax
std::vector<char> readBuf_;
};

View File

@ -75,6 +75,8 @@ protected:
private:
Ptr<BatchStats> stats_;
bool runAsync_{true}; // use asynchronous batch pre-fetching by default. We want to be able to disable this when running in library mode and for exception-safety.
// state of fetching
std::deque<BatchPtr> bufferedBatches_; // current swath of batches that next() reads from
@ -84,7 +86,7 @@ private:
bool newlyPrepared_{ true }; // prepare() was just called: we need to reset current_ --@TODO: can we just reset it directly?
// variables for multi-threaded pre-fetching
mutable ThreadPool threadPool_; // (we only use one thread, but keep it around)
mutable UPtr<ThreadPool> threadPool_; // (we only use one thread, but keep it around)
std::future<std::deque<BatchPtr>> futureBufferedBatches_; // next swath of batches is returned via this
// this runs on a bg thread; sequencing is handled by caller, but locking is done in here
@ -230,26 +232,37 @@ private:
// this starts fillBatches() as a background operation
void fetchBatchesAsync() {
ABORT_IF(futureBufferedBatches_.valid(), "Attempted to restart futureBufferedBatches_ while still running");
futureBufferedBatches_ = threadPool_.enqueue([this]() {
ABORT_IF(!runAsync_, "Trying to run fetchBatchesAsync() but runAsync_ is false??");
ABORT_IF(!threadPool_, "Trying to run fetchBatchesAsync() without initialized threadPool_??");
futureBufferedBatches_ = threadPool_->enqueue([this]() {
return fetchBatches();
});
}
BatchPtr next() {
if(bufferedBatches_.empty()) {
// out of data: need to get next batch from background thread
// We only get here if the future has been scheduled to run; it must be valid.
ABORT_IF(!futureBufferedBatches_.valid(), "Attempted to wait for futureBufferedBatches_ when none pending.\n"
"This error often occurs when Marian tries to restore the training data iterator, but the corpus has been changed or replaced.\n"
"If you have changed the training corpus, add --no-restore-corpus to the training command and run it again.");
bufferedBatches_ = std::move(futureBufferedBatches_.get());
// if bg thread returns an empty swath, we hit the end of the epoch
if (bufferedBatches_.empty()) {
return nullptr;
if(runAsync_) { // by default we will run in asynchronous mode
// out of data: need to get next batch from background thread
// We only get here if the future has been scheduled to run; it must be valid.
ABORT_IF(!futureBufferedBatches_.valid(), "Attempted to wait for futureBufferedBatches_ when none pending.\n"
"This error often occurs when Marian tries to restore the training data iterator, but the corpus has been changed or replaced.\n"
"If you have changed the training corpus, add --no-restore-corpus to the training command and run it again.");
bufferedBatches_ = std::move(futureBufferedBatches_.get());
// if bg thread returns an empty swath, we hit the end of the epoch
if (bufferedBatches_.empty()) {
return nullptr;
}
// and kick off the next bg operation
fetchBatchesAsync();
} else { // don't spawn any threads, i.e. batch fetching is blocking.
bufferedBatches_ = fetchBatches();
// if bufferedBatches is empty we hit the end of the epoch
if (bufferedBatches_.empty()) {
return nullptr;
}
}
// and kick off the next bg operation
fetchBatchesAsync();
}
auto batch = bufferedBatches_.front();
bufferedBatches_.pop_front();
return batch;
@ -259,8 +272,10 @@ public:
BatchGenerator(Ptr<DataSet> data,
Ptr<Options> options,
Ptr<BatchStats> stats = nullptr)
: data_(data), options_(options), stats_(stats), threadPool_(1) {
Ptr<BatchStats> stats = nullptr,
bool runAsync = true)
: data_(data), options_(options), stats_(stats),
runAsync_(runAsync), threadPool_(runAsync ? new ThreadPool(1) : nullptr) {
auto shuffle = options_->get<std::string>("shuffle", "none");
shuffleData_ = shuffle == "data";
shuffleBatches_ = shuffleData_ || shuffle == "batches";
@ -287,8 +302,9 @@ public:
data_->reset();
newlyPrepared_ = true;
// start the background pre-fetch operation
fetchBatchesAsync();
// start the background pre-fetch operation when running in asynchronous mode, otherwise we will fetch on demand.
if(runAsync_)
fetchBatchesAsync();
}
// Used to restore the state of a BatchGenerator after

View File

@ -46,7 +46,14 @@ void Corpus::preprocessLine(std::string& line, size_t streamId) {
}
SentenceTuple Corpus::next() {
std::vector<std::string> fields(tsvNumFields_); // used for handling TSV inputs
// Used for handling TSV inputs
// Determine the total number of fields including alignments or weights
auto tsvNumAllFields = tsvNumInputFields_;
if(alignFileIdx_ > -1)
++tsvNumAllFields;
if(weightFileIdx_ > -1)
++tsvNumAllFields;
std::vector<std::string> fields(tsvNumAllFields);
for(;;) { // (this is a retry loop for skipping invalid sentences)
// get index of the current sentence
@ -86,11 +93,27 @@ SentenceTuple Corpus::next() {
addWeightsToSentenceTuple(line, tup);
} else {
if(tsv_) { // split TSV input and add each field into the sentence tuple
utils::splitTsv(line, fields, tsvNumFields_);
for(size_t j = 0; j < tsvNumFields_; ++j) {
preprocessLine(fields[j], j);
addWordsToSentenceTuple(fields[j], j, tup);
utils::splitTsv(line, fields, tsvNumAllFields);
size_t shift = 0;
for(size_t j = 0; j < tsvNumAllFields; ++j) {
// index j needs to be shifted to get the proper vocab index if guided-alignment or
// data-weighting fields precede the source or target sequences in the TSV input
if(j == alignFileIdx_ || j == weightFileIdx_) {
++shift;
} else {
size_t vocabId = j - shift;
preprocessLine(fields[j], vocabId);
addWordsToSentenceTuple(fields[j], vocabId, tup);
}
}
// weights are added last to the sentence tuple, because this runs a validation that needs
// the length of the target sequence
if(alignFileIdx_ > -1)
addAlignmentToSentenceTuple(fields[alignFileIdx_], tup);
if(weightFileIdx_ > -1)
addWeightsToSentenceTuple(fields[weightFileIdx_], tup);
} else {
preprocessLine(line, i);
addWordsToSentenceTuple(line, i, tup);
@ -267,9 +290,10 @@ CorpusBase::batch_ptr Corpus::toBatch(const std::vector<Sample>& batchVector) {
auto batch = batch_ptr(new batch_type(subBatches));
batch->setSentenceIds(sentenceIds);
if(options_->get("guided-alignment", std::string("none")) != "none" && alignFileIdx_)
// Add prepared word alignments and weights if they are available
if(alignFileIdx_ > -1 && options_->get("guided-alignment", std::string("none")) != "none")
addAlignmentsToBatch(batch, batchVector);
if(options_->hasAndNotEmpty("data-weighting") && weightFileIdx_)
if(weightFileIdx_ > -1 && options_->hasAndNotEmpty("data-weighting"))
addWeightsToBatch(batch, batchVector);
return batch;

View File

@ -30,6 +30,9 @@ const SentenceTuple& CorpusIterator::dereference() const {
return tup_;
}
// These types of corpus constructors are used in in-training validators
// (only?), so do not load additional files for guided alignment or data
// weighting.
CorpusBase::CorpusBase(const std::vector<std::string>& paths,
const std::vector<Ptr<Vocab>>& vocabs,
Ptr<Options> options)
@ -39,11 +42,11 @@ CorpusBase::CorpusBase(const std::vector<std::string>& paths,
maxLengthCrop_(options_->get<bool>("max-length-crop")),
rightLeft_(options_->get<bool>("right-left")),
tsv_(options_->get<bool>("tsv", false)),
tsvNumFields_(options->get<size_t>("tsv-fields", 0)) {
tsvNumInputFields_(getNumberOfTSVInputFields(options)) {
// TODO: support passing only one vocab file if we have fully-tied embeddings
if(tsv_) {
ABORT_IF(tsvNumFields_ != vocabs_.size(),
"Number of TSV fields and vocab files does not agree");
ABORT_IF(tsvNumInputFields_ != vocabs_.size(),
"Number of TSV input fields and vocab files does not agree");
} else {
ABORT_IF(paths_.size() != vocabs_.size(),
"Number of corpus files and vocab files does not agree");
@ -64,7 +67,7 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
maxLengthCrop_(options_->get<bool>("max-length-crop")),
rightLeft_(options_->get<bool>("right-left")),
tsv_(options_->get<bool>("tsv", false)),
tsvNumFields_(options->get<size_t>("tsv-fields", 0)) {
tsvNumInputFields_(getNumberOfTSVInputFields(options)) {
bool training = !translate;
if(training)
@ -72,22 +75,65 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
else
paths_ = options_->get<std::vector<std::string>>("input");
initEOS(training);
std::vector<std::string> vocabPaths;
if(!options_->get<std::vector<std::string>>("vocabs").empty())
vocabPaths = options_->get<std::vector<std::string>>("vocabs");
if(training) {
if(tsv_) {
ABORT_IF(!vocabPaths.empty() && tsvNumFields_ != vocabPaths.size(),
"Number of TSV fields and vocab files does not agree");
ABORT_IF(!vocabPaths.empty() && tsvNumInputFields_ != vocabPaths.size(),
"Number of TSV input fields and vocab files does not agree");
} else {
ABORT_IF(!vocabPaths.empty() && paths_.size() != vocabPaths.size(),
"Number of corpus files and vocab files does not agree");
}
}
bool useGuidedAlignment = options_->get("guided-alignment", std::string("none")) != "none";
bool useDataWeighting = options_->hasAndNotEmpty("data-weighting");
if(training && tsv_) {
// For TSV input, we expect that guided-alignment or data-weighting provide the index of a TSV
// field that contains the alignments or weights.
//
// Alignments and weights for non TSV input are handled later, after vocab creation.
if(useGuidedAlignment) {
try {
alignFileIdx_ = std::stoul(options_->get<std::string>("guided-alignment"));
} catch(const std::invalid_argument& /*e*/) {
ABORT(
"For TSV input, guided-alignment must provide an index of a field with alignments. "
"The value '{}' could not be converted to an unsigned integer.",
options_->get<std::string>("guided-alignment"));
}
LOG(info, "[data] Using word alignments from TSV field no. {}", alignFileIdx_);
}
if(useDataWeighting) {
try {
weightFileIdx_ = std::stoul(options_->get<std::string>("data-weighting"));
} catch(const std::invalid_argument& /*e*/) {
ABORT(
"For TSV input, data-weighting must provide an index of a field with weights. "
"The value '{}' could not be converted to an unsigned integer.",
options_->get<std::string>("data-weighting"));
}
LOG(info, "[data] Using weights from TSV field no. {}", weightFileIdx_);
}
// check for identical or too large indices
size_t maxIndex = tsvNumInputFields_ + size_t(useGuidedAlignment) + size_t(useDataWeighting) - 1;
ABORT_IF((useGuidedAlignment && useDataWeighting && alignFileIdx_ == weightFileIdx_)
|| (useGuidedAlignment && (alignFileIdx_ > maxIndex))
|| (useDataWeighting && (weightFileIdx_ > maxIndex)),
"For TSV input, guided-alignment and data-weighting must provide an index <= {} "
"and be different",
maxIndex);
}
// run this after determining if guided alignment or data weighting is used in TSV input
initEOS(training);
// @TODO: check if size_t can be used instead of int
std::vector<int> maxVocabs = options_->get<std::vector<int>>("dim-vocabs");
@ -133,12 +179,23 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
// There is more cases for multi-encoder models not listed above.
//
if(vocabPaths.empty()) {
size_t numStreams = tsv_ ? tsvNumFields_ : paths_.size();
size_t numStreams = tsv_ ? tsvNumInputFields_ : paths_.size();
// Creating a vocabulary from stdin is not supported
ABORT_IF(tsv_ && (paths_[0] == "stdin" || paths_[0] == "-"),
"Creating vocabularies automatically from a data stream from STDIN is not supported. "
"Create vocabularies first and provide them with --vocabs");
if(tsv_) {
// Creating a vocabulary from stdin is not supported
ABORT_IF(paths_[0] == "stdin" || paths_[0] == "-",
"Creating vocabularies automatically from a data stream from STDIN is not "
"supported. Create vocabularies first and provide them with --vocabs");
// Creating a vocab from a TSV input (from STDIN or a file) with alignments or weights is
// not supported
ABORT_IF(useGuidedAlignment,
"Creating vocabularies automatically from TSV data with alignments is not "
"supported. Create vocabularies first and provide them with --vocabs");
ABORT_IF(useDataWeighting,
"Creating vocabularies automatically from TSV data with weights is not "
"supported. Create vocabularies first and provide them with --vocabs");
}
if(maxVocabs.size() < paths_.size())
maxVocabs.resize(paths_.size(), 0);
@ -170,7 +227,7 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
options_->set("dim-vocabs", vocabDims, "vocabs", vocabPaths1);
} else { // Vocabulary paths are given
size_t numStreams = tsv_ ? tsvNumFields_ : paths_.size();
size_t numStreams = tsv_ ? tsvNumInputFields_ : paths_.size();
// Load all vocabs
size_t numVocs = vocabPaths.size();
@ -200,11 +257,22 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
vocabDims.resize(numVocs, 0); // make sure there is as many dims as vocab paths
for(size_t i = 0; i < numVocs; ++i) {
// Creating a vocabulary from stdin is not supported
ABORT_IF(tsv_ && (paths_[0] == "stdin" || paths_[0] == "-")
&& (vocabPaths[i].empty() || !filesystem::exists(vocabPaths[i])),
"Creating vocabulary automatically from a data stream from STDIN is not supported. "
"Create vocabularies first and provide them with --vocabs");
if(tsv_) {
bool noVocabGiven = (vocabPaths[i].empty() || !filesystem::exists(vocabPaths[i]));
// Creating a vocabulary from stdin is not supported
ABORT_IF(noVocabGiven && (paths_[0] == "stdin" || paths_[0] == "-"),
"Creating vocabulary automatically from a data stream from STDIN is not "
"supported. Create vocabularies first and provide them with --vocabs");
// Creating a vocab from a TSV input (from STDIN or a file) with alignments or weights is not supported
ABORT_IF(noVocabGiven && useGuidedAlignment,
"Creating vocabularies automatically from TSV data with alignments is not "
"supported. Create vocabularies first and provide them with --vocabs");
ABORT_IF(noVocabGiven && useDataWeighting,
"Creating vocabularies automatically from TSV data with weights is not "
"supported. Create vocabularies first and provide them with --vocabs");
}
// Get the set of files that corresponds to the vocab. If the next file is the same vocab,
// it will not be created again, but just correctly loaded.
@ -230,7 +298,7 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
fileutils::cut(groupedPaths[0], // Index 0 because there is only one TSV file
tsvTempFile,
vocabDetails.streams,
tsvNumFields_,
tsvNumInputFields_,
" "); // Notice that tab-separated fields are joined with a whitespace
groupedPaths.clear();
@ -288,30 +356,34 @@ CorpusBase::CorpusBase(Ptr<Options> options, bool translate)
files_.size(),
vocabs_.size());
if(training && options_->get("guided-alignment", std::string("none")) != "none") {
auto path = options_->get<std::string>("guided-alignment");
// Handle guided alignment and data weighting files. Alignments and weights in TSV input were
// handled earlier.
if(training && !tsv_) {
if(useGuidedAlignment) {
auto path = options_->get<std::string>("guided-alignment");
ABORT_IF(!filesystem::exists(path), "Alignment file does not exist");
LOG(info, "[data] Using word alignments from file {}", path);
ABORT_IF(!filesystem::exists(path), "Alignment file does not exist");
LOG(info, "[data] Using word alignments from file {}", path);
alignFileIdx_ = paths_.size();
paths_.emplace_back(path);
io::InputFileStream* strm = new io::InputFileStream(path);
ABORT_IF(strm->empty(), "File with alignments '{}' is empty", path);
files_.emplace_back(strm);
}
alignFileIdx_ = paths_.size();
paths_.emplace_back(path);
io::InputFileStream* strm = new io::InputFileStream(path);
ABORT_IF(strm->empty(), "File with alignments '{}' is empty", path);
files_.emplace_back(strm);
}
if(training && options_->hasAndNotEmpty("data-weighting")) {
auto path = options_->get<std::string>("data-weighting");
if(useDataWeighting) {
auto path = options_->get<std::string>("data-weighting");
ABORT_IF(!filesystem::exists(path), "Weight file does not exist");
LOG(info, "[data] Using weights from file {}", path);
ABORT_IF(!filesystem::exists(path), "Weight file does not exist");
LOG(info, "[data] Using weights from file {}", path);
weightFileIdx_ = paths_.size();
paths_.emplace_back(path);
io::InputFileStream* strm = new io::InputFileStream(path);
ABORT_IF(strm->empty(), "File with weights '{}' is empty", path);
files_.emplace_back(strm);
weightFileIdx_ = paths_.size();
paths_.emplace_back(path);
io::InputFileStream* strm = new io::InputFileStream(path);
ABORT_IF(strm->empty(), "File with weights '{}' is empty", path);
files_.emplace_back(strm);
}
}
}
@ -412,23 +484,37 @@ void CorpusBase::initEOS(bool training = true) {
// add an EOS symbol. Hence the decision to add EOS is now based on input stream positions and
// corresponding input type.
size_t numStreams = tsv_ ? tsvNumFields_ : paths_.size(); // determine number of streams
// Determine the number of streams, i.e. the number of input files (if --train-sets) or fields in
// a TSV input (if --tsv). Notice that in case of a TSV input, fields that contain alignments and
// weights are *not* included.
size_t numStreams = tsv_ ? tsvNumInputFields_ : paths_.size();
addEOS_.resize(numStreams, true);
// @TODO: think if this should be checked and processed here or in a validation step in config?
// input-types provides the input type for each input file (if --train-sets) or for each TSV field
// (if --tsv), for example: sequence, class, alignment.
auto inputTypes = options_->get<std::vector<std::string>>("input-types", {}); // empty list by default
// make sure there is an input type for each stream
ABORT_IF(inputTypes.size() > 0 && inputTypes.size() < numStreams,
"Input types have been specified ({}), you need to specify one per input ({})",
inputTypes.size(),
numStreams);
// @TODO: think if this should be checked and processed here or in a validation step in config?
if(!inputTypes.empty()) {
if(tsv_) {
// Remove 'alignment' and 'weight' from input types.
// Note that these input types are not typical input streams with corresponding vocabularies.
// For a TSV input, they were used only to determine fields that contain alignments or weights
// and initialize guided-alignment and data-weighting options.
auto pos = std::find(inputTypes.begin(), inputTypes.end(), "alignment");
if(pos != inputTypes.end())
inputTypes.erase(pos);
pos = std::find(inputTypes.begin(), inputTypes.end(), "weight");
if(pos != inputTypes.end())
inputTypes.erase(pos);
}
// make sure there is an equal number of input types and streams when training
ABORT_IF(training && inputTypes.size() > 0 && inputTypes.size() != numStreams,
"Input types have been specified ({}), you need to specify one per input ({})",
inputTypes.size(),
numStreams);
// Make sure there is an input type for each stream
// and that there is an equal number of input types and streams when training
ABORT_IF((inputTypes.size() < numStreams) || (training && inputTypes.size() != numStreams),
"Input types have been specified ({}), you need to specify one per input stream ({})",
inputTypes.size(), numStreams);
}
for(int i = 0; i < numStreams; ++i)
if(inputTypes.size() > i) {
@ -444,6 +530,35 @@ void CorpusBase::initEOS(bool training = true) {
}
}
size_t CorpusBase::getNumberOfTSVInputFields(Ptr<Options> options) {
if(options->get<bool>("tsv", false)) {
size_t n = options->get<size_t>("tsv-fields", 0);
if(n > 0 && options->get("guided-alignment", std::string("none")) != "none")
--n;
if(n > 0 && options->hasAndNotEmpty("data-weighting"))
--n;
return n;
}
return 0;
}
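For illustration, here is a minimal standalone sketch (not part of this diff) of the field counting above, with hypothetical option values `--tsv-fields 4 --guided-alignment 3 --data-weighting 2`: one field is reserved for alignments and one for weights, so two input fields remain. With both options active, the index check earlier in this file then allows field indices up to maxIndex = 2 + 1 + 1 - 1 = 3 and requires the alignment and weight indices to differ.
#include <cstddef>
#include <iostream>
#include <string>
// Mirrors getNumberOfTSVInputFields(): alignment and weight fields are not input streams.
size_t numTSVInputFields(size_t tsvFields, const std::string& guidedAlignment, const std::string& dataWeighting) {
  size_t n = tsvFields;
  if(n > 0 && guidedAlignment != "none")
    --n; // one TSV field holds word alignments
  if(n > 0 && !dataWeighting.empty())
    --n; // one TSV field holds weights
  return n;
}
int main() {
  // hypothetical options: --tsv-fields 4 --guided-alignment 3 --data-weighting 2
  std::cout << numTSVInputFields(4, "3", "2") << std::endl; // prints 2
}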
void SentenceTuple::setWeights(const std::vector<float>& weights) {
if(weights.size() != 1) { // this assumes a single sentence-level weight is always fine
ABORT_IF(empty(), "Source and target sequences should be added to a tuple before data weights");
auto numWeights = weights.size();
auto numTrgWords = back().size();
// word-level weights may or may not contain a weight for EOS tokens
if(numWeights != numTrgWords && numWeights != numTrgWords - 1)
LOG(warn,
"[warn] "
"Number of weights ({}) does not match the number of target words ({}) in line #{}",
numWeights,
numTrgWords,
id_);
}
weights_ = weights;
}
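As a side note, the accepted weight shapes can be summarised with a small standalone check (assumed semantics, mirroring the warning above): a single value is treated as a sentence-level weight, while word-level weights must match the number of target words either with or without the EOS token.
#include <cstddef>
#include <iostream>
// Mirrors the shape check in SentenceTuple::setWeights() above.
bool weightsShapeOk(size_t numWeights, size_t numTrgWords) {
  if(numWeights == 1)
    return true;                        // sentence-level weight
  return numWeights == numTrgWords      // word-level weights including EOS
      || numWeights == numTrgWords - 1; // word-level weights excluding EOS
}
int main() {
  // target sentence with 5 words including EOS
  std::cout << weightsShapeOk(1, 5) << weightsShapeOk(5, 5)
            << weightsShapeOk(4, 5) << weightsShapeOk(3, 5) << std::endl; // prints 1110
}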
// experimental: hide inline-fix source tokens from cross attention
std::vector<float> SubBatch::crossMaskWithInlineFixSourceSuppressed() const
{

File diff suppressed because it is too large

View File

@ -44,21 +44,24 @@ SentenceTuple TextInput::next() {
for(size_t i = 0; i < files_.size(); ++i) {
std::string line;
if(io::getline(*files_[i], line)) {
Words words = vocabs_[i]->encode(line, /*addEOS =*/ true, /*inference =*/ inference_);
Words words = vocabs_[i]->encode(line, /*addEOS=*/true, /*inference=*/inference_);
if(this->maxLengthCrop_ && words.size() > this->maxLength_) {
words.resize(maxLength_);
words.back() = vocabs_.back()->getEosId(); // note: this will not work with class-labels
}
if(words.empty())
words.push_back(Word::ZERO); // @TODO: What is this for? @BUGBUG: addEOS=true, so this can never happen, right?
ABORT_IF(words.empty(), "No words (not even EOS) found in string??");
ABORT_IF(tup.size() != i, "Previous tuple elements are missing.");
tup.push_back(words);
}
}
// check if each input file provided an example
if(tup.size() == files_.size())
if(tup.size() == files_.size()) // check if each input file provided an example
return tup;
return SentenceTuple(0);
else if(tup.size() == 0) // if no file provided examples we are done
return SentenceTuple(0);
else // neither all nor none => we have at least one missing entry
ABORT("There are missing entries in the text tuples.");
}
} // namespace data

168
src/embedder/embedder.h Normal file
View File

@ -0,0 +1,168 @@
#pragma once
#include "marian.h"
#include "common/config.h"
#include "common/options.h"
#include "data/batch_generator.h"
#include "data/corpus.h"
#include "data/corpus_nbest.h"
#include "models/costs.h"
#include "models/model_task.h"
#include "embedder/vector_collector.h"
#include "training/scheduler.h"
#include "training/validator.h"
namespace marian {
using namespace data;
/*
* The tool is used to create output sentence embeddings from available
* Marian encoders. With --compute-similarity it can return the cosine
* similarity between two sentences provided from two sources.
*/
class Embedder {
private:
Ptr<models::IModel> model_;
public:
Embedder(Ptr<Options> options)
: model_(createModelFromOptions(options, models::usage::embedding)) {}
void load(Ptr<ExpressionGraph> graph, const std::string& modelFile) {
model_->load(graph, modelFile);
}
Expr build(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
auto embedder = std::dynamic_pointer_cast<EncoderPooler>(model_);
ABORT_IF(!embedder, "Could not cast to EncoderPooler");
return embedder->apply(graph, batch, /*clearGraph=*/true);
}
};
/*
* Actual Embed task. @TODO: this should be simplified in the future.
*/
template <class Model>
class Embed : public ModelTask {
private:
Ptr<Options> options_;
Ptr<CorpusBase> corpus_;
std::vector<Ptr<ExpressionGraph>> graphs_;
std::vector<Ptr<Model>> models_;
public:
Embed(Ptr<Options> options) : options_(options) {
options_ = options_->with("inference", true,
"shuffle", "none");
// if a similarity is computed then double the input types and vocabs for
// the two encoders that are used in the model.
if(options->get<bool>("compute-similarity")) {
auto vVocabs = options_->get<std::vector<std::string>>("vocabs");
auto vDimVocabs = options_->get<std::vector<size_t>>("dim-vocabs");
vVocabs.push_back(vVocabs.back());
vDimVocabs.push_back(vDimVocabs.back());
options_ = options_->with("vocabs", vVocabs,
"dim-vocabs", vDimVocabs);
}
corpus_ = New<Corpus>(options_);
corpus_->prepare();
auto devices = Config::getDevices(options_);
for(auto device : devices) {
auto graph = New<ExpressionGraph>(true);
auto precision = options_->get<std::vector<std::string>>("precision", {"float32"});
graph->setDefaultElementType(typeFromString(precision[0])); // only use first type, used for parameter type in graph
graph->setDevice(device);
graph->getBackend()->setClip(options_->get<float>("clip-gemm"));
if (device.type == DeviceType::cpu) {
graph->getBackend()->setOptimized(options_->get<bool>("optimize"));
}
graph->reserveWorkspaceMB(options_->get<size_t>("workspace"));
graphs_.push_back(graph);
}
auto modelFile = options_->get<std::string>("model");
models_.resize(graphs_.size());
ThreadPool pool(graphs_.size(), graphs_.size());
for(size_t i = 0; i < graphs_.size(); ++i) {
pool.enqueue(
[=](size_t j) {
models_[j] = New<Model>(options_);
models_[j]->load(graphs_[j], modelFile);
},
i);
}
}
void run() override {
LOG(info, "Embedding");
timer::Timer timer;
auto batchGenerator = New<BatchGenerator<CorpusBase>>(corpus_, options_);
batchGenerator->prepare();
auto output = New<VectorCollector>(options_);
size_t batchId = 0;
std::mutex smutex;
{
ThreadPool pool(graphs_.size(), graphs_.size());
for(auto batch : *batchGenerator) {
auto task = [=, &smutex](size_t id) {
thread_local Ptr<ExpressionGraph> graph;
thread_local Ptr<Model> builder;
if(!graph) {
graph = graphs_[id % graphs_.size()];
builder = models_[id % graphs_.size()];
}
auto embeddings = builder->build(graph, batch);
graph->forward();
std::vector<float> sentVectors;
embeddings->val()->get(sentVectors);
// collect embedding vector per sentence.
// if we compute similarities this is only one similarity per sentence pair.
for(size_t i = 0; i < batch->size(); ++i) {
auto embSize = embeddings->shape()[-1];
auto beg = i * embSize;
auto end = (i + 1) * embSize;
std::vector<float> sentVector(sentVectors.begin() + beg, sentVectors.begin() + end);
output->Write((long)batch->getSentenceIds()[i],
sentVector);
}
// progress heartbeat for MS-internal Philly compute cluster
// otherwise this job may be killed prematurely if no log for 4 hrs
if (getenv("PHILLY_JOB_ID") // this environment variable exists when running on the cluster
&& id % 1000 == 0) // hard beat once every 1000 batches
{
auto progress = id / 10000.f; //fake progress for now, becomes >100 after 1M batches
fprintf(stderr, "PROGRESS: %.2f%%\n", progress);
fflush(stderr);
}
};
pool.enqueue(task, batchId++);
}
}
LOG(info, "Total time: {:.5f}s wall", timer.elapsed());
}
};
} // namespace marian

View File

@ -0,0 +1,71 @@
#include "embedder/vector_collector.h"
#include "common/logging.h"
#include "common/utils.h"
#include <iostream>
#include <iomanip>
namespace marian {
// This class manages multi-threaded writing of embedded vectors to stdout or an output file.
// It will either output string versions of float vectors or binary equal length versions depending
// on its binary_ flag.
VectorCollector::VectorCollector(const Ptr<Options>& options)
: nextId_(0), binary_{options->get<bool>("binary", false)} {
if(options->get<std::string>("output") == "stdout")
outStrm_.reset(new std::ostream(std::cout.rdbuf()));
else
outStrm_.reset(new io::OutputFileStream(options->get<std::string>("output")));
}
void VectorCollector::Write(long id, const std::vector<float>& vec) {
std::lock_guard<std::mutex> lock(mutex_);
if(id == nextId_) {
WriteVector(vec);
++nextId_;
typename Outputs::const_iterator iter, iterNext;
iter = outputs_.begin();
while(iter != outputs_.end()) {
long currId = iter->first;
if(currId == nextId_) {
// 1st element in the map is the next
WriteVector(iter->second);
++nextId_;
// delete current record, move iter on 1
iterNext = iter;
++iterNext;
outputs_.erase(iter);
iter = iterNext;
} else {
// not the next. stop iterating
assert(nextId_ < currId);
break;
}
}
} else {
// save for later
outputs_[id] = vec;
}
}
void VectorCollector::WriteVector(const std::vector<float>& vec) {
if(binary_) {
outStrm_->write((char*)vec.data(), vec.size() * sizeof(float));
} else {
std::stringstream ss;
ss << std::fixed << std::setprecision(8);
for(auto v : vec)
*outStrm_ << v << " ";
*outStrm_ << std::endl;
}
}
} // namespace marian

View File

@ -0,0 +1,32 @@
#pragma once
#include "common/options.h"
#include "common/definitions.h"
#include "common/file_stream.h"
#include <map>
#include <mutex>
namespace marian {
// This class manages multi-threaded writing of embedded vectors to stdout or an output file.
// It will either output string versions of float vectors or binary equal length versions depending
// on its binary_ flag.
class VectorCollector {
public:
VectorCollector(const Ptr<Options>& options);
virtual void Write(long id, const std::vector<float>& vec);
protected:
long nextId_{0};
UPtr<std::ostream> outStrm_;
bool binary_; // output binary floating point vectors if set
std::mutex mutex_;
typedef std::map<long, std::vector<float>> Outputs;
Outputs outputs_;
virtual void WriteVector(const std::vector<float>& vec);
};
} // namespace marian
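A hedged usage sketch of the ordering behaviour described above (the Options setup assumes the key/value constructor used elsewhere in this diff; the ids are hypothetical): Write() emits a vector immediately if it carries the next expected id, otherwise it buffers the vector until the gap is filled, so multiple worker threads can deliver results out of order and still get ordered output.
#include "embedder/vector_collector.h"
using namespace marian;
int main() {
  // Hypothetical usage: write three embedding vectors to stdout in id order.
  auto options = New<Options>("output", "stdout", "binary", false);
  auto collector = New<VectorCollector>(options);
  collector->Write(1, {0.1f, 0.2f}); // buffered: id 0 has not been written yet
  collector->Write(0, {0.3f, 0.4f}); // writes id 0, then flushes the buffered id 1
  collector->Write(2, {0.5f, 0.6f}); // next expected id, written immediately
}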

View File

@ -416,7 +416,7 @@ protected:
ABORT_IF(logits.getNumFactorGroups() > 1, "Unlikelihood loss is not implemented for factors");
ABORT_IF(!mask, "mask is required"); // @TODO: check this, it seems weights for padding are by default 1, which would make this obsolete.
// use label weights, where 1 is GOOD and 0 is BAD. After inversion here, now 1 marks, mask again to eliminate padding (might be obsolete)
// use label weights, where 1 is GOOD and 0 is BAD. After inversion here, now 1 marks BAD, mask again to eliminate padding (might be obsolete)
auto errorMask = (1.f - cast(labelWeights, Type::float32)) * cast(mask, Type::float32);
auto ceUl = logits.applyLossFunction(labels, [&](Expr logits, Expr indices) {

187
src/microsoft/cosmos.cpp Normal file
View File

@ -0,0 +1,187 @@
#include "cosmos.h"
#include "models/model_base.h"
#include "models/model_factory.h"
#include "data/text_input.h"
#if MKL_FOUND
#include "mkl.h"
#endif
namespace marian {
// Thin wrapper around IModel that makes sure model can be cast to an EncoderPooler
// These poolers know how to collect embeddings from a seq2seq encoder.
class EmbedderModel {
private:
Ptr<models::IModel> model_;
public:
EmbedderModel(Ptr<Options> options)
: model_(createModelFromOptions(options, models::usage::embedding)) {}
void load(Ptr<ExpressionGraph> graph, const std::string& modelFile) {
model_->load(graph, modelFile);
}
Expr build(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
auto embedder = std::dynamic_pointer_cast<EncoderPooler>(model_);
ABORT_IF(!embedder, "Could not cast to EncoderPooler");
return embedder->apply(graph, batch, /*clearGraph=*/true);
}
};
namespace cosmos {
const size_t MAX_BATCH_SIZE = 32;
const size_t MAX_LENGTH = 256;
/**
* Single CPU-core implementation of an Embedder/Similarity scorer. Turns sets of '\n'-separated strings
* into parallel batches and either outputs embedding vectors or similarity scores.
*/
class Embedder {
private:
Ptr<Options> options_;
Ptr<ExpressionGraph> graph_;
Ptr<Vocab> vocab_;
Ptr<EmbedderModel> model_;
public:
Embedder(const std::string& modelPath, const std::string& vocabPath, bool computeSimilarity = false) {
options_ = New<Options>("inference", true,
"shuffle", "none",
"mini-batch", MAX_BATCH_SIZE,
"maxi-batch", 100,
"maxi-batch-sort", "src",
"max-length", MAX_LENGTH,
"max-length-crop", true,
"compute-similarity", computeSimilarity,
"vocabs", std::vector<std::string>(computeSimilarity ? 2 : 1, vocabPath));
vocab_ = New<Vocab>(options_, 0);
vocab_->load(vocabPath, 0);
graph_ = New<ExpressionGraph>(/*inference=*/true);
graph_->setDevice(CPU0);
graph_->reserveWorkspaceMB(512);
YAML::Node config;
io::getYamlFromModel(config, "special:model.yml", modelPath);
Ptr<Options> modelOpts = New<Options>();
modelOpts->merge(options_);
modelOpts->merge(config);
model_ = New<EmbedderModel>(modelOpts);
model_->load(graph_, modelPath);
}
// Compute embedding vectors for a batch of sentences
std::vector<std::vector<float>> embed(const std::string& input) {
auto text = New<data::TextInput>(std::vector<std::string>({input}),
std::vector<Ptr<Vocab>>({vocab_}),
options_);
// we set runAsync=false as we are throwing exceptions instead of aborts. Exceptions and threading do not mix well.
data::BatchGenerator<data::TextInput> batchGenerator(text, options_, /*stats=*/nullptr, /*runAsync=*/false);
batchGenerator.prepare();
std::vector<std::vector<float>> output;
for(auto batch : batchGenerator) {
auto embeddings = model_->build(graph_, batch);
graph_->forward();
std::vector<float> sentVectors;
embeddings->val()->get(sentVectors);
// collect embedding vector per sentence.
// if we compute similarities this is only one similarity per sentence pair.
for(size_t i = 0; i < batch->size(); ++i) {
auto batchIdx = batch->getSentenceIds()[i];
if(output.size() <= batchIdx)
output.resize(batchIdx + 1);
int embSize = embeddings->shape()[-1];
size_t beg = i * embSize;
size_t end = (i + 1) * embSize;
std::vector<float> sentVector(sentVectors.begin() + beg, sentVectors.begin() + end);
output[batchIdx] = sentVector;
}
}
return output;
}
// Compute cosine similarity scores for two batches of corresponding sentences
std::vector<float> similarity(const std::string& input1, const std::string& input2) {
auto text = New<data::TextInput>(std::vector<std::string>({input1, input2}),
std::vector<Ptr<Vocab>>({vocab_, vocab_}),
options_);
// we set runAsync=false as we are throwing exceptions instead of aborts. Exceptions and threading do not mix well.
data::BatchGenerator<data::TextInput> batchGenerator(text, options_, /*stats=*/nullptr, /*runAsync=*/false);
batchGenerator.prepare();
std::vector<float> output;
for(auto batch : batchGenerator) {
auto similarities = model_->build(graph_, batch);
graph_->forward();
std::vector<float> vSimilarities;
similarities->val()->get(vSimilarities);
// collect similarity score per sentence pair.
for(size_t i = 0; i < batch->size(); ++i) {
auto batchIdx = batch->getSentenceIds()[i];
if(output.size() <= batchIdx)
output.resize(batchIdx + 1);
output[batchIdx] = vSimilarities[i];
}
}
return output;
};
};
/* Interface functions ***************************************************************************/
MarianEmbedder::MarianEmbedder() {
#if MKL_FOUND
mkl_set_num_threads(1);
#endif
marian::setThrowExceptionOnAbort(true); // globally defined to throw now
}
std::vector<std::vector<float>> MarianEmbedder::embed(const std::string& input) {
ABORT_IF(!embedder_, "Embedder is not defined??");
return embedder_->embed(input);
}
bool MarianEmbedder::load(const std::string& modelPath, const std::string& vocabPath) {
embedder_ = New<Embedder>(modelPath, vocabPath, /*computeSimilarity*/false);
ABORT_IF(!embedder_, "Embedder is not defined??");
return true;
}
MarianCosineScorer::MarianCosineScorer() {
#if MKL_FOUND
mkl_set_num_threads(1);
#endif
marian::setThrowExceptionOnAbort(true); // globally defined to throw now
}
std::vector<float> MarianCosineScorer::score(const std::string& input1, const std::string& input2) {
ABORT_IF(!embedder_, "Embedder is not defined??");
return embedder_->similarity(input1, input2);
};
bool MarianCosineScorer::load(const std::string& modelPath, const std::string& vocabPath) {
embedder_ = New<Embedder>(modelPath, vocabPath, /*computeSimilarity*/true);
ABORT_IF(!embedder_, "Embedder is not defined??");
return true;
}
} // namespace cosmos
} // namespace marian

64
src/microsoft/cosmos.h Normal file
View File

@ -0,0 +1,64 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
namespace marian {
template <typename T>
using Ptr = std::shared_ptr<T>;
namespace cosmos {
class Embedder;
/**
* MarianEmbedder takes a Marian sequence2sequence transformer model and produces
* sentence embeddings collected from the encoder. Currently the model file is supposed
* to know how to do that.
*/
class MarianEmbedder {
private:
Ptr<Embedder> embedder_;
public:
MarianEmbedder();
/**
* `input` is a big string with multiple sentences separated by '\n'.
* Returns a vector of embedding vectors in order corresponding to input sentence order.
*/
std::vector<std::vector<float>> embed(const std::string& input);
/**
* `modelPath` is a Marian model, `vocabPath` a matching SentencePiece model with *.spm suffix.
*/
bool load(const std::string& modelPath, const std::string& vocabPath);
};
/**
* MarianCosineScorer takes a Marian sequence2sequence transformer model and produces
* sentence-wise cosine similarities for two sentence embeddings.
*/
class MarianCosineScorer {
private:
Ptr<Embedder> embedder_;
public:
MarianCosineScorer();
/**
* `input1` and `input2` are big strings with multiple sentences separated by '\n'.
* Both inputs have to have the same number of separated lines.
* Returns a vector of similarity scores in order corresponding to input sentence order.
*/
std::vector<float> score(const std::string& input1, const std::string& input2);
/**
* `modelPath` is a Marian model, `vocabPath` a matching SentencePiece model with *.spm suffix.
*/
bool load(const std::string& modelPath, const std::string& vocabPath);
};
}
}
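A short usage sketch of the interface declared above (the model and vocabulary paths are placeholders; error handling omitted), following the "<CLS>"-prefixed inputs used in the unit tests:
#include "microsoft/cosmos.h"
#include <iostream>
int main() {
  marian::cosmos::MarianEmbedder embedder;
  if(embedder.load("/path/to/model.npz", "/path/to/vocab.spm")) { // placeholder paths
    auto vectors = embedder.embed("<CLS> This is a test.\n<CLS> This is another test.");
    std::cout << vectors.size() << " sentence embeddings of size " << vectors[0].size() << std::endl;
  }
  marian::cosmos::MarianCosineScorer scorer;
  if(scorer.load("/path/to/model.npz", "/path/to/vocab.spm")) {
    auto scores = scorer.score("<CLS> This is a test.", "<CLS> This is another test.");
    std::cout << "cosine similarity: " << scores[0] << std::endl;
  }
}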

View File

@ -51,6 +51,7 @@ EncoderDecoder::EncoderDecoder(Ptr<ExpressionGraph> graph, Ptr<Options> options)
modelFeatures_.insert("transformer-tied-layers");
modelFeatures_.insert("transformer-guided-alignment-layer");
modelFeatures_.insert("transformer-train-position-embeddings");
modelFeatures_.insert("transformer-pool");
modelFeatures_.insert("bert-train-type-embeddings");
modelFeatures_.insert("bert-type-vocab-size");

219
src/models/encoder_pooler.h Normal file
View File

@ -0,0 +1,219 @@
#pragma once
#include "marian.h"
#include "models/encoder.h"
#include "models/pooler.h"
#include "models/model_base.h"
#include "models/states.h"
// @TODO: this introduces functionality to use LASER in Marian for the filtering workflow or for use in MS-internal
// COSMOS server-farm. There is a lot of code duplication with Classifier and EncoderDecoder and this needs to be fixed.
// This will be done after the new layer system has been finished.
namespace marian {
/**
* Combines sequence encoders with generic poolers
* Can be used to train sequence poolers like language detection, BERT-next-sentence-prediction etc.
* Already has support for multi-objective training.
*
* @TODO: this should probably be unified somehow with EncoderDecoder which could allow for decoder/pooler
* multi-objective training.
*/
class EncoderPoolerBase : public models::IModel {
public:
virtual ~EncoderPoolerBase() {}
virtual void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded = true) override
= 0;
virtual void mmap(Ptr<ExpressionGraph> graph,
const void* ptr,
bool markedReloaded = true)
= 0;
virtual void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool saveTranslatorConfig = false) override
= 0;
virtual void clear(Ptr<ExpressionGraph> graph) override = 0;
virtual Expr apply(Ptr<ExpressionGraph>, Ptr<data::CorpusBatch>, bool) = 0;
virtual Logits build(Ptr<ExpressionGraph> graph,
Ptr<data::Batch> batch,
bool clearGraph = true) override {
clearGraph;
ABORT("Poolers cannot produce Logits");
};
virtual Logits build(Ptr<ExpressionGraph> graph,
Ptr<data::CorpusBatch> batch,
bool clearGraph = true) {
clearGraph;
ABORT("Poolers cannot produce Logits");
}
virtual Ptr<Options> getOptions() = 0;
};
class EncoderPooler : public EncoderPoolerBase {
protected:
Ptr<Options> options_;
std::string prefix_;
std::vector<Ptr<EncoderBase>> encoders_;
std::vector<Ptr<PoolerBase>> poolers_;
bool inference_{true};
std::set<std::string> modelFeatures_;
Config::YamlNode getModelParameters() {
Config::YamlNode modelParams;
auto clone = options_->cloneToYamlNode();
for(auto& key : modelFeatures_)
modelParams[key] = clone[key];
if(options_->has("original-type"))
modelParams["type"] = clone["original-type"];
modelParams["version"] = buildVersion();
return modelParams;
}
std::string getModelParametersAsString() {
auto yaml = getModelParameters();
YAML::Emitter out;
cli::OutputYaml(yaml, out);
return std::string(out.c_str());
}
public:
typedef data::Corpus dataset_type;
// @TODO: lots of code-duplication with EncoderDecoder
EncoderPooler(Ptr<Options> options)
: options_(options),
prefix_(options->get<std::string>("prefix", "")),
inference_(options->get<bool>("inference", false)) {
modelFeatures_ = {"type",
"dim-vocabs",
"dim-emb",
"dim-rnn",
"enc-cell",
"enc-type",
"enc-cell-depth",
"enc-depth",
"dec-depth",
"dec-cell",
"dec-cell-base-depth",
"dec-cell-high-depth",
"skip",
"layer-normalization",
"right-left",
"input-types",
"special-vocab",
"tied-embeddings",
"tied-embeddings-src",
"tied-embeddings-all"};
modelFeatures_.insert("transformer-heads");
modelFeatures_.insert("transformer-no-projection");
modelFeatures_.insert("transformer-dim-ffn");
modelFeatures_.insert("transformer-ffn-depth");
modelFeatures_.insert("transformer-ffn-activation");
modelFeatures_.insert("transformer-dim-aan");
modelFeatures_.insert("transformer-aan-depth");
modelFeatures_.insert("transformer-aan-activation");
modelFeatures_.insert("transformer-aan-nogate");
modelFeatures_.insert("transformer-preprocess");
modelFeatures_.insert("transformer-postprocess");
modelFeatures_.insert("transformer-postprocess-emb");
modelFeatures_.insert("transformer-decoder-autoreg");
modelFeatures_.insert("transformer-tied-layers");
modelFeatures_.insert("transformer-guided-alignment-layer");
modelFeatures_.insert("transformer-train-position-embeddings");
modelFeatures_.insert("transformer-pool");
modelFeatures_.insert("bert-train-type-embeddings");
modelFeatures_.insert("bert-type-vocab-size");
modelFeatures_.insert("ulr");
modelFeatures_.insert("ulr-trainable-transformation");
modelFeatures_.insert("ulr-dim-emb");
modelFeatures_.insert("lemma-dim-emb");
}
virtual Ptr<Options> getOptions() override { return options_; }
std::vector<Ptr<EncoderBase>>& getEncoders() { return encoders_; }
std::vector<Ptr<PoolerBase>>& getPoolers() { return poolers_; }
void push_back(Ptr<EncoderBase> encoder) { encoders_.push_back(encoder); }
void push_back(Ptr<PoolerBase> pooler) { poolers_.push_back(pooler); }
void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded) override {
graph->load(name, markedReloaded && !opt<bool>("ignore-model-config", false));
}
void mmap(Ptr<ExpressionGraph> graph,
const void* ptr,
bool markedReloaded) override {
graph->mmap(ptr, markedReloaded && !opt<bool>("ignore-model-config", false));
}
void save(Ptr<ExpressionGraph> graph,
const std::string& name,
bool /*saveModelConfig*/) override {
LOG(info, "Saving model weights and runtime parameters to {}", name);
graph->save(name , getModelParametersAsString());
}
void clear(Ptr<ExpressionGraph> graph) override {
graph->clear();
for(auto& enc : encoders_)
enc->clear();
for(auto& pooler : poolers_)
pooler->clear();
}
template <typename T>
T opt(const std::string& key) {
return options_->get<T>(key);
}
template <typename T>
T opt(const std::string& key, const T& def) {
return options_->get<T>(key, def);
}
template <typename T>
void set(std::string key, T value) {
options_->set(key, value);
}
/*********************************************************************/
virtual Expr apply(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch, bool clearGraph) override {
if(clearGraph)
clear(graph);
std::vector<Ptr<EncoderState>> encoderStates;
for(auto& encoder : encoders_)
encoderStates.push_back(encoder->build(graph, batch));
ABORT_IF(poolers_.size() != 1, "Expected exactly one pooler");
return poolers_[0]->apply(graph, batch, encoderStates);
}
};
} // namespace marian

71
src/models/laser.h Normal file
View File

@ -0,0 +1,71 @@
#pragma once
#include "marian.h"
#include "layers/constructors.h"
#include "rnn/constructors.h"
namespace marian {
// Re-implements the LASER BiLSTM encoder from:
// Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond
// Mikel Artetxe, Holger Schwenk
// https://arxiv.org/abs/1812.10464
class EncoderLaser : public EncoderBase {
using EncoderBase::EncoderBase;
public:
Expr applyEncoderRNN(Ptr<ExpressionGraph> graph,
Expr embeddings,
Expr mask) {
int depth = opt<int>("enc-depth");
float dropoutRnn = inference_ ? 0 : opt<float>("dropout-rnn");
Expr output = embeddings;
auto applyRnn = [&](int layer, rnn::dir direction, Expr input, Expr mask) {
std::string paramPrefix = prefix_ + "_" + opt<std::string>("enc-cell");
paramPrefix += "_l" + std::to_string(layer);
if(direction == rnn::dir::backward)
paramPrefix += "_reverse";
auto rnnFactory = rnn::rnn()
("type", opt<std::string>("enc-cell"))
("direction", (int)direction)
("dimInput", input->shape()[-1])
("dimState", opt<int>("dim-rnn"))
("dropout", dropoutRnn)
("layer-normalization", opt<bool>("layer-normalization"))
("skip", opt<bool>("skip"))
.push_back(rnn::cell()("prefix", paramPrefix));
return rnnFactory.construct(graph)->transduce(input, mask);
};
for(int i = 0; i < depth; ++i) {
output = concatenate({applyRnn(i, rnn::dir::forward, output, mask),
applyRnn(i, rnn::dir::backward, output, mask)},
/*axis =*/ -1);
}
return output;
}
virtual Ptr<EncoderState> build(Ptr<ExpressionGraph> graph,
Ptr<data::CorpusBatch> batch) override {
graph_ = graph;
// select embeddings that occur in the batch
Expr batchEmbeddings, batchMask; std::tie
(batchEmbeddings, batchMask) = getEmbeddingLayer()->apply((*batch)[batchIndex_]);
Expr context = applyEncoderRNN(graph_, batchEmbeddings, batchMask);
return New<EncoderState>(context, batchMask, batch);
}
void clear() override {}
};
}

View File

@ -8,7 +8,7 @@
namespace marian {
namespace models {
enum struct usage { raw, training, scoring, translation };
enum struct usage { raw, training, scoring, translation, embedding };
}
} // namespace marian

View File

@ -10,6 +10,7 @@
#include "models/amun.h"
#include "models/nematus.h"
#include "models/s2s.h"
#include "models/laser.h"
#include "models/transformer_factory.h"
#ifdef CUDNN
@ -29,6 +30,9 @@ namespace models {
Ptr<EncoderBase> EncoderFactory::construct(Ptr<ExpressionGraph> graph) {
if(options_->get<std::string>("type") == "s2s")
return New<EncoderS2S>(graph, options_);
if(options_->get<std::string>("type") == "laser" || options_->get<std::string>("type") == "laser-sim")
return New<EncoderLaser>(graph, options_);
#ifdef CUDNN
if(options_->get<std::string>("type") == "char-s2s")
@ -61,6 +65,17 @@ Ptr<ClassifierBase> ClassifierFactory::construct(Ptr<ExpressionGraph> graph) {
ABORT("Unknown classifier type");
}
Ptr<PoolerBase> PoolerFactory::construct(Ptr<ExpressionGraph> graph) {
if(options_->get<std::string>("type") == "max-pooler")
return New<MaxPooler>(graph, options_);
if(options_->get<std::string>("type") == "slice-pooler")
return New<SlicePooler>(graph, options_);
else if(options_->get<std::string>("type") == "sim-pooler")
return New<SimPooler>(graph, options_);
else
ABORT("Unknown pooler type");
}
Ptr<IModel> EncoderDecoderFactory::construct(Ptr<ExpressionGraph> graph) {
Ptr<EncoderDecoder> encdec;
if(options_->get<std::string>("type") == "amun")
@ -97,9 +112,54 @@ Ptr<IModel> EncoderClassifierFactory::construct(Ptr<ExpressionGraph> graph) {
return enccls;
}
Ptr<IModel> EncoderPoolerFactory::construct(Ptr<ExpressionGraph> graph) {
Ptr<EncoderPooler> encpool = New<EncoderPooler>(options_);
for(auto& ef : encoders_)
encpool->push_back(ef(options_).construct(graph));
for(auto& pl : poolers_)
encpool->push_back(pl(options_).construct(graph));
return encpool;
}
Ptr<IModel> createBaseModelByType(std::string type, usage use, Ptr<Options> options) {
Ptr<ExpressionGraph> graph = nullptr; // graph unknown at this stage
// clang-format off
if(use == usage::embedding) { // hijacking an EncoderDecoder model for embedding only
int dimVocab = options->get<std::vector<int>>("dim-vocabs")[0];
Ptr<Options> newOptions;
if(options->get<bool>("compute-similarity")) {
newOptions = options->with("usage", use,
"original-type", type,
"input-types", std::vector<std::string>({"sequence", "sequence"}),
"dim-vocabs", std::vector<int>(2, dimVocab));
} else {
newOptions = options->with("usage", use,
"original-type", type,
"input-types", std::vector<std::string>({"sequence"}),
"dim-vocabs", std::vector<int>(1, dimVocab));
}
auto res = New<EncoderPooler>(newOptions);
if(options->get<bool>("compute-similarity")) {
res->push_back(models::encoder(newOptions->with("index", 0)).construct(graph));
res->push_back(models::encoder(newOptions->with("index", 1)).construct(graph));
res->push_back(New<SimPooler>(graph, newOptions->with("type", "sim-pooler")));
} else {
res->push_back(models::encoder(newOptions->with("index", 0)).construct(graph));
if(type == "laser")
res->push_back(New<MaxPooler>(graph, newOptions->with("type", "max-pooler")));
else
res->push_back(New<SlicePooler>(graph, newOptions->with("type", "slice-pooler")));
}
return res;
}
if(type == "s2s" || type == "amun" || type == "nematus") {
return models::encoder_decoder(options->with(
"usage", use,
@ -313,7 +373,7 @@ Ptr<IModel> createModelFromOptions(Ptr<Options> options, usage use) {
else
ABORT("'usage' parameter 'translation' cannot be applied to model type: {}", type);
}
else if (use == usage::raw)
else if (use == usage::raw || use == usage::embedding)
return baseModel;
else
ABORT("'Usage' parameter must be 'translation' or 'raw'");

View File

@ -5,6 +5,7 @@
#include "layers/factory.h"
#include "models/encoder_decoder.h"
#include "models/encoder_classifier.h"
#include "models/encoder_pooler.h"
namespace marian {
namespace models {
@ -33,6 +34,14 @@ public:
typedef Accumulator<ClassifierFactory> classifier;
class PoolerFactory : public Factory {
using Factory::Factory;
public:
virtual Ptr<PoolerBase> construct(Ptr<ExpressionGraph> graph);
};
typedef Accumulator<PoolerFactory> pooler;
class EncoderDecoderFactory : public Factory {
using Factory::Factory;
private:
@ -77,6 +86,28 @@ public:
typedef Accumulator<EncoderClassifierFactory> encoder_classifier;
class EncoderPoolerFactory : public Factory {
using Factory::Factory;
private:
std::vector<encoder> encoders_;
std::vector<pooler> poolers_;
public:
Accumulator<EncoderPoolerFactory> push_back(encoder enc) {
encoders_.push_back(enc);
return Accumulator<EncoderPoolerFactory>(*this);
}
Accumulator<EncoderPoolerFactory> push_back(pooler cls) {
poolers_.push_back(cls);
return Accumulator<EncoderPoolerFactory>(*this);
}
virtual Ptr<IModel> construct(Ptr<ExpressionGraph> graph);
};
typedef Accumulator<EncoderPoolerFactory> encoder_pooler;
Ptr<IModel> createBaseModelByType(std::string type, usage, Ptr<Options> options);
Ptr<IModel> createModelFromOptions(Ptr<Options> options, usage);

139
src/models/pooler.h Normal file
View File

@ -0,0 +1,139 @@
#pragma once
#include "marian.h"
#include "models/states.h"
#include "layers/constructors.h"
#include "layers/factory.h"
namespace marian {
/**
* Simple base class for Poolers to be used in EncoderPooler framework
* A pooler takes an encoder state (contextual word embeddings) and produces
* a single sentence embedding.
*/
class PoolerBase : public LayerBase {
using LayerBase::LayerBase;
protected:
const std::string prefix_{"pooler"};
const bool inference_{false};
const size_t batchIndex_{0};
public:
PoolerBase(Ptr<ExpressionGraph> graph, Ptr<Options> options)
: LayerBase(graph, options),
prefix_(options->get<std::string>("prefix", "pooler")),
inference_(options->get<bool>("inference", true)),
batchIndex_(options->get<size_t>("index", 1)) {} // assume that training input has batch index 0 and labels have index 1
virtual ~PoolerBase() {}
virtual Expr apply(Ptr<ExpressionGraph>, Ptr<data::CorpusBatch>, const std::vector<Ptr<EncoderState>>&) = 0;
template <typename T>
T opt(const std::string& key) const {
return options_->get<T>(key);
}
// Should be used to clear any batch-wise temporary objects if present
virtual void clear() = 0;
};
/**
* Pool encoder state (contextual word embeddings) via max-pooling along sentence-length dimension.
*/
class MaxPooler : public PoolerBase {
public:
MaxPooler(Ptr<ExpressionGraph> graph, Ptr<Options> options)
: PoolerBase(graph, options) {}
Expr apply(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch, const std::vector<Ptr<EncoderState>>& encoderStates) override {
ABORT_IF(encoderStates.size() != 1, "Pooler expects exactly one encoder state");
auto context = encoderStates[0]->getContext();
auto batchMask = encoderStates[0]->getMask();
// do a max pool here
Expr logMask = (1.f - batchMask) * -9999.f;
Expr maxPool = max(context * batchMask + logMask, /*axis=*/-3);
return maxPool;
}
void clear() override {}
};
/**
* Pool encoder state (contextual word embeddings) by selecting 1st embedding along sentence-length dimension.
*/
class SlicePooler : public PoolerBase {
public:
SlicePooler(Ptr<ExpressionGraph> graph, Ptr<Options> options)
: PoolerBase(graph, options) {}
Expr apply(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch, const std::vector<Ptr<EncoderState>>& encoderStates) override {
ABORT_IF(encoderStates.size() != 1, "Pooler expects exactly one encoder state");
auto context = encoderStates[0]->getContext();
auto batchMask = encoderStates[0]->getMask();
// Corresponds to the way we do this in transformer.h
// @TODO: unify this better, this is currently hacky
Expr slicePool = slice(context * batchMask, /*axis=*/-3, 0);
return slicePool;
}
void clear() override {}
};
/**
* Not really a pooler but abusing the interface to compute a similarity of two pooled states
*/
class SimPooler : public PoolerBase {
public:
SimPooler(Ptr<ExpressionGraph> graph, Ptr<Options> options)
: PoolerBase(graph, options) {}
Expr apply(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch, const std::vector<Ptr<EncoderState>>& encoderStates) override {
ABORT_IF(encoderStates.size() != 2, "SimPooler expects exactly two encoder states");
std::vector<Expr> vecs;
for(auto encoderState : encoderStates) {
auto context = encoderState->getContext();
auto batchMask = encoderState->getMask();
Expr pool;
auto type = options_->get<std::string>("original-type");
if(type == "laser") {
// LASER models do a max pool here
Expr logMask = (1.f - batchMask) * -9999.f;
pool = max(context * batchMask + logMask, /*axis=*/-3);
} else if(type == "transformer") {
// Our own implementation in transformer.h uses a slice of the first element
pool = slice(context, -3, 0);
} else {
// @TODO: make SimPooler take Pooler objects as arguments then it won't need to know this.
ABORT("Don't know what type of pooler to use for model type {}", type);
}
vecs.push_back(pool);
}
auto scalars = scalar_product(vecs[0], vecs[1], /*axis*/-1);
auto length1 = sqrt(sum(square(vecs[0]), /*axis=*/-1));
auto length2 = sqrt(sum(square(vecs[1]), /*axis=*/-1));
auto cosine = scalars / ( length1 * length2 );
return cosine;
}
void clear() override {}
};
}
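To make the pooling and similarity operations concrete, a standalone numerical sketch on plain float vectors (no Marian tensors): the masked max-pool uses the same (1 - mask) * -9999 trick as MaxPooler so that padded positions never win the max, and the cosine matches what SimPooler computes from two pooled states.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>
// Masked max over the time dimension, mirroring MaxPooler above:
// padded positions get a large negative offset so they never win the max.
std::vector<float> maskedMaxPool(const std::vector<std::vector<float>>& states,
                                 const std::vector<float>& mask) {
  std::vector<float> pooled(states[0].size(), -1e9f);
  for(size_t t = 0; t < states.size(); ++t)
    for(size_t d = 0; d < states[t].size(); ++d)
      pooled[d] = std::max(pooled[d], states[t][d] * mask[t] + (1.f - mask[t]) * -9999.f);
  return pooled;
}
// Cosine similarity of two pooled states, as in SimPooler above.
float cosine(const std::vector<float>& a, const std::vector<float>& b) {
  float dot = 0.f, na = 0.f, nb = 0.f;
  for(size_t i = 0; i < a.size(); ++i) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (std::sqrt(na) * std::sqrt(nb));
}
int main() {
  auto u = maskedMaxPool({{1, -2}, {3, 0}, {0, 0}}, {1, 1, 0}); // third position is padding
  auto v = maskedMaxPool({{2, -1}, {1, 1}, {0, 0}}, {1, 1, 0});
  std::cout << cosine(u, v) << std::endl; // identical inputs would give 1.0
}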

View File

@ -328,6 +328,29 @@ public:
return output;
}
// Reduce the encoder output to a single sentence vector; here we just take the contextual embedding of the first word per sentence
// Replaces cross-attention in LASER-like models
Expr LayerPooling(std::string prefix,
Expr input, // [-4: beam depth, -3: batch size, -2: max length, -1: vector dim]
const Expr& values) { // [-4: beam depth=1, -3: batch size, -2: max length (src or trg), -1: vector dim]
int dimModel = input->shape()[-1];
auto output = slice(values, -2, 0); // Select first word [-4: beam depth, -3: batch size, -2: 1, -1: vector dim]
int dimPool = output->shape()[-1];
bool project = !opt<bool>("transformer-no-projection");
if(project || dimPool != dimModel) {
auto Wo = graph_->param(prefix + "_Wo", {dimPool, dimModel}, inits::glorotUniform());
auto bo = graph_->param(prefix + "_bo", {1, dimModel}, inits::zeros());
output = affine(output, Wo, bo); // [-4: beam depth, -3: batch size, -2: 1, -1: vector dim]
}
auto opsPost = opt<std::string>("transformer-postprocess");
output = postProcess(prefix + "_Wo", opsPost, output, input, 0.f);
return output;
}
Expr LayerAttention(std::string prefix,
Expr input, // [-4: beam depth, -3: batch size, -2: max length, -1: vector dim]
const Expr& keys, // [-4: beam depth=1, -3: batch size, -2: max length, -1: vector dim]
@ -790,14 +813,20 @@ public:
saveAttentionWeights = i == attLayer;
}
query = LayerAttention(prefix,
if(options_->get<bool>("transformer-pool", false)) {
query = LayerPooling(prefix,
query,
encoderContexts[j], // keys
encoderContexts[j], // values
encoderMasks[j],
opt<int>("transformer-heads"),
/*cache=*/true,
saveAttentionWeights);
encoderContexts[j]); // values
} else {
query = LayerAttention(prefix,
query,
encoderContexts[j], // keys
encoderContexts[j], // values
encoderMasks[j],
opt<int>("transformer-heads"),
/*cache=*/true,
saveAttentionWeights);
}
}
}

View File

@ -622,7 +622,7 @@ namespace marian {
model.set_ir_version(IR_VERSION);
model.set_producer_name(producerName);
model.mutable_graph()->CopyFrom(graph);
#define OPSET_IMPORT_VERSION 9 // 9 is needed for some newer ops
#define OPSET_IMPORT_VERSION 11
model.add_opset_import()->set_version(OPSET_IMPORT_VERSION);
return model;
}
@ -833,7 +833,7 @@ namespace marian {
LOG(info, s);
}
// axis attribute
size_t axis;
size_t axis{};
std::vector<size_t> axes;
if (E::tryGetAxisAttribute<ConcatenateNodeOp>(expr, axis)// ||
//E::tryGetAxisAttribute<SelectNodeOp>(expr, axis)

View File

@ -651,7 +651,7 @@ public:
using LSTM = FastLSTM;
/******************************************************************************/
// Experimentak cells, use with care
// Experimental cells, use with care
template <class CellType>
class Multiplicative : public CellType {

View File

@ -6,7 +6,6 @@
#endif
#endif
#if BLAS_FOUND
inline void sgemm(bool transA,
bool transB,
int rows_a,
@ -20,6 +19,7 @@ inline void sgemm(bool transA,
float beta,
float* c,
int ldc) {
#if BLAS_FOUND
cblas_sgemm(CblasRowMajor,
transA ? CblasTrans : CblasNoTrans,
transB ? CblasTrans : CblasNoTrans,
@ -34,5 +34,7 @@ inline void sgemm(bool transA,
beta,
c,
ldc);
#else
ABORT("Marian must be compiled with a BLAS library");
#endif
}
#endif

View File

@ -1282,67 +1282,104 @@ void SetSparse(float* out,
}
}
void LSTMCellForward(Tensor out_, std::vector<Tensor> inputs) {
// should be implemented via slicing and elementwise
template <typename FType>
void LSTMCellForwardTyped(Tensor out_, const std::vector<Tensor>& inputs) {
int rows = out_->shape().elements() / out_->shape()[-1];
int cols = out_->shape()[-1];
float* out = out_->data();
const float* cell = inputs[0]->data();
const float* xW = inputs[1]->data();
const float* sU = inputs[2]->data();
const float* b = inputs[3]->data();
int fVecSize = sizeof(FType) / sizeof(float);
int cols = out_->shape()[-1] / fVecSize;
FType* out = out_->data<FType>();
const FType* cell = inputs[0]->data<FType>();
const FType* xW = inputs[1]->data<FType>();
const FType* sU = inputs[2]->data<FType>();
const FType* b = inputs[3]->data<FType>();
const float* mask = inputs.size() > 4 ? inputs[4]->data() : nullptr;
using fop = functional::Ops<FType>;
for(int j = 0; j < rows; ++j) {
float m = !mask || mask[j];
float* rowOut = out + j * cols;
const float* rowCell = cell + j * cols;
FType* rowOut = out + j * cols;
const FType* rowCell = cell + j * cols;
const float* xWrow = xW + j * cols * 4;
const float* sUrow = sU + j * cols * 4;
const FType* xWrow = xW + j * cols * 4;
const FType* sUrow = sU + j * cols * 4;
for(int i = 0; i < cols; ++i) {
float gf = functional::Ops<float>::sigmoid(xWrow[i] + sUrow[i] + b[i]);
FType gf = fop::sigmoid(fop::add(fop::add(xWrow[i], sUrow[i]), b[i]));
int k = i + cols;
float gi = functional::Ops<float>::sigmoid(xWrow[k] + sUrow[k] + b[k]);
FType gi = fop::sigmoid(fop::add(fop::add(xWrow[k], sUrow[k]), b[k]));
int l = i + 2 * cols;
float gc = std::tanh(xWrow[l] + sUrow[l] + b[l]);
FType gc = fop::tanh(fop::add(fop::add(xWrow[l], sUrow[l]), b[l]));
float cout = gf * rowCell[i] + gi * gc;
rowOut[i] = m * cout + (1 - m) * rowCell[i];
FType cout = fop::add(fop::mul(gf, rowCell[i]), fop::mul(gi, gc));
rowOut[i] = fop::add(fop::mul(m, cout), fop::mul(fop::sub(1.f, m), rowCell[i]));
}
}
}
void LSTMOutputForward(Tensor out_, std::vector<Tensor> inputs) {
int rows = out_->shape().elements() / out_->shape()[-1];
int cols = out_->shape()[-1];
void LSTMCellForward(Tensor out, std::vector<Tensor> inputs) {
int cols = out->shape()[-1];
#ifdef __AVX__
if(cols % 8 == 0)
LSTMCellForwardTyped<float32x8>(out, inputs);
else
#endif
if(cols % 4 == 0)
LSTMCellForwardTyped<float32x4>(out, inputs);
else
LSTMCellForwardTyped<float>(out, inputs);
}
float* out = out_->data();
const float* cell = inputs[0]->data();
const float* xW = inputs[1]->data();
const float* sU = inputs[2]->data();
const float* b = inputs[3]->data();
template <typename FType>
void LSTMOutputForwardTyped(Tensor out_, const std::vector<Tensor>& inputs) {
int rows = out_->shape().elements() / out_->shape()[-1];
int fVecSize = sizeof(FType) / sizeof(float);
int cols = out_->shape()[-1] / fVecSize;
FType* out = out_->data<FType>();
const FType* cell = inputs[0]->data<FType>();
const FType* xW = inputs[1]->data<FType>();
const FType* sU = inputs[2]->data<FType>();
const FType* b = inputs[3]->data<FType>();
using fop = functional::Ops<FType>;
for(int j = 0; j < rows; ++j) {
float* rowOut = out + j * cols;
const float* rowCell = cell + j * cols;
FType* rowOut = out + j * cols;
const FType* rowCell = cell + j * cols;
const float* xWrow = xW + j * cols * 4;
const float* sUrow = sU + j * cols * 4;
const FType* xWrow = xW + j * cols * 4;
const FType* sUrow = sU + j * cols * 4;
for(int i = 0; i < cols; ++i) {
int k = i + 3 * cols;
float go = functional::Ops<float>::sigmoid(xWrow[k] + sUrow[k] + b[k]);
rowOut[i] = go * std::tanh(rowCell[i]);
FType go = fop::sigmoid(fop::add(fop::add(xWrow[k], sUrow[k]), b[k]));
rowOut[i] = fop::mul(go, fop::tanh(rowCell[i]));
}
}
}
void LSTMOutputForward(Tensor out, std::vector<Tensor> inputs) {
int cols = out->shape()[-1];
#ifdef __AVX__
if(cols % 8 == 0)
LSTMOutputForwardTyped<float32x8>(out, inputs);
else
#endif
if(cols % 4 == 0)
LSTMOutputForwardTyped<float32x4>(out, inputs);
else
LSTMOutputForwardTyped<float>(out, inputs);
}
void LSTMCellBackward(std::vector<Tensor> outputs,
std::vector<Tensor> inputs,
Tensor adj_) {

View File

@ -12,6 +12,63 @@
#include "tensors/gpu/cuda_helpers.h"
// clang-format on
// recreations of a few cusparse functions that were deprecated in CUDA 11
// @TODO: Fill these in. This is not trivial. Until then, using these with CUDA 11 will fail.
#if CUDA_VERSION >= 11000
cusparseStatus_t
cusparseSgemmi10(cusparseHandle_t handle,
int m,
int n,
int k,
int nnz,
const float* alpha,
const float* A,
int lda,
const float* cscValB,
const int* cscColPtrB,
const int* cscRowIndB,
const float* beta,
float* C,
int ldc) {
ABORT("Sparse matrix operations are currently not supported by Marian under CUDA 11");
}
#define cusparseSgemmi cusparseSgemmi10
cusparseStatus_t
cusparseScsr2csc(cusparseHandle_t handle,
int m,
int n,
int nnz,
const float* csrVal,
const int* csrRowPtr,
const int* csrColInd,
float* cscVal,
int* cscRowInd,
int* cscColPtr,
cusparseAction_t copyValues,
cusparseIndexBase_t idxBase) {
ABORT("Sparse matrix operations are currently not supported by Marian under CUDA 11");
}
cusparseStatus_t
cusparseScsrmm(cusparseHandle_t handle,
cusparseOperation_t transA,
int m,
int n,
int k,
int nnz,
const float* alpha,
const cusparseMatDescr_t descrA,
const float* csrValA,
const int* csrRowPtrA,
const int* csrColIndA,
const float* B,
int ldb,
const float* beta,
float* C,
int ldc) {
ABORT("Sparse matrix operations are currently not supported by Marian under CUDA 11");
}
#endif
namespace marian {
namespace gpu {

View File

@ -6,6 +6,7 @@ set(UNIT_TESTS
attention_tests
fastopt_tests
utils_tests
# cosmos_tests # optional, uncomment to test with specific files.
)
foreach(test ${UNIT_TESTS})

View File

@ -0,0 +1,141 @@
#include "catch.hpp"
#include "microsoft/cosmos.h"
#include "common/definitions.h"
#include "common/filesystem.h"
using namespace marian;
TEST_CASE("microsoft::cosmos::cosine_scorer", "[cosmos]") {
using namespace cosmos;
auto logger = spdlog::get("general");
if(!logger) {
std::vector<std::string> generalLogs;
logger = createStderrLogger("general", "[%Y-%m-%d %T] %v", generalLogs, /*quiet=*/true);
}
setThrowExceptionOnAbort(true);
auto floatApprox = [](float x, float y) -> bool {
return x == Approx(y).margin(0.001f);
};
auto createScorer = [&]() {
std::string path = "/home/marcinjd/data2/cosmos/embedder/";
std::string modelPath = path + "2020-07-24.laser.model.npz";
std::string vocabPath = path + "2020-07-24.laser.vocab.spm";
CHECK( filesystem::exists(modelPath) );
CHECK( filesystem::exists(vocabPath) );
auto scorer = New<MarianCosineScorer>();
CHECK( scorer->load(modelPath, vocabPath) );
return scorer;
};
auto scorer = createScorer();
SECTION("Compare two identical sentences") {
std::string input1 = "<CLS> This is a test.";
std::string input2 = "<CLS> This is a test.";
auto similarities = scorer->score(input1, input2);
CHECK( similarities.size() == 1 );
CHECK( floatApprox(similarities[0], 1.f) );
}
SECTION("Compare two different sentences") {
std::string input1 = "<CLS> This is a test.";
std::string input2 = "<CLS> This is another test.";
auto similarities = scorer->score(input1, input2);
CHECK( similarities.size() == 1 );
CHECK( floatApprox(similarities[0], 0.94101) );
}
SECTION("Compare small batches of sentences") {
std::string input1 = "<CLS> This is a test.\n<CLS> This is a test.";
std::string input2 = "<CLS> This is a test.\n<CLS> This is another test.";
auto similarities = scorer->score(input1, input2);
CHECK( similarities.size() == 2 );
CHECK( floatApprox(similarities[0], 1.f) );
CHECK( floatApprox(similarities[1], 0.94101) );
}
SECTION("Throw exception when there is a mismatch in number of sentences (first is shorter)") {
std::string input1 = "<CLS> This is a test.\n";
std::string input2 = "<CLS> This is a test.\n<CLS> This is another test.";
try {
marian::setThrowExceptionOnAbort(true);
auto similarities = scorer->score(input1, input2);
CHECK( false ); // we shouldn't reach this check, hence a failed test if we do.
} catch(MarianRuntimeException& e) {
CHECK( e.what() == std::string("Previous tuple elements are missing.") );
}
}
SECTION("Throw exception when there is a mismatch in number of sentences (second is shorter)") {
std::string input1 = "<CLS> This is a test.\n<CLS> This is a test.";
std::string input2 = "<CLS> This is a test.\n";
try {
marian::setThrowExceptionOnAbort(true);
auto similarities = scorer->score(input1, input2);
CHECK( false ); // we shouldn't reach this check, hence a failed test if we do.
} catch(MarianRuntimeException& e) {
CHECK( e.what() == std::string("There are missing entries in the text tuples.") );
}
}
}
TEST_CASE("microsoft::cosmos::embedder", "[cosmos]") {
using namespace cosmos;
auto floatApprox = [](float x, float y) -> bool {
return x == Approx(y).margin(0.001f);
};
auto createEmbedder = [&]() {
std::string path = "/home/marcinjd/data2/cosmos/embedder/";
std::string modelPath = path + "2020-07-24.laser.model.npz";
std::string vocabPath = path + "2020-07-24.laser.vocab.spm";
CHECK( filesystem::exists(modelPath) );
CHECK( filesystem::exists(vocabPath) );
auto embedder = New<MarianEmbedder>();
CHECK( embedder->load(modelPath, vocabPath) );
return embedder;
};
auto embedder = createEmbedder();
SECTION("Embed a single sentence") {
std::string input = "<CLS> This is a test.";
auto embeddings = embedder->embed(input);
CHECK( embeddings.size() == 1 );
CHECK( embeddings[0].size() == 512 );
CHECK( floatApprox(embeddings[0][0], -0.04813f) );
}
SECTION("Embed two sentences") {
std::string input = "<CLS> This is a test.\n<CLS> This is another test.";
auto embeddings = embedder->embed(input);
CHECK( embeddings.size() == 2 );
CHECK( embeddings[0].size() == 512 );
CHECK( embeddings[1].size() == 512 );
CHECK( floatApprox(embeddings[0][0], -0.04813f) );
CHECK( floatApprox(embeddings[1][0], -0.04775f) );
}
}

View File

@ -17,10 +17,7 @@ Ptr<data::BatchStats> GraphGroup::collectStats(Ptr<ExpressionGraph> graph,
const std::vector<Ptr<Vocab>>& vocabs,
double multiplier) {
auto stats = New<data::BatchStats>();
size_t numFiles = options_->get<bool>("tsv", false)
? options_->get<size_t>("tsv-fields")
: options_->get<std::vector<std::string>>("train-sets").size();
size_t numFiles = numberOfInputFiles();
// Initialize first batch to step size
size_t first = options_->get<size_t>("mini-batch-fit-step");
@ -77,7 +74,7 @@ Ptr<data::BatchStats> GraphGroup::collectStats(Ptr<ExpressionGraph> graph,
} else {
end = current - 1;
}
} while(end - start > step);
} while(end - start > step); // @TODO: better replace with `end >= start` to remove the step here
maxBatch = start;
}
@@ -88,4 +85,16 @@ void GraphGroup::setTypicalTrgBatchWords(size_t typicalTrgBatchWords) { // neede
typicalTrgBatchWords_ = typicalTrgBatchWords;
}
size_t GraphGroup::numberOfInputFiles() {
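// In TSV mode a single input file provides all streams as tab-separated fields; fields that
// carry guided alignments or data weights are not model inputs, so they are subtracted below.
// Illustrative example: --tsv-fields 4 with guided alignment and data weighting enabled
// leaves 4 - 1 - 1 = 2 actual input streams (e.g. source and target text).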
if(options_->get<bool>("tsv", false)) {
size_t n = options_->get<size_t>("tsv-fields");
if(n > 0 && options_->get("guided-alignment", std::string("none")) != "none")
--n;
if(n > 0 && options_->hasAndNotEmpty("data-weighting"))
--n;
return n;
}
return options_->get<std::vector<std::string>>("train-sets").size();
}
} // namespace marian

View File

@@ -23,7 +23,11 @@ protected:
Ptr<Scheduler> scheduler_; // scheduler that keeps track of how much has been processed
bool finalized_{false}; // 'true' if training has completed (further updates are no longer allowed)
size_t typicalTrgBatchWords_{ 0 }; // for dynamic batch sizing: typical batch size in words
size_t typicalTrgBatchWords_{0}; // for dynamic batch sizing: typical batch size in words
// determines the number of input streams (i.e. input files or fields in the TSV input) that need
// to be included in the batch, i.e. without alignments and weights
size_t numberOfInputFiles();
public:
GraphGroup(Ptr<Options> options);

515 src/translator/beam_search.cpp Executable file
View File

@@ -0,0 +1,515 @@
#include "translator/beam_search.h"
#include "data/factored_vocab.h"
#include "translator/helpers.h"
#include "translator/nth_element.h"
#include "data/shortlist.h"
namespace marian {
// combine new expandedPathScores and previous beams into new set of beams
Beams BeamSearch::toHyps(const std::vector<unsigned int>& nBestKeys, // [currentDimBatch, beamSize] flattened -> ((batchIdx, beamHypIdx) flattened, word idx) flattened
const std::vector<float>& nBestPathScores, // [currentDimBatch, beamSize] flattened
const size_t nBestBeamSize, // for interpretation of nBestKeys
const size_t vocabSize, // ditto.
const Beams& beams,
const std::vector<Ptr<ScorerState /*const*/>>& states,
Ptr<data::CorpusBatch /*const*/> batch, // for alignments only
Ptr<FactoredVocab/*const*/> factoredVocab, size_t factorGroup,
const std::vector<bool>& dropBatchEntries, // [origDimBatch] - empty source batch entries are marked with true, should be cleared after first use.
const std::vector<IndexType>& batchIdxMap) const { // [origBatchIdx -> currentBatchIdx]
std::vector<float> align; // collects alignment information from the last executed time step
if(options_->hasAndNotEmpty("alignment") && factorGroup == 0)
align = scorers_[0]->getAlignment(); // [beam depth * max src length * current batch size] -> P(s|t); use alignments from the first scorer, even if ensemble,
const auto origDimBatch = beams.size(); // see function search for definition of origDimBatch and currentDimBatch etc.
Beams newBeams(origDimBatch); // return value of this function goes here. There are always origDimBatch beams.
// create a reverse batchMap to obtain original batchIdx in the starting batch size
// and calculate the current batch size based on non-empty beams
std::vector<IndexType> reverseBatchIdxMap; // empty if not purging batch entries
size_t currentDimBatch = beams.size();
if(PURGE_BATCH) {
reverseBatchIdxMap.resize(batchIdxMap.size()); // adjust size if doing batch purging.
currentDimBatch = 0;
for(int i = 0; i < batchIdxMap.size(); ++i) {
reverseBatchIdxMap[batchIdxMap[i]] = i; // reverse batch index mapping, multiple occurrences get overwritten with the last one,
// which is expected due to down-shifting
if(!beams[i].empty())
currentDimBatch++;
}
}
for(size_t i = 0; i < nBestKeys.size(); ++i) { // [currentDimBatch, beamSize] flattened
// Keys encode batchIdx, beamHypIdx, and word index in the entire beam.
// They can be between 0 and (vocabSize * nBestBeamSize * batchSize)-1.
// (beamHypIdx refers to the GPU tensors, *not* the beams[] array; they are not the same in case of purging)
const auto key = nBestKeys[i];
// decompose key into individual indices (batchIdx, beamHypIdx, wordIdx)
const auto beamHypIdx = (key / vocabSize) % nBestBeamSize;
const auto currentBatchIdx = (key / vocabSize) / nBestBeamSize;
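// Illustrative example (made-up numbers): with vocabSize = 32000 and nBestBeamSize = 6,
// key = (currentBatchIdx * nBestBeamSize + beamHypIdx) * vocabSize + wordIdx, so
// key = 389123 decomposes into wordIdx = 5123, beamHypIdx = 0 and currentBatchIdx = 2.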
const auto origBatchIdx = reverseBatchIdxMap.empty() ? currentBatchIdx : reverseBatchIdxMap[currentBatchIdx]; // map currentBatchIdx back into original position within starting maximal batch size, required to find correct beam
bool dropHyp = !dropBatchEntries.empty() && dropBatchEntries[origBatchIdx] && factorGroup == 0;
WordIndex wordIdx;
if(dropHyp) { // if we force-drop the hypothesis, assign EOS, otherwise the expected word id.
if(factoredVocab) { // when using factoredVocab, extract the EOS lemma index from the word id; we are predicting factors one by one here, hence lemma only
std::vector<size_t> eosFactors;
factoredVocab->word2factors(factoredVocab->getEosId(), eosFactors);
wordIdx = (WordIndex)eosFactors[0];
} else { // without factoredVocab lemma index and word index are the same. Safe cruising.
wordIdx = trgVocab_->getEosId().toWordIndex();
}
} else { // we are not dropping anything, just assign the normal index
wordIdx = (WordIndex)(key % vocabSize);
}
// @TODO: We currently assign a log probability of 0 to all beam entries of the dropped batch entry, instead it might be a good idea to use
// the per Hyp pathScore without the current expansion (a bit hard to obtain).
// For the case where we drop empty inputs, 0 is fine. For other use cases like a forced stop, the penultimate pathScore might be better.
// For the empty hyp this would naturally result in 0, too.
const float pathScore = dropHyp ? 0.f : nBestPathScores[i]; // 0 (Prob = 1, maximum score) if dropped or expanded path score for (batchIdx, beamHypIdx, word)
const auto& beam = beams[origBatchIdx];
auto& newBeam = newBeams[origBatchIdx]; // extended hypotheses are going to be placed in this new beam
if(newBeam.size() >= beam.size()) // getNBestList() generates N for all batch entries incl. those that already have a narrower beam
continue;
if(pathScore == INVALID_PATH_SCORE) // (dummy slot or word that cannot be expanded by current factor)
continue;
ABORT_IF(pathScore < INVALID_PATH_SCORE, "Actual pathScore ({}) is lower than INVALID_PATH_SCORE ({})??", pathScore, INVALID_PATH_SCORE); // This should not happen in valid situations. Currently the only smaller value would be -inf (effect of overflow in summation?)
ABORT_IF(beamHypIdx >= beam.size(), "Out of bounds beamHypIdx??"); // effectively this is equivalent to ABORT_IF(beams[origBatchIdx].empty(), ...)
// map wordIdx to word
auto prevBeamHypIdx = beamHypIdx; // back pointer
auto prevHyp = beam[prevBeamHypIdx];
Word word;
// If short list has been set, then wordIdx is an index into the short-listed word set,
// rather than the true word index.
auto shortlist = scorers_[0]->getShortlist();
if (factoredVocab) {
// For factored decoding, the word is built over multiple decoding steps,
// starting with the lemma, then adding factors one by one.
if (factorGroup == 0) {
word = factoredVocab->lemma2Word(shortlist ? shortlist->reverseMap(wordIdx) : wordIdx); // @BUGBUG: reverseMap is only correct if factoredVocab_->getGroupRange(0).first == 0
std::vector<size_t> factorIndices; factoredVocab->word2factors(word, factorIndices);
//LOG(info, "{} + {} ({}) -> {} -> {}",
// factoredVocab->decode(prevHyp->tracebackWords()),
// factoredVocab->word2string(word), factorIndices[0], prevHyp->getPathScore(), pathScore);
}
else {
//LOG(info, "{} |{} ({}) = {} ({}) -> {} -> {}",
// factoredVocab->decodeForDiagnostics(beam[beamHypIdx]->tracebackWords()),
// factoredVocab->getFactorGroupPrefix(factorGroup), factorGroup,
// factoredVocab->getFactorName(factorGroup, wordIdx), wordIdx,
// prevHyp->getPathScore(), pathScore);
word = beam[beamHypIdx]->getWord();
ABORT_IF(!factoredVocab->canExpandFactoredWord(word, factorGroup),
"A word without this factor snuck through to here??");
word = factoredVocab->expandFactoredWord(word, factorGroup, wordIdx);
prevBeamHypIdx = prevHyp->getPrevStateIndex();
prevHyp = prevHyp->getPrevHyp(); // short-circuit the backpointer, so that the traceback does not contain partially factored words
}
}
else if (shortlist)
word = Word::fromWordIndex(shortlist->reverseMap(wordIdx));
else
word = Word::fromWordIndex(wordIdx);
auto hyp = Hypothesis::New(prevHyp, word, prevBeamHypIdx, pathScore);
// Set score breakdown for n-best lists
if(options_->get<bool>("n-best")) {
auto breakDown = beam[beamHypIdx]->getScoreBreakdown();
ABORT_IF(factoredVocab && factorGroup > 0 && !factoredVocab->canExpandFactoredWord(word, factorGroup),
"A word without this factor snuck through to here??");
breakDown.resize(states.size(), 0); // at start, this is empty, so this will set the initial score to 0
for(size_t j = 0; j < states.size(); ++j) {
auto lval = states[j]->getLogProbs().getFactoredLogitsTensor(factorGroup); // [maxBeamSize, 1, currentDimBatch, dimFactorVocab]
// The flattening happens based on actual (current) batch size and batch index computed with batch-pruning as we are looking into the pruned tensor
size_t flattenedLogitIndex = (beamHypIdx * currentDimBatch + currentBatchIdx) * vocabSize + wordIdx; // (beam idx, batch idx, word idx); note: beam and batch are transposed, compared to 'key'
// @TODO: use a function on shape() to index, or new method val->at({i1, i2, i3, i4}) with broadcasting
ABORT_IF(lval->shape() != Shape({(int)nBestBeamSize, 1, (int)currentDimBatch, (int)vocabSize}) &&
(beamHypIdx == 0 && lval->shape() != Shape({1, 1, (int)currentDimBatch, (int)vocabSize})),
"Unexpected shape of logits?? {} != {}", lval->shape(), Shape({(int)nBestBeamSize, 1, (int)currentDimBatch, (int)vocabSize}));
breakDown[j] += lval->get(flattenedLogitIndex);
}
hyp->setScoreBreakdown(breakDown);
}
// Set alignments
if(!align.empty())
hyp->setAlignment(getAlignmentsForHypothesis(align, batch, (int)beamHypIdx, (int)currentBatchIdx, (int)origBatchIdx, (int)currentDimBatch));
else // not first factor: just copy
hyp->setAlignment(beam[beamHypIdx]->getAlignment());
newBeam.push_back(hyp);
}
// if factored vocab and this is not the first factor, we need to
// also propagate factored hypotheses that do not get expanded in this step because they don't have this factor
if (factorGroup > 0) {
for (size_t batchIdx = 0; batchIdx < beams.size(); batchIdx++) {
const auto& beam = beams[batchIdx];
auto& newBeam = newBeams[batchIdx];
for (const auto& beamHyp : beam) {
auto word = beamHyp->getWord();
//LOG(info, "Checking {}", factoredVocab->word2string(word));
if (factoredVocab->canExpandFactoredWord(word, factorGroup)) // handled above
continue;
//LOG(info, "Forwarded {}", factoredVocab->word2string(word));
newBeam.push_back(beamHyp);
}
if (newBeam.size() > beam.size()) {
//LOG(info, "Size {}, sorting...", newBeam.size());
std::nth_element(newBeam.begin(), newBeam.begin() + beam.size(), newBeam.end(), [](Hypothesis::PtrType a, Hypothesis::PtrType b) {
return a->getPathScore() > b->getPathScore(); // (sort highest score first)
});
//LOG(info, "Size {}, sorted...", newBeam.size());
newBeam.resize(beam.size());
}
}
}
return newBeams;
}
std::vector<float> BeamSearch::getAlignmentsForHypothesis( // -> P(s|t) for current t and given beam and batch dim
const std::vector<float> alignAll, // [beam depth, max src length, batch size, 1], flattened vector of all attention probabilities
Ptr<data::CorpusBatch> batch,
int beamHypIdx,
int currentBatchIdx,
int origBatchIdx,
int currentDimBatch) const {
// Let B be the beam size, N be the number of batched sentences,
// and L the number of words in the longest sentence in the batch.
// The alignment vector:
//
// if(first)
// * has length of N x L if it's the first beam
// * stores elements in the following order:
// beam1 = [word1-batch1, word1-batch2, ..., word2-batch1, ...]
// else
// * has length of N x L x B
// * stores elements in the following order:
// beams = [beam1, beam2, ..., beam_n]
//
// The mask vector is always of length N x L and has 1/0s stored like
// in a single beam, i.e.:
// * [word1-batch1, word1-batch2, ..., word2-batch1, ...]
//
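// Illustrative example (made-up numbers): with batchWidth = 7, currentDimBatch = 2,
// beamHypIdx = 2, currentBatchIdx = 1 and srcPos = 3, the attention score is read from
// alignAll at currentAttIdx = (7 * 2 + 3) * 2 + 1 = 35.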
size_t origDimBatch = batch->size(); // number of sentences in batch
size_t batchWidth = batch->width(); // max src length
// loop over words of batch entry 'currentBatchIdx' and beam entry 'beamHypIdx'
std::vector<float> align;
for(size_t srcPos = 0; srcPos < batchWidth; ++srcPos) { // loop over source positions
// We are looking into the probabilities from an actual tensor, hence we need to use currentDimBatch and currentBatchIdx.
size_t currentAttIdx = (batchWidth * beamHypIdx + srcPos) * currentDimBatch + currentBatchIdx; // = flatten [beam index, s, batch index, 0]
// We are looking into the mask from the original batch, hence we need to use origDimBatch and origBatchIdx.
size_t origAttIdx = (batchWidth * beamHypIdx + srcPos) * origDimBatch + origBatchIdx; // = flatten [beam index, s, batch index, 0]
size_t origMaskIdx = origAttIdx % (batchWidth * origDimBatch); // == batchIdx + (batchSize * srcPos) = flatten [0, s, batch index, 0]
// If the original position is not masked out, use the corresponding current attention score.
if(batch->front()->mask()[origMaskIdx] != 0)
align.emplace_back(alignAll[currentAttIdx]);
}
return align;
}
// remove all beam entries that have reached EOS
Beams BeamSearch::purgeBeams(const Beams& beams, /*in/out=*/std::vector<IndexType>& batchIdxMap) {
const auto trgEosId = trgVocab_->getEosId();
Beams newBeams;
size_t beamIdx = 0; // beam index
for(auto beam : beams) {
Beam newBeam; // a beam of surviving hyps
for(auto hyp : beam)
if(hyp->getWord() != trgEosId) // if this hyp is not finished,
newBeam.push_back(hyp); // move over to beam of surviving hyps
if(PURGE_BATCH)
if(newBeam.empty() && !beam.empty()) { // previous beam had hyps, but all were finished in this step, newBeam will now stay empty
for(size_t i = beamIdx + 1; i < beams.size(); ++i) // for all entries above this beam
batchIdxMap[i] = batchIdxMap[i] - 1; // make them look at one batch index below, as the current entry will be removed from the batch.
}
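// Illustrative example: with three sentences and batchIdxMap = {0, 1, 2}, if the beam of
// sentence 1 finishes completely in this step, only batchIdxMap[2] is decremented and the
// map becomes {0, 1, 1}: sentence 2 will read from batch position 1 once entry 1 is purged.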
newBeams.push_back(newBeam);
beamIdx++; // move to next beam index
}
return newBeams;
}
//**********************************************************************
// main decoding function
Histories BeamSearch::search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
auto factoredVocab = trgVocab_->tryAs<FactoredVocab>();
size_t numFactorGroups = factoredVocab ? factoredVocab->getNumGroups() : 1;
if (numFactorGroups == 1) // if no factors then we didn't need this object in the first place
factoredVocab.reset();
// We will use the prefix "origBatch..." whenever we refer to batch dimensions of the original batch. These do not change during search.
// We will use the prefix "currentBatch.." whenever we refer to batch dimension that can change due to batch-pruning.
const int origDimBatch = (int)batch->size();
const auto trgEosId = trgVocab_->getEosId();
const auto trgUnkId = trgVocab_->getUnkId();
auto getNBestList = createGetNBestListFn(beamSize_, origDimBatch, graph->getDeviceId());
for(auto scorer : scorers_) {
scorer->clear(graph);
}
Histories histories(origDimBatch);
for(int i = 0; i < origDimBatch; ++i) {
size_t sentId = batch->getSentenceIds()[i];
histories[i] = New<History>(sentId,
options_->get<float>("normalize"),
options_->get<float>("word-penalty"));
}
// start states
std::vector<Ptr<ScorerState>> states;
for(auto scorer : scorers_) {
states.push_back(scorer->startState(graph, batch));
}
// create one beam per batch entry with sentence-start hypothesis
Beams beams(origDimBatch, Beam(beamSize_, Hypothesis::New())); // array [origDimBatch] of array [maxBeamSize] of Hypothesis, keeps full size through search.
// batch purging is determined from an empty sub-beam.
std::vector<IndexType> batchIdxMap(origDimBatch); // Record at which batch entry a beam is looking.
// By default that corresponds to position in array,
// but shifts in the course of removing batch entries when they are finished.
const std::vector<bool> emptyBatchEntries; // used for recording if there are empty input batch entries
for(int origBatchIdx = 0; origBatchIdx < origDimBatch; ++origBatchIdx) {
batchIdxMap[origBatchIdx] = origBatchIdx; // map to same position on initialization
auto& beam = beams[origBatchIdx];
histories[origBatchIdx]->add(beam, trgEosId); // add beams with start-hypotheses to traceback grid
// Mark batch entries that consist only of source <EOS> i.e. these are empty lines. They will be forced to EOS and purged from batch
const auto& srcEosId = batch->front()->vocab()->getEosId();
const_cast<std::vector<bool>&>(emptyBatchEntries).push_back(batch->front()->data()[origBatchIdx] == srcEosId); // const_cast during construction
}
// determine index of UNK in the log prob vectors if we want to suppress it in the decoding process
int unkColId = -1;
if (trgUnkId != Word::NONE && !options_->get<bool>("allow-unk", false)) { // do we need to suppress unk?
unkColId = factoredVocab ? factoredVocab->getUnkIndex() : trgUnkId.toWordIndex(); // what's the raw index of unk in the log prob vector?
auto shortlist = scorers_[0]->getShortlist(); // first shortlist is generally ok, @TODO: make sure they are the same across scorers?
if (shortlist)
unkColId = shortlist->tryForwardMap(unkColId); // use shifted position of unk in case of using a shortlist; the shortlist may have removed unk, which results in -1
}
// the decoding process updates the following state information in each output time step:
// - beams: array [origDimBatch] of array [maxBeamSize] of Hypothesis
// - current output time step's set of active hypotheses, aka active search space
// - states[.]: ScorerState
// - NN state; one per scorer, e.g. 2 for ensemble of 2
// and it forms the following return value
// - histories: array [origDimBatch] of History
// with History: vector [t] of array [maxBeamSize] of Hypothesis
// with Hypothesis: (last word, aggregate score, prev Hypothesis)
IndexType currentDimBatch = origDimBatch;
auto prevBatchIdxMap = batchIdxMap; // [origBatchIdx -> currentBatchIdx] but shifted by one time step
// main loop over output time steps
for (size_t t = 0; ; t++) {
ABORT_IF(origDimBatch != beams.size(), "Lost a batch entry??");
// determine beam size for next output time step, as max over still-active sentences
// E.g. if all batch entries are down from beam 5 to no more than 4 surviving hyps, then
// switch to beam of 4 for all. If all are done, then beam ends up being 0, and we are done.
size_t maxBeamSize = 0; // @TODO: is there some std::algorithm for this?
for(auto& beam : beams)
if(beam.size() > maxBeamSize)
maxBeamSize = beam.size();
// done if all batch entries have reached EOS on all beam entries
if (maxBeamSize == 0)
break;
for (size_t factorGroup = 0; factorGroup < numFactorGroups; factorGroup++) {
// for factored vocabs, we do one factor at a time, but without updating the scorer for secondary factors
//**********************************************************************
// create constant containing previous path scores for current beam
// Also create mapping of hyp indices, for reordering the decoder-state tensors.
std::vector<IndexType> batchIndices; // [1, 1, currentDimBatch, 1] indices of currently used batch indices with regard to current, actual tensors
std::vector<IndexType> hypIndices; // [maxBeamSize, 1, currentDimBatch, 1] (flattened) tensor index ((beamHypIdx, batchIdx), flattened) of prev hyp that a hyp originated from
std::vector<Word> prevWords; // [maxBeamSize, 1, currentDimBatch, 1] (flattened) word that a hyp ended in, for advancing the decoder-model's history
Expr prevPathScores; // [maxBeamSize, 1, currentDimBatch, 1], path score that a hyp ended in (last axis will broadcast into vocab size when adding expandedPathScores)
bool anyCanExpand = false; // stays false if all hyps are invalid factor expansions
if(t == 0 && factorGroup == 0) { // no scores yet
prevPathScores = graph->constant({1, 1, 1, 1}, inits::fromValue(0));
anyCanExpand = true;
// at the beginning all batch entries are used
batchIndices.resize(origDimBatch);
std::iota(batchIndices.begin(), batchIndices.end(), 0);
} else {
if(factorGroup == 0) // only factorGroup==0 can subselect neural state
for(int currentBatchIdx = 0; currentBatchIdx < beams.size(); ++currentBatchIdx) // loop over batch entries (active sentences)
if(!beams[currentBatchIdx].empty() || !PURGE_BATCH) // for each beam check
batchIndices.push_back(prevBatchIdxMap[currentBatchIdx]); // which batch entries were active in previous step
std::vector<float> prevScores;
for(size_t beamHypIdx = 0; beamHypIdx < maxBeamSize; ++beamHypIdx) { // loop over globally maximal beam-size (maxBeamSize)
for(int origBatchIdx = 0; origBatchIdx < origDimBatch; ++origBatchIdx) { // loop over all batch entries (active and inactive)
auto& beam = beams[origBatchIdx];
if(beamHypIdx < beam.size()) {
auto hyp = beam[beamHypIdx];
auto word = hyp->getWord();
auto canExpand = (!factoredVocab || factoredVocab->canExpandFactoredWord(hyp->getWord(), factorGroup));
//LOG(info, "[{}, {}] Can expand {} with {} -> {}", batchIdx, beamHypIdx, (*batch->back()->vocab())[hyp->getWord()], factorGroup, canExpand);
anyCanExpand |= canExpand;
auto currentBatchIdx = origBatchIdx;
if(PURGE_BATCH) {
if(factorGroup == 0)
currentBatchIdx = prevBatchIdxMap[origBatchIdx]; // subselection may happen for factorGroup == 0
else
currentBatchIdx = batchIdxMap[origBatchIdx]; // no subselection happens for factorGroup > 0,
// but we treat it like a next step, since a step
// happened for factorGroup == 0
}
auto hypIndex = (IndexType)(hyp->getPrevStateIndex() * currentDimBatch + currentBatchIdx); // (beamHypIdx, batchIdx), flattened, for index_select() operation
hypIndices.push_back(hypIndex); // (beamHypIdx, batchIdx), flattened as said above.
prevWords .push_back(word);
prevScores.push_back(canExpand ? hyp->getPathScore() : INVALID_PATH_SCORE);
} else { // pad to maxBeamSize (dummy hypothesis)
if(!PURGE_BATCH || !beam.empty()) { // but only if we are not pruning and the beam is not deactivated yet
hypIndices.push_back(0);
prevWords.push_back(trgEosId); // (unused, but must be valid)
prevScores.push_back((float)INVALID_PATH_SCORE);
}
}
}
}
if(factorGroup == 0)
currentDimBatch = (IndexType) batchIndices.size(); // keep batch size constant for all factor groups in a time step
prevPathScores = graph->constant({(int)maxBeamSize, 1, (int)currentDimBatch, 1}, inits::fromVector(prevScores));
}
if (!anyCanExpand) // all words cannot expand this factor: skip
continue;
//**********************************************************************
// compute expanded path scores with word prediction probs from all scorers
auto expandedPathScores = prevPathScores; // will become [maxBeamSize, 1, currDimBatch, dimVocab]
Expr logProbs;
for(size_t i = 0; i < scorers_.size(); ++i) {
if (factorGroup == 0) {
// compute output probabilities for current output time step
// - uses hypIndices[index in beam, 1, batch index, 1] to reorder scorer state to reflect the top-N in beams[][]
// - adds prevWords [index in beam, 1, batch index, 1] to the scorer's target history
// - performs one step of the scorer
// - returns new NN state for use in next output time step
// - returns vector of prediction probabilities over output vocab via newState
// update state in-place for next output time step
//if (t > 0) for (size_t kk = 0; kk < prevWords.size(); kk++)
// LOG(info, "prevWords[{},{}]={} -> {}", t/numFactorGroups, factorGroup,
// factoredVocab ? factoredVocab->word2string(prevWords[kk]) : (*batch->back()->vocab())[prevWords[kk]],
// prevScores[kk]);
states[i] = scorers_[i]->step(graph, states[i], hypIndices, prevWords, batchIndices, (int)maxBeamSize);
if (numFactorGroups == 1) // @TODO: this branch can go away
logProbs = states[i]->getLogProbs().getLogits(); // [maxBeamSize, 1, currentDimBatch, dimVocab]
else
{
auto shortlist = scorers_[i]->getShortlist();
logProbs = states[i]->getLogProbs().getFactoredLogits(factorGroup, shortlist); // [maxBeamSize, 1, currentDimBatch, dimVocab]
}
}
else {
// add secondary factors
// For those, we don't update the decoder-model state in any way.
// Instead, we just keep expanding with the factors.
// We will have temporary Word entries in hyps with some factors set to FACTOR_NOT_SPECIFIED.
// For some lemmas, a factor is not applicable. For those, the factor score is the same (zero)
// for all factor values. This would thus unnecessarily pollute the beam with identical copies,
// and push out other hypotheses. Hence, we exclude those here by setting the path score to
// INVALID_PATH_SCORE. Instead, toHyps() explicitly propagates those hyps by simply copying the
// previous hypothesis.
logProbs = states[i]->getLogProbs().getFactoredLogits(factorGroup, /*shortlist=*/ nullptr, hypIndices, maxBeamSize); // [maxBeamSize, 1, currentDimBatch, dimVocab]
}
// expand all hypotheses, [maxBeamSize, 1, currentDimBatch, 1] -> [maxBeamSize, 1, currentDimBatch, dimVocab]
expandedPathScores = expandedPathScores + scorers_[i]->getWeight() * logProbs;
}
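// Taken together, for an ensemble of e.g. two scorers with weights w0 and w1 (illustrative),
// expandedPathScores = prevPathScores + w0 * logProbs_0 + w1 * logProbs_1, where the last
// axis of prevPathScores (size 1) broadcasts to dimVocab.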
// make beams continuous
expandedPathScores = swapAxes(expandedPathScores, 0, 2); // -> [currentDimBatch, 1, maxBeamSize, dimVocab]
// perform NN computation
if(t == 0 && factorGroup == 0)
graph->forward();
else
graph->forwardNext();
//**********************************************************************
// suppress specific symbols if not at right positions
if(unkColId != -1 && factorGroup == 0)
suppressWord(expandedPathScores, unkColId);
for(auto state : states)
state->blacklist(expandedPathScores, batch);
//**********************************************************************
// perform beam search
// find N best amongst the (maxBeamSize * dimVocab) hypotheses
std::vector<unsigned int> nBestKeys; // [currentDimBatch, maxBeamSize] flattened -> (batchIdx, beamHypIdx, word idx) flattened
std::vector<float> nBestPathScores; // [currentDimBatch, maxBeamSize] flattened
getNBestList(/*in*/ expandedPathScores->val(), // [currentDimBatch, 1, maxBeamSize, dimVocab or dimShortlist]
/*N=*/ maxBeamSize, // desired beam size
/*out*/ nBestPathScores,
/*out*/ nBestKeys,
/*first=*/t == 0 && factorGroup == 0); // @TODO: this is only used for checking presently, and should be removed altogether
// Now, nBestPathScores contain N-best expandedPathScores for each batch and beam,
// and nBestKeys for each their original location (batchIdx, beamHypIdx, word).
// combine N-best sets with existing search space (beams) to updated search space
beams = toHyps(nBestKeys, nBestPathScores,
/*nBestBeamSize*/expandedPathScores->shape()[-2], // used for interpretation of keys
/*vocabSize=*/expandedPathScores->shape()[-1], // used for interpretation of keys
beams,
states, // used for keeping track of per-ensemble-member path score
batch, // only used for propagating alignment info
factoredVocab, factorGroup,
emptyBatchEntries, // [origDimBatch] - empty source batch entries are marked with true
batchIdxMap); // used to create a reverse batch index map to recover original batch indices for this step
} // END FOR factorGroup = 0 .. numFactorGroups-1
prevBatchIdxMap = batchIdxMap; // save current batchIdx map to be used in next step; we are then going to look one step back
// remove all hyps that end in EOS
// The position of a hyp in the beam may change.
// in/out = shifts the batch index map if a beam gets fully purged
const auto purgedNewBeams = purgeBeams(beams, /*in/out=*/batchIdxMap);
// add updated search space (beams) to our return value
bool maxLengthReached = false;
for(int batchIdx = 0; batchIdx < origDimBatch; ++batchIdx) {
// if this batch entry has surviving hyps then add them to the traceback grid
if(!beams[batchIdx].empty()) { // if the beam is not empty expand the history object associated with the beam
if (histories[batchIdx]->size() >= options_->get<float>("max-length-factor") * batch->front()->batchWidth())
maxLengthReached = true;
histories[batchIdx]->add(beams[batchIdx], trgEosId, purgedNewBeams[batchIdx].empty() || maxLengthReached);
}
}
if (maxLengthReached) // early exit if max length limit was reached
break;
// this is the search space for the next output time step
beams = purgedNewBeams;
} // end of main loop over output time steps
return histories; // [origDimBatch][t][N best hyps]
}
} // namespace marian
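To round this off, a minimal sketch of how the pieces above fit together. decodeBatch is a hypothetical helper, not part of this commit; all of its arguments are assumed to be constructed elsewhere, and only the two calls inside it are taken from this file and beam_search.h:

// Hedged sketch: driving BeamSearch for one batch; inputs are assumed to exist already.
Histories decodeBatch(Ptr<Options> options,
                      const std::vector<Ptr<Scorer>>& scorers,
                      Ptr<const Vocab> trgVocab,
                      Ptr<ExpressionGraph> graph,
                      Ptr<data::CorpusBatch> batch) {
  auto search = New<BeamSearch>(options, scorers, trgVocab); // constructor as declared in beam_search.h
  return search->search(graph, batch);                       // [origDimBatch][t][N best hyps]
}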

508 src/translator/beam_search.h Executable file → Normal file
View File

@@ -1,14 +1,8 @@
#pragma once
#include <algorithm>
#include "marian.h"
#include "translator/history.h"
#include "translator/scorers.h"
#include "data/factored_vocab.h"
#include "data/shortlist.h"
#include "translator/helpers.h"
#include "translator/nth_element.h"
namespace marian {
@@ -23,13 +17,9 @@ private:
const bool PURGE_BATCH = true; // @TODO: diagnostic, to-be-removed once confirmed there are no issues.
public:
BeamSearch(Ptr<Options> options,
const std::vector<Ptr<Scorer>>& scorers,
const Ptr<const Vocab> trgVocab)
: options_(options),
scorers_(scorers),
beamSize_(options_->get<size_t>("beam-size")),
trgVocab_(trgVocab) {}
BeamSearch(Ptr<Options> options, const std::vector<Ptr<Scorer>>& scorers, const Ptr<const Vocab> trgVocab)
: options_(options), scorers_(scorers), beamSize_(options_->get<size_t>("beam-size")), trgVocab_(trgVocab)
{}
// combine new expandedPathScores and previous beams into new set of beams
Beams toHyps(const std::vector<unsigned int>& nBestKeys, // [currentDimBatch, beamSize] flattened -> ((batchIdx, beamHypIdx) flattened, word idx) flattened
@@ -39,168 +29,9 @@ public:
const Beams& beams,
const std::vector<Ptr<ScorerState /*const*/>>& states,
Ptr<data::CorpusBatch /*const*/> batch, // for alignments only
Ptr<FactoredVocab/*const*/> factoredVocab, size_t factorGroup,
Ptr<class FactoredVocab/*const*/> factoredVocab, size_t factorGroup,
const std::vector<bool>& dropBatchEntries, // [origDimBatch] - empty source batch entries are marked with true, should be cleared after first use.
const std::vector<IndexType>& batchIdxMap) const { // [origBatchIdx -> currentBatchIdx]
std::vector<float> align; // collects alignment information from the last executed time step
if(options_->hasAndNotEmpty("alignment") && factorGroup == 0)
align = scorers_[0]->getAlignment(); // [beam depth * max src length * current batch size] -> P(s|t); use alignments from the first scorer, even if ensemble,
const auto origDimBatch = beams.size(); // see function search for definition of origDimBatch and currentDimBatch etc.
Beams newBeams(origDimBatch); // return value of this function goes here. There are always origDimBatch beams.
// create a reverse batchMap to obtain original batchIdx in the starting batch size
// and calculate the current batch size based on non-empty beams
std::vector<IndexType> reverseBatchIdxMap; // empty if not purging batch entries
size_t currentDimBatch = beams.size();
if(PURGE_BATCH) {
reverseBatchIdxMap.resize(batchIdxMap.size()); // adjust size if doing batch purging.
currentDimBatch = 0;
for(int i = 0; i < batchIdxMap.size(); ++i) {
reverseBatchIdxMap[batchIdxMap[i]] = i; // reverse batch index mapping, multiple occurences get overwritten with the last one,
// which is expected due to down-shifting
if(!beams[i].empty())
currentDimBatch++;
}
}
for(size_t i = 0; i < nBestKeys.size(); ++i) { // [currentDimBatch, beamSize] flattened
// Keys encode batchIdx, beamHypIdx, and word index in the entire beam.
// They can be between 0 and (vocabSize * nBestBeamSize * batchSize)-1.
// (beamHypIdx refers to the GPU tensors, *not* the beams[] array; they are not the same in case of purging)
const auto key = nBestKeys[i];
// decompose key into individual indices (batchIdx, beamHypIdx, wordIdx)
const auto beamHypIdx = (key / vocabSize) % nBestBeamSize;
const auto currentBatchIdx = (key / vocabSize) / nBestBeamSize;
const auto origBatchIdx = reverseBatchIdxMap.empty() ? currentBatchIdx : reverseBatchIdxMap[currentBatchIdx]; // map currentBatchIdx back into original position within starting maximal batch size, required to find correct beam
bool dropHyp = !dropBatchEntries.empty() && dropBatchEntries[origBatchIdx] && factorGroup == 0;
WordIndex wordIdx;
if(dropHyp) { // if we force=drop the hypothesis, assign EOS, otherwise the expected word id.
if(factoredVocab) { // when using factoredVocab, extract the EOS lemma index from the word id, we predicting factors one by one here, hence lemma only
std::vector<size_t> eosFactors;
factoredVocab->word2factors(factoredVocab->getEosId(), eosFactors);
wordIdx = (WordIndex)eosFactors[0];
} else { // without factoredVocab lemma index and word index are the same. Safe cruising.
wordIdx = trgVocab_->getEosId().toWordIndex();
}
} else { // we are not dropping anything, just assign the normal index
wordIdx = (WordIndex)(key % vocabSize);
}
// @TODO: We currently assign a log probability of 0 to all beam entries of the dropped batch entry, instead it might be a good idea to use
// the per Hyp pathScore without the current expansion (a bit hard to obtain).
// For the case where we drop empty inputs, 0 is fine. For other use cases like a forced stop, the penultimate pathScore might be better.
// For the empty hyp this would naturally result in 0, too.
const float pathScore = dropHyp ? 0.f : nBestPathScores[i]; // 0 (Prob = 1, maximum score) if dropped or expanded path score for (batchIdx, beamHypIdx, word)
const auto& beam = beams[origBatchIdx];
auto& newBeam = newBeams[origBatchIdx]; // extended hypotheses are going to be placed in this new beam
if(newBeam.size() >= beam.size()) // getNBestList() generates N for all batch entries incl. those that already have a narrower beam
continue;
if(pathScore == INVALID_PATH_SCORE) // (dummy slot or word that cannot be expanded by current factor)
continue;
ABORT_IF(pathScore < INVALID_PATH_SCORE, "Actual pathScore ({}) is lower than INVALID_PATH_SCORE ({})??", pathScore, INVALID_PATH_SCORE); // This should not happen in valid situations. Currently the only smaller value would be -inf (effect of overflow in summation?)
ABORT_IF(beamHypIdx >= beam.size(), "Out of bounds beamHypIdx??"); // effectively this is equivalent to ABORT_IF(beams[origBatchIdx].empty(), ...)
// map wordIdx to word
auto prevBeamHypIdx = beamHypIdx; // back pointer
auto prevHyp = beam[prevBeamHypIdx];
Word word;
// If short list has been set, then wordIdx is an index into the short-listed word set,
// rather than the true word index.
auto shortlist = scorers_[0]->getShortlist();
if (factoredVocab) {
// For factored decoding, the word is built over multiple decoding steps,
// starting with the lemma, then adding factors one by one.
if (factorGroup == 0) {
word = factoredVocab->lemma2Word(shortlist ? shortlist->reverseMap(wordIdx) : wordIdx); // @BUGBUG: reverseMap is only correct if factoredVocab_->getGroupRange(0).first == 0
std::vector<size_t> factorIndices; factoredVocab->word2factors(word, factorIndices);
//LOG(info, "{} + {} ({}) -> {} -> {}",
// factoredVocab->decode(prevHyp->tracebackWords()),
// factoredVocab->word2string(word), factorIndices[0], prevHyp->getPathScore(), pathScore);
}
else {
//LOG(info, "{} |{} ({}) = {} ({}) -> {} -> {}",
// factoredVocab->decodeForDiagnostics(beam[beamHypIdx]->tracebackWords()),
// factoredVocab->getFactorGroupPrefix(factorGroup), factorGroup,
// factoredVocab->getFactorName(factorGroup, wordIdx), wordIdx,
// prevHyp->getPathScore(), pathScore);
word = beam[beamHypIdx]->getWord();
ABORT_IF(!factoredVocab->canExpandFactoredWord(word, factorGroup),
"A word without this factor snuck through to here??");
word = factoredVocab->expandFactoredWord(word, factorGroup, wordIdx);
prevBeamHypIdx = prevHyp->getPrevStateIndex();
prevHyp = prevHyp->getPrevHyp(); // short-circuit the backpointer, so that the traceback does not contain partially factored words
}
}
else if (shortlist)
word = Word::fromWordIndex(shortlist->reverseMap(wordIdx));
else
word = Word::fromWordIndex(wordIdx);
auto hyp = Hypothesis::New(prevHyp, word, prevBeamHypIdx, pathScore);
// Set score breakdown for n-best lists
if(options_->get<bool>("n-best")) {
auto breakDown = beam[beamHypIdx]->getScoreBreakdown();
ABORT_IF(factoredVocab && factorGroup > 0 && !factoredVocab->canExpandFactoredWord(word, factorGroup),
"A word without this factor snuck through to here??");
breakDown.resize(states.size(), 0); // at start, this is empty, so this will set the initial score to 0
for(size_t j = 0; j < states.size(); ++j) {
auto lval = states[j]->getLogProbs().getFactoredLogitsTensor(factorGroup); // [maxBeamSize, 1, currentDimBatch, dimFactorVocab]
// The flatting happens based on actual (current) batch size and batch index computed with batch-pruning as we are looking into the pruned tensor
size_t flattenedLogitIndex = (beamHypIdx * currentDimBatch + currentBatchIdx) * vocabSize + wordIdx; // (beam idx, batch idx, word idx); note: beam and batch are transposed, compared to 'key'
// @TODO: use a function on shape() to index, or new method val->at({i1, i2, i3, i4}) with broadcasting
ABORT_IF(lval->shape() != Shape({(int)nBestBeamSize, 1, (int)currentDimBatch, (int)vocabSize}) &&
(beamHypIdx == 0 && lval->shape() != Shape({1, 1, (int)currentDimBatch, (int)vocabSize})),
"Unexpected shape of logits?? {} != {}", lval->shape(), Shape({(int)nBestBeamSize, 1, (int)currentDimBatch, (int)vocabSize}));
breakDown[j] += lval->get(flattenedLogitIndex);
}
hyp->setScoreBreakdown(breakDown);
}
// Set alignments
if(!align.empty())
hyp->setAlignment(getAlignmentsForHypothesis(align, batch, (int)beamHypIdx, (int)currentBatchIdx, (int)origBatchIdx, (int)currentDimBatch));
else // not first factor: just copy
hyp->setAlignment(beam[beamHypIdx]->getAlignment());
newBeam.push_back(hyp);
}
// if factored vocab and this is not the first factor, we need to
// also propagate factored hypotheses that do not get expanded in this step because they don't have this factor
if (factorGroup > 0) {
for (size_t batchIdx = 0; batchIdx < beams.size(); batchIdx++) {
const auto& beam = beams[batchIdx];
auto& newBeam = newBeams[batchIdx];
for (const auto& beamHyp : beam) {
auto word = beamHyp->getWord();
//LOG(info, "Checking {}", factoredVocab->word2string(word));
if (factoredVocab->canExpandFactoredWord(word, factorGroup)) // handled above
continue;
//LOG(info, "Forwarded {}", factoredVocab->word2string(word));
newBeam.push_back(beamHyp);
}
if (newBeam.size() > beam.size()) {
//LOG(info, "Size {}, sorting...", newBeam.size());
std::nth_element(newBeam.begin(), newBeam.begin() + beam.size(), newBeam.end(), [](Hypothesis::PtrType a, Hypothesis::PtrType b) {
return a->getPathScore() > b->getPathScore(); // (sort highest score first)
});
//LOG(info, "Size {}, sorted...", newBeam.size());
newBeam.resize(beam.size());
}
}
}
return newBeams;
}
const std::vector<IndexType>& batchIdxMap) const;
std::vector<float> getAlignmentsForHypothesis( // -> P(s|t) for current t and given beam and batch dim
const std::vector<float> alignAll, // [beam depth, max src length, batch size, 1], flattened vector of all attention probabilities
@@ -208,334 +39,13 @@ public:
int beamHypIdx,
int currentBatchIdx,
int origBatchIdx,
int currentDimBatch) const {
// Let's B be the beam size, N be the number of batched sentences,
// and L the number of words in the longest sentence in the batch.
// The alignment vector:
//
// if(first)
// * has length of N x L if it's the first beam
// * stores elements in the following order:
// beam1 = [word1-batch1, word1-batch2, ..., word2-batch1, ...]
// else
// * has length of N x L x B
// * stores elements in the following order:
// beams = [beam1, beam2, ..., beam_n]
//
// The mask vector is always of length N x L and has 1/0s stored like
// in a single beam, i.e.:
// * [word1-batch1, word1-batch2, ..., word2-batch1, ...]
//
size_t origDimBatch = batch->size(); // number of sentences in batch
size_t batchWidth = batch->width(); // max src length
// loop over words of batch entry 'currentBatchIdx' and beam entry 'beamHypIdx'
std::vector<float> align;
for(size_t srcPos = 0; srcPos < batchWidth; ++srcPos) { // loop over source positions
// We are looking into the probabilites from an actual tensor, hence we need to use currentDimBatch and currentBatchIdx.
size_t currentAttIdx = (batchWidth * beamHypIdx + srcPos) * currentDimBatch + currentBatchIdx; // = flatten [beam index, s, batch index, 0]
// We are looking into the mask from the orginal batch, hence we need to use origDmBatch and origBatchIdx.
size_t origAttIdx = (batchWidth * beamHypIdx + srcPos) * origDimBatch + origBatchIdx;; // = flatten [beam index, s, batch index, 0]
size_t origMaskIdx = origAttIdx % (batchWidth * origDimBatch); // == batchIdx + (batchSize * srcPos) = flatten [0, s, batch index, 0]
// If the original position is not masked out used the corresponding current attention score.
if(batch->front()->mask()[origMaskIdx] != 0)
align.emplace_back(alignAll[currentAttIdx]);
}
return align;
}
int currentDimBatch) const;
// remove all beam entries that have reached EOS
Beams purgeBeams(const Beams& beams, /*in/out=*/std::vector<IndexType>& batchIdxMap) {
const auto trgEosId = trgVocab_->getEosId();
Beams newBeams;
size_t beamIdx = 0; // beam index
for(auto beam : beams) {
Beam newBeam; // a beam of surviving hyps
for(auto hyp : beam)
if(hyp->getWord() != trgEosId) // if this hyp is not finished,
newBeam.push_back(hyp); // move over to beam of surviving hyps
Beams purgeBeams(const Beams& beams, /*in/out=*/std::vector<IndexType>& batchIdxMap);
if(PURGE_BATCH)
if(newBeam.empty() && !beam.empty()) { // previous beam had hyps, but all were finished in this step, newBeam will now stay empty
for(size_t i = beamIdx + 1; i < beams.size(); ++i) // for all entries above this beam
batchIdxMap[i] = batchIdxMap[i] - 1; // make them look at one batch index below, as the current entry will be removed from the batch.
}
newBeams.push_back(newBeam);
beamIdx++; // move to next beam index
}
return newBeams;
}
//**********************************************************************
// main decoding function
Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
auto factoredVocab = trgVocab_->tryAs<FactoredVocab>();
#if 0 // use '1' here to disable factored decoding, e.g. for comparisons
factoredVocab.reset();
#endif
size_t numFactorGroups = factoredVocab ? factoredVocab->getNumGroups() : 1;
if (numFactorGroups == 1) // if no factors then we didn't need this object in the first place
factoredVocab.reset();
// We will use the prefix "origBatch..." whenever we refer to batch dimensions of the original batch. These do not change during search.
// We will use the prefix "currentBatch.." whenever we refer to batch dimension that can change due to batch-pruning.
const int origDimBatch = (int)batch->size();
const auto trgEosId = trgVocab_->getEosId();
const auto trgUnkId = trgVocab_->getUnkId();
auto getNBestList = createGetNBestListFn(beamSize_, origDimBatch, graph->getDeviceId());
for(auto scorer : scorers_) {
scorer->clear(graph);
}
Histories histories(origDimBatch);
for(int i = 0; i < origDimBatch; ++i) {
size_t sentId = batch->getSentenceIds()[i];
histories[i] = New<History>(sentId,
options_->get<float>("normalize"),
options_->get<float>("word-penalty"));
}
// start states
std::vector<Ptr<ScorerState>> states;
for(auto scorer : scorers_) {
states.push_back(scorer->startState(graph, batch));
}
// create one beam per batch entry with sentence-start hypothesis
Beams beams(origDimBatch, Beam(beamSize_, Hypothesis::New())); // array [origDimBatch] of array [maxBeamSize] of Hypothesis, keeps full size through search.
// batch purging is determined from an empty sub-beam.
std::vector<IndexType> batchIdxMap(origDimBatch); // Record at which batch entry a beam is looking.
// By default that corresponds to position in array,
// but shifts in the course of removing batch entries when they are finished.
const std::vector<bool> emptyBatchEntries; // used for recording if there are empty input batch entries
for(int origBatchIdx = 0; origBatchIdx < origDimBatch; ++origBatchIdx) {
batchIdxMap[origBatchIdx] = origBatchIdx; // map to same position on initialization
auto& beam = beams[origBatchIdx];
histories[origBatchIdx]->add(beam, trgEosId); // add beams with start-hypotheses to traceback grid
// Mark batch entries that consist only of source <EOS> i.e. these are empty lines. They will be forced to EOS and purged from batch
const auto& srcEosId = batch->front()->vocab()->getEosId();
const_cast<std::vector<bool>&>(emptyBatchEntries).push_back(batch->front()->data()[origBatchIdx] == srcEosId); // const_cast during construction
}
// determine index of UNK in the log prob vectors if we want to suppress it in the decoding process
int unkColId = -1;
if (trgUnkId != Word::NONE && !options_->get<bool>("allow-unk", false)) { // do we need to suppress unk?
unkColId = factoredVocab ? factoredVocab->getUnkIndex() : trgUnkId.toWordIndex(); // what's the raw index of unk in the log prob vector?
auto shortlist = scorers_[0]->getShortlist(); // first shortlist is generally ok, @TODO: make sure they are the same across scorers?
if (shortlist)
unkColId = shortlist->tryForwardMap(unkColId); // use shifted postion of unk in case of using a shortlist, shortlist may have removed unk which results in -1
}
// the decoding process updates the following state information in each output time step:
// - beams: array [origDimBatch] of array [maxBeamSize] of Hypothesis
// - current output time step's set of active hypotheses, aka active search space
// - states[.]: ScorerState
// - NN state; one per scorer, e.g. 2 for ensemble of 2
// and it forms the following return value
// - histories: array [origDimBatch] of History
// with History: vector [t] of array [maxBeamSize] of Hypothesis
// with Hypothesis: (last word, aggregate score, prev Hypothesis)
IndexType currentDimBatch = origDimBatch;
auto prevBatchIdxMap = batchIdxMap; // [origBatchIdx -> currentBatchIdx] but shifted by one time step
// main loop over output time steps
for (size_t t = 0; ; t++) {
ABORT_IF(origDimBatch != beams.size(), "Lost a batch entry??");
// determine beam size for next output time step, as max over still-active sentences
// E.g. if all batch entries are down from beam 5 to no more than 4 surviving hyps, then
// switch to beam of 4 for all. If all are done, then beam ends up being 0, and we are done.
size_t maxBeamSize = 0; // @TODO: is there some std::algorithm for this?
for(auto& beam : beams)
if(beam.size() > maxBeamSize)
maxBeamSize = beam.size();
// done if all batch entries have reached EOS on all beam entries
if (maxBeamSize == 0)
break;
for (size_t factorGroup = 0; factorGroup < numFactorGroups; factorGroup++) {
// for factored vocabs, we do one factor at a time, but without updating the scorer for secondary factors
//**********************************************************************
// create constant containing previous path scores for current beam
// Also create mapping of hyp indices, for reordering the decoder-state tensors.
std::vector<IndexType> batchIndices; // [1, 1, currentDimBatch, 1] indices of currently used batch indices with regard to current, actual tensors
std::vector<IndexType> hypIndices; // [maxBeamSize, 1, currentDimBatch, 1] (flattened) tensor index ((beamHypIdx, batchIdx), flattened) of prev hyp that a hyp originated from
std::vector<Word> prevWords; // [maxBeamSize, 1, currentDimBatch, 1] (flattened) word that a hyp ended in, for advancing the decoder-model's history
Expr prevPathScores; // [maxBeamSize, 1, currentDimBatch, 1], path score that a hyp ended in (last axis will broadcast into vocab size when adding expandedPathScores)
bool anyCanExpand = false; // stays false if all hyps are invalid factor expansions
if(t == 0 && factorGroup == 0) { // no scores yet
prevPathScores = graph->constant({1, 1, 1, 1}, inits::fromValue(0));
anyCanExpand = true;
// at the beginning all batch entries are used
batchIndices.resize(origDimBatch);
std::iota(batchIndices.begin(), batchIndices.end(), 0);
} else {
if(factorGroup == 0) // only factorGroup==0 can subselect neural state
for(int currentBatchIdx = 0; currentBatchIdx < beams.size(); ++currentBatchIdx) // loop over batch entries (active sentences)
if(!beams[currentBatchIdx].empty() || !PURGE_BATCH) // for each beam check
batchIndices.push_back(prevBatchIdxMap[currentBatchIdx]); // which batch entries were active in previous step
std::vector<float> prevScores;
for(size_t beamHypIdx = 0; beamHypIdx < maxBeamSize; ++beamHypIdx) { // loop over globally maximal beam-size (maxBeamSize)
for(int origBatchIdx = 0; origBatchIdx < origDimBatch; ++origBatchIdx) { // loop over all batch entries (active and inactive)
auto& beam = beams[origBatchIdx];
if(beamHypIdx < beam.size()) {
auto hyp = beam[beamHypIdx];
auto word = hyp->getWord();
auto canExpand = (!factoredVocab || factoredVocab->canExpandFactoredWord(hyp->getWord(), factorGroup));
//LOG(info, "[{}, {}] Can expand {} with {} -> {}", batchIdx, beamHypIdx, (*batch->back()->vocab())[hyp->getWord()], factorGroup, canExpand);
anyCanExpand |= canExpand;
auto currentBatchIdx = origBatchIdx;
if(PURGE_BATCH) {
if(factorGroup == 0)
currentBatchIdx = prevBatchIdxMap[origBatchIdx]; // subselection may happen for factorGroup == 0
else
currentBatchIdx = batchIdxMap[origBatchIdx]; // no subselection happens for factorGroup > 0,
// but we treat it like a next step, since a step
// happened for factorGroup == 0
}
auto hypIndex = (IndexType)(hyp->getPrevStateIndex() * currentDimBatch + currentBatchIdx); // (beamHypIdx, batchIdx), flattened, for index_select() operation
hypIndices.push_back(hypIndex); // (beamHypIdx, batchIdx), flattened as said above.
prevWords .push_back(word);
prevScores.push_back(canExpand ? hyp->getPathScore() : INVALID_PATH_SCORE);
} else { // pad to maxBeamSize (dummy hypothesis)
if(!PURGE_BATCH || !beam.empty()) { // but only if we are not pruning and the beam is not deactivated yet
hypIndices.push_back(0);
prevWords.push_back(trgEosId); // (unused, but must be valid)
prevScores.push_back((float)INVALID_PATH_SCORE);
}
}
}
}
if(factorGroup == 0)
currentDimBatch = (IndexType) batchIndices.size(); // keep batch size constant for all factor groups in a time step
prevPathScores = graph->constant({(int)maxBeamSize, 1, (int)currentDimBatch, 1}, inits::fromVector(prevScores));
}
if (!anyCanExpand) // all words cannot expand this factor: skip
continue;
//**********************************************************************
// compute expanded path scores with word prediction probs from all scorers
auto expandedPathScores = prevPathScores; // will become [maxBeamSize, 1, currDimBatch, dimVocab]
Expr logProbs;
for(size_t i = 0; i < scorers_.size(); ++i) {
if (factorGroup == 0) {
// compute output probabilities for current output time step
// - uses hypIndices[index in beam, 1, batch index, 1] to reorder scorer state to reflect the top-N in beams[][]
// - adds prevWords [index in beam, 1, batch index, 1] to the scorer's target history
// - performs one step of the scorer
// - returns new NN state for use in next output time step
// - returns vector of prediction probabilities over output vocab via newState
// update state in-place for next output time step
//if (t > 0) for (size_t kk = 0; kk < prevWords.size(); kk++)
// LOG(info, "prevWords[{},{}]={} -> {}", t/numFactorGroups, factorGroup,
// factoredVocab ? factoredVocab->word2string(prevWords[kk]) : (*batch->back()->vocab())[prevWords[kk]],
// prevScores[kk]);
states[i] = scorers_[i]->step(graph, states[i], hypIndices, prevWords, batchIndices, (int)maxBeamSize);
if (numFactorGroups == 1) // @TODO: this branch can go away
logProbs = states[i]->getLogProbs().getLogits(); // [maxBeamSize, 1, currentDimBatch, dimVocab]
else
{
auto shortlist = scorers_[i]->getShortlist();
logProbs = states[i]->getLogProbs().getFactoredLogits(factorGroup, shortlist); // [maxBeamSize, 1, currentDimBatch, dimVocab]
}
}
else {
// add secondary factors
// For those, we don't update the decoder-model state in any way.
// Instead, we just keep expanding with the factors.
// We will have temporary Word entries in hyps with some factors set to FACTOR_NOT_SPECIFIED.
// For some lemmas, a factor is not applicable. For those, the factor score is the same (zero)
// for all factor values. This would thus unnecessarily pollute the beam with identical copies,
// and push out other hypotheses. Hence, we exclude those here by setting the path score to
// INVALID_PATH_SCORE. Instead, toHyps() explicitly propagates those hyps by simply copying the
// previous hypothesis.
logProbs = states[i]->getLogProbs().getFactoredLogits(factorGroup, /*shortlist=*/ nullptr, hypIndices, maxBeamSize); // [maxBeamSize, 1, currentDimBatch, dimVocab]
}
// expand all hypotheses, [maxBeamSize, 1, currentDimBatch, 1] -> [maxBeamSize, 1, currentDimBatch, dimVocab]
expandedPathScores = expandedPathScores + scorers_[i]->getWeight() * logProbs;
}
// make beams continuous
expandedPathScores = swapAxes(expandedPathScores, 0, 2); // -> [currentDimBatch, 1, maxBeamSize, dimVocab]
// perform NN computation
if(t == 0 && factorGroup == 0)
graph->forward();
else
graph->forwardNext();
//**********************************************************************
// suppress specific symbols if not at right positions
if(unkColId != -1 && factorGroup == 0)
suppressWord(expandedPathScores, unkColId);
for(auto state : states)
state->blacklist(expandedPathScores, batch);
//**********************************************************************
// perform beam search
// find N best amongst the (maxBeamSize * dimVocab) hypotheses
std::vector<unsigned int> nBestKeys; // [currentDimBatch, maxBeamSize] flattened -> (batchIdx, beamHypIdx, word idx) flattened
std::vector<float> nBestPathScores; // [currentDimBatch, maxBeamSize] flattened
getNBestList(/*in*/ expandedPathScores->val(), // [currentDimBatch, 1, maxBeamSize, dimVocab or dimShortlist]
/*N=*/ maxBeamSize, // desired beam size
/*out*/ nBestPathScores, /*out*/ nBestKeys,
/*first=*/t == 0 && factorGroup == 0); // @TODO: this is only used for checking presently, and should be removed altogether
// Now, nBestPathScores contain N-best expandedPathScores for each batch and beam,
// and nBestKeys for each their original location (batchIdx, beamHypIdx, word).
// combine N-best sets with existing search space (beams) to updated search space
beams = toHyps(nBestKeys, nBestPathScores,
/*nBestBeamSize*/expandedPathScores->shape()[-2], // used for interpretation of keys
/*vocabSize=*/expandedPathScores->shape()[-1], // used for interpretation of keys
beams,
states, // used for keeping track of per-ensemble-member path score
batch, // only used for propagating alignment info
factoredVocab, factorGroup,
emptyBatchEntries, // [origDimBatch] - empty source batch entries are marked with true
batchIdxMap); // used to create a reverse batch index map to recover original batch indices for this step
} // END FOR factorGroup = 0 .. numFactorGroups-1
prevBatchIdxMap = batchIdxMap; // save current batchIdx map to be used in next step; we are then going to look one step back
// remove all hyps that end in EOS
// The position of a hyp in the beam may change.
// in/out = shifts the batch index map if a beam gets fully purged
const auto purgedNewBeams = purgeBeams(beams, /*in/out=*/batchIdxMap);
// add updated search space (beams) to our return value
bool maxLengthReached = false;
for(int batchIdx = 0; batchIdx < origDimBatch; ++batchIdx) {
// if this batch entry has surviving hyps then add them to the traceback grid
if(!beams[batchIdx].empty()) { // if the beam is not empty expand the history object associated with the beam
if (histories[batchIdx]->size() >= options_->get<float>("max-length-factor") * batch->front()->batchWidth())
maxLengthReached = true;
histories[batchIdx]->add(beams[batchIdx], trgEosId, purgedNewBeams[batchIdx].empty() || maxLengthReached);
}
}
if (maxLengthReached) // early exit if max length limit was reached
break;
// this is the search space for the next output time step
beams = purgedNewBeams;
} // end of main loop over output time steps
return histories; // [origDimBatch][t][N best hyps]
}
Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch);
};
} // namespace marian

View File

@@ -3,7 +3,8 @@
::
:: This script is used to verify that all the dependencies required to build Marian are available.
:: The Cuda SDK and the Intel MKL must be installed beforehand by the user.
:: The Boost and OpenSSH libraries, if not found, will be installed by this script using vcpkg
:: The rest of the libraries (see README.md), if not found, will be installed by this script using
:: vcpkg.
::
::
@echo off
@@ -28,6 +29,7 @@ set ROOT=%~dp0
::set BOOST_INCLUDEDIR=
::set BOOST_LIBRARYDIR=
::set OPENSSL_ROOT_DIR=
::set Protobuf_SRC_ROOT_FOLDER=
:: If all the variables are empty and vcpkg is found in a known path, the script will download and
@@ -41,9 +43,9 @@ set ROOT=%~dp0
::set MKLROOT=
if "%BOOST_INCLUDEDIR%" == "" goto :needVcPkg
if "%OPENSSL_ROOT_DIR%" == "" goto :needVcPkg
if "%Protobuf_SRC_ROOT_FOLDER%"=="" goto :needVcPkg
goto :checkDeps
@ -64,6 +66,7 @@ if not exist %VCPKG_ROOT% (
echo --- Cloning vcpkg...
git clone https://github.com/Microsoft/vcpkg.git %VCPKG_ROOT%
set USE_BOOST_172=1
set BOOTSTRAP_VCPKG=1
) else (
@ -73,6 +76,7 @@ if not exist %VCPKG_ROOT% (
echo --- Updating vcpkg...
for /f "delims=" %%p in ('git pull') do (
if not "%%p" == "Already up to date." (
set USE_BOOST_172=1
set BOOTSTRAP_VCPKG=1
)
)
@ -80,6 +84,17 @@ if not exist %VCPKG_ROOT% (
popd
)
:: Check out the last version of Boost that is supported by the 3rd party library
:: SimpleWebSocketServer. Vcpkg does not allow installing a specific version of a library yet, but
:: the feature has a PR and should be available soon.
:: For more details, see https://github.com/microsoft/vcpkg/issues/1681
if "%USE_BOOST_172%"=="1" (
pushd %VCPKG_ROOT%
echo --- Checkout to Boost version 1.72...
git checkout 597038559647776ee39d02dcf159da05d9342f1d --pathspec-from-file=../../pathspec-boost-1.72.txt
popd
)
if "%BOOTSTRAP_VCPKG%"=="1" (
pushd %VCPKG_ROOT%
call bootstrap-vcpkg.bat
@ -91,7 +106,6 @@ set VCPKG_INSTALL=%VCPKG_ROOT%\installed\%VCPKG_DEFAULT_TRIPLET%
set VCPKG=%VCPKG_ROOT%\vcpkg
:: -------------------------------------------------------
:: Check dependencies and configure CMake
:checkDeps
@ -117,12 +131,12 @@ else (
:: -------------------------
:: The MKL setup does not set any environment variable to the installation path.
:: The script look into the standard default installation dir
:: The script looks into the standard default installation directory.
:: If you installed MKL in a custom directory, please set the variable MKLROOT at the top of this file.
::
echo.
echo ... Intel MKL
if "%MKLROOT%" == "" (
if "%MKLROOT%" == "" (
set "MKLROOT=C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl"
)
if not exist "%MKLROOT%" (
@ -153,7 +167,7 @@ echo Found Intel MKL library in %MKLROOT%
echo.
echo ... Boost (1.58+)
if "%BOOST_INCLUDEDIR%" == "" (
"%VCPKG%" install boost-chrono boost-filesystem boost-iostreams boost-program-options boost-regex boost-system boost-thread boost-timer boost-asio
"%VCPKG%" install boost-system boost-asio
set BOOST_INCLUDEDIR=%VCPKG_INSTALL%\include
set BOOST_LIBRARYDIR=%VCPKG_INSTALL%\lib
)
@ -197,17 +211,15 @@ if "%OPENSSL_ROOT_DIR%"=="" (
set OPENSSL_ROOT_DIR=%VCPKG_INSTALL%
)
if not exist "%VCPKG_INSTALL%/bin/protoc.exe" (
mkdir build
cd build
git clone https://github.com/protocolbuffers/protobuf
cd protobuf
git checkout v3.6.1
cd cmake
cmake . -A x64 -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=%VCPKG_INSTALL%
cmake --build . --config Release --target install
cd ..\..\..
:: -------------------------
:: Protobuf_SRC_ROOT_FOLDER can be set to an existing Protobuf installation.
:: If not, we use vcpkg to install the library
::
echo.
echo ... Protobuf
if "%Protobuf_SRC_ROOT_FOLDER%"=="" (
%VCPKG% install protobuf
set Protobuf_SRC_ROOT_FOLDER=%VCPKG_INSTALL%
)
set CMAKE_PREFIX_PATH=%VCPKG_INSTALL%
@ -215,13 +227,14 @@ set CMAKE_PREFIX_PATH=%VCPKG_INSTALL%
echo.
echo.
echo --------------------------------------------------
echo CUDA_PATH ^| %CUDA_PATH%
echo MKLROOT ^| %MKLROOT%
echo VCPKG_ROOT ^| %VCPKG_ROOT%
echo BOOST_INCLUDEDIR ^| %BOOST_INCLUDEDIR%
echo BOOST_LIBRARYDIR ^| %BOOST_LIBRARYDIR%
echo OPENSSL_ROOT_DIR ^| %OPENSSL_ROOT_DIR%
echo CMAKE_PREFIX_PATH ^| %CMAKE_PREFIX_PATH%
echo BOOST_INCLUDEDIR ^| %BOOST_INCLUDEDIR%
echo BOOST_LIBRARYDIR ^| %BOOST_LIBRARYDIR%
echo CMAKE_PREFIX_PATH ^| %CMAKE_PREFIX_PATH%
echo CUDA_PATH ^| %CUDA_PATH%
echo MKLROOT ^| %MKLROOT%
echo OPENSSL_ROOT_DIR ^| %OPENSSL_ROOT_DIR%
echo Protobuf_SRC_ROOT_FOLDER ^| %Protobuf_SRC_ROOT_FOLDER%
echo VCPKG_ROOT ^| %VCPKG_ROOT%
echo --------------------------------------------------
echo.
echo.

View File

@ -1379,6 +1379,10 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClCompile Include="..\src\command\marian_embedder.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\src\command\marian_main.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
@ -1414,6 +1418,7 @@
<ClCompile Include="..\src\data\corpus_nbest.cpp" />
<ClCompile Include="..\src\data\text_input.cpp" />
<ClCompile Include="..\src\3rd_party\cnpy\cnpy.cpp" />
<ClCompile Include="..\src\embedder\vector_collector.cpp" />
<ClCompile Include="..\src\examples\iris\helper.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
@ -1556,7 +1561,12 @@
<ClCompile Include="..\src\3rd_party\yaml-cpp\tag.cpp" />
<ClCompile Include="..\src\3rd_party\yaml-cpp\contrib\graphbuilder.cpp" />
<ClCompile Include="..\src\3rd_party\yaml-cpp\contrib\graphbuilderadapter.cpp" />
<ClInclude Include="..\src\embedder\embedder.h" />
<ClInclude Include="..\src\embedder\vector_collector.h" />
<ClInclude Include="..\src\layers\lsh.h" />
<ClInclude Include="..\src\models\encoder_pooler.h" />
<ClInclude Include="..\src\models\laser.h" />
<ClInclude Include="..\src\models\pooler.h" />
<ClInclude Include="..\src\onnx\expression_graph_onnx_exporter.h" />
<ClInclude Include="resource.h" />
</ItemGroup>

View File

@ -931,6 +931,12 @@
<ClCompile Include="..\src\layers\lsh.cpp">
<Filter>layers</Filter>
</ClCompile>
<ClCompile Include="..\src\command\marian_embedder.cpp">
<Filter>command</Filter>
</ClCompile>
<ClCompile Include="..\src\embedder\vector_collector.cpp">
<Filter>embedder</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\marian.h" />
@ -2348,6 +2354,21 @@
<ClInclude Include="..\src\layers\lsh.h">
<Filter>layers</Filter>
</ClInclude>
<ClInclude Include="..\src\models\encoder_pooler.h">
<Filter>models</Filter>
</ClInclude>
<ClInclude Include="..\src\models\laser.h">
<Filter>models</Filter>
</ClInclude>
<ClInclude Include="..\src\models\pooler.h">
<Filter>models</Filter>
</ClInclude>
<ClInclude Include="..\src\embedder\embedder.h">
<Filter>embedder</Filter>
</ClInclude>
<ClInclude Include="..\src\embedder\vector_collector.h">
<Filter>embedder</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="3rd_party">
@ -2614,6 +2635,9 @@
<Filter Include="3rd_party\faiss\utils">
<UniqueIdentifier>{b100324b-a506-45fa-948e-40be75b239fc}</UniqueIdentifier>
</Filter>
<Filter Include="embedder">
<UniqueIdentifier>{7a11415f-9bc5-4fe4-8bf2-95fa4114736d}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">

58
vs/NOTES.md Normal file
View File

@ -0,0 +1,58 @@
# How to build Marian on Windows with GPU support
This is mainly of interest to developers; extracted from the README.
---
## Changes from the master branch
This part gives more information on all changes done in this PR. Refer to [this page](https://github.com/cedrou/marian-dev/commits/build_on_win) for commits.
1. __Fix Cuda error : Unsupported Visual Studio Version Error__
See above for justification and fixes
2. __Fix VS compiler flags / Build in Release, with improved debug info__
Added VS specific compile and link flags
3. __Fix Warning: D9002: ignoring unknown option '-m64'__
This one is related to a compiler flag, added while finding the MKL package, that does not exist for the MS compiler.
4. __Fix marian::Backend, marian::cpu::Backend and marian::gpu::Backend conflicts__
There were name conflicts between the 3 `Backend` classes that confused the compiler:
> template instantiation resulted in unexpected function type of "void(Ptr\<marian::gpu::Backend\> backend, [...])" (the meaning of a name may have changed since the template declaration -- the type of the template is "void(Ptr\<marian::Backend\> backend, [...]").
To solve this, I changed the declaration of 3 methods to specify the full name with namespace (`marian::Backend`, instead of `Backend`).
5. __Fix error : identifier "CUDA_FLT_MAX" is undefined in device code__
`CUDA_FLT_MAX` is not seen by CUDA from the device code and I had to declare it as `__constant__`.
From [StackOverflow](https://stackoverflow.com/questions/20111409/how-to-pass-structures-into-cuda-device#comment29972423_20112013):
> Undecorated constants get compiled into both host and device code with gcc based toolchains, but not with the Microsoft compiler.
6. __Fix fatal error C1019: unexpected #else__
There were preprocessor instructions (`#ifdef ... #else ... #endif`) in the middle of a call to a macro function (`CUDNN_CALL`), which is not allowed with the MS compiler.
7. __Fix mismatched class/struct forward declarations__
Microsoft's C++ name mangling makes a distinction between `class` and `struct` objects, so definitions and forward declarations must match.
See [this pdf](https://www.agner.org/optimize/calling_conventions.pdf), page 27, for more information.
_Note_: This fix was invalidated by commit # from @frankseide
8. __Fix unresolved external due to a removed #include directive__
An include directive had been removed from MSVC compilation, but this prevented the project from building.
I'm not sure why this was removed; the comment is:
#ifndef _WIN32 // TODO: remove this once I updated the Linux-side makefile
9. __Fix CUDA+MSVC incompatibility with Boost.Preprocessor__
The toolchain nvcc+msvc is not correctly handled in Boost.Preprocessor module. See [this issue](https://github.com/boostorg/preprocessor/issues/15). In the meantime, the recommended workaround is to disable Variadic Macro support in Boost.
I created a [PR](https://github.com/boostorg/preprocessor/pull/18) in the Boost repo on GitHub to fix this.
_Note_: The library sources have been fixed, but this fix is still needed until the next release of Boost.Preprocessor
10. __Provide implementation for mkstemp / Fix temporary file creation__
The code explicitly disabled the creation of temporary files because "mkstemp not available in Windows". In fact, `mktemp` and `unlink` are both implemented, but they don't work as expected. I used `tempnam` to replace `mkstemp`, and added the flag `_O_TEMPORARY` to the parameters of `open` to automatically delete the file when it is closed. If `unlinkEarly` is not set, I added a call to `remove` in the destructor to delete the file after its closure (see the sketch at the end of this note).
I also handled the case of the default value for the `base` parameter: the path `\tmp` does not exist on Windows, so it is replaced by the value of the `%TMP%` environment variable in `NormalizeTempPrefix`.
11. __Revert commit #2f8b093 + Fix copy/paste error while fixing #301 + restrict fix to MSVC compiler.__
cf. [Issue #301](https://github.com/marian-nmt/marian-dev/issues/301)
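
For illustration, here is a minimal, self-contained sketch of the Windows temporary-file approach described in item 10. It is not the actual Marian implementation; the function name and the exact flag combination are assumptions for this example:

    #include <cstdio>
    #include <cstdlib>
    #include <fcntl.h>
    #include <io.h>
    #include <string>
    #include <sys/stat.h>

    // Sketch only (assumes Windows + the MSVC CRT): a unique name is generated with _tempnam()
    // instead of mkstemp(), and _O_TEMPORARY asks the OS to delete the file automatically when
    // its last handle is closed (the analogue of the "unlinkEarly" behaviour).
    int openSelfDeletingTempFile(const std::string& prefix, std::string& outName) {
      const char* tmpDir = std::getenv("TMP");   // "\tmp" does not exist on Windows, use %TMP% instead
      char* name = _tempnam(tmpDir ? tmpDir : ".", prefix.c_str());
      if(!name)
        return -1;
      outName = name;
      int fd = _open(name, _O_RDWR | _O_CREAT | _O_EXCL | _O_TEMPORARY | _O_BINARY,
                     _S_IREAD | _S_IWRITE);
      free(name);  // _tempnam() allocates the returned string with malloc()
      return fd;   // without _O_TEMPORARY, the owner would call remove(outName.c_str()) in its destructor
    }

The sketch only shows the OS-level mechanism; the actual code additionally handles `unlinkEarly` and the `%TMP%` substitution in `NormalizeTempPrefix` as described above.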

View File

@ -3,81 +3,72 @@
## Install prerequisites
The following SDK are required to build Marian with GPU support. At least one of them needs to be installed. If only CUDA is installed but not MKL,
a GPU-only version will be build. If only MKL is installed and not CUDA, only the CPU version will be built. So if you are interested in only one
functionality, you can ommit one of them. Install both for full functionality.
The following SDKs are required to build Marian with GPU support. At least one of them needs to be
installed. If only CUDA is installed but not MKL, a GPU-only version will be built. If only MKL is
installed and not CUDA, only the CPU version will be built. So if you are interested in only one
functionality, you can omit one of them. Install both for full functionality.
- [Cuda 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exelocal)
- Base installer
- [MKL](https://software.intel.com/en-us/mkl)
__Note: Patch for CUDA 9.2 error: Unsupported Visual Studio Version Error__
This seems to work fine with CUDA 10.0.
When using CUDA 9.2, the latest versions of Visual Studio 2017 are not officially supported by CUDA. Two fixes are proposed:
- Downgrade Visual Studio to a supported version
- Edit the file `<CUDA install path>\include\crt\host_config.h` and change the line 131:
131 #if _MSC_VER < 1600 || _MSC_VER > 1914
into:
131 #if _MSC_VER < 1600 || _MSC_VER > 1915
For more information, read this [nVidia forum](https://devtalk.nvidia.com/default/topic/1022648/cuda-setup-and-installation/cuda-9-unsupported-visual-studio-version-error/4)
- [Cuda 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exelocal):
Base installer; CUDA 10.0+ is recommended, as there might be issues with CUDA 9.2, see below
- [Intel MKL](https://software.intel.com/en-us/mkl)
---
## Check dependencies : `CheckDeps.bat`
In addition to the 2 previous prerequisites, Marian needs 2 libraries that you may already have on your system:
In addition to the 2 previous prerequisites, Marian may need the following libraries that you may
already have on your system:
- Boost (1.58+)
- OpenSSL (optional for server)
- Boost (1.58-1.72), optional for marian-server (`COMPILE_SERVER=TRUE` in CMake)
- OpenSSL, optional for marian-server
- Protobuf, optional for compiling with SentencePiece (`USE_SENTENCEPIECE=TRUE` in CMake),
recommended
The script `CheckDeps.bat` can be used to verify that all dependencies are found on your system. If not, it will use the `vcpkg` library manager to download and manage your dependencies for CMake.
The script `CheckDeps.bat` can be used to verify that all dependencies are found on your system. If
not, it will use the `vcpkg` library manager to download and manage your dependencies for CMake.
If you already have a working `vcpkg` installation, this script can use it:
- If vcpkg is in your `PATH` environment variable, the script will find it and use it automatically.
- Otherwise, you need to edit the script and set the `VCPKG_ROOT` variable to the directory that contains the vcpkg.exe
If you prefer to manage yourself the dependencies, you can edit the script file to set the following variables to the respective installation paths. These variable can also be already set in your environment.
- `BOOST_INCLUDE_PATH` and `BOOST_LIB_PATH`
- `OPENSSL_PATH`
If you already have a working `vcpkg` installation, this script can use it.
If vcpkg is in your `PATH` environment variable, the script will find it and use it automatically.
Otherwise, you need to edit the script and set the `VCPKG_ROOT` variable.
Please see the script for more details.
---
## Build the project
There are 3 alternatives to build the project:
1. Use Visual Studio 2017 built-in support for CMake
1. Use Visual Studio 2017+ built-in support for CMake
2. Create a Solution file for Visual Studio
3. Use a script (MSBuild)
### 1. Use VS2017 with built-in support for CMake
VS2017 now allows to develop projects built with CMake without the need to generate VS projects and solutions. For more information, please read [this article](https://blogs.msdn.microsoft.com/vcblog/2016/10/05/cmake-support-in-visual-studio/) from the Visual C++ Team.
### 1. Use VS2017+ with built-in support for CMake
VS2017 or newer allows you to develop projects built with CMake without the need to generate VS
projects and solutions. For more information, please read [this article](https://blogs.msdn.microsoft.com/vcblog/2016/10/05/cmake-support-in-visual-studio/)
from the Visual C++ Team.
You just need to open the root folder of the git repository in VS (which contains the file
`CMakeSettings.json`):
You just need to open the root folder of the git repository in VS (which contains the file `CMakeSettings.json`):
- In an Explorer window, right-click then `Open in Visual Studio`
- In a VS2017 instance, `File > Open > Folder...`
You may need to edit the file `CMakeSettings.json` to set the environment variable for the dependencies.
You may need to edit the file `CMakeSettings.json` to set the environment variables for the
dependencies.
The developing experience is very similar than when using a solution file (Intellisense, build project with `F7`, debug, set breakpoints and watch variables, ...), except that the project configuration is done in 3 different files:
The development experience is very similar to that of using a solution file (IntelliSense, building
the project with `F7`, debugging, setting breakpoints and watching variables, ...), except that the
project configuration is done in 3 different files:
- `CMakeList.txt`: this is the CMake source file from the original project.
It is used to configure the build targets, add/remove files to compile and configure the compiler flags.
- `CMakeLists.txt`: this is the CMake source file from the original project.
It is used to configure the build targets, add/remove files to compile and configure the
compiler flags.
- `CMakeSettings.json`: this file is required to enable CMake integration in VS2017.
Use this file to configure the environment variables and the parameters passed to CMake to generate the project.
- `.vs\launch.vs.json`: this is a user specific file and it is not commited in the Git repo
Use this file to configure the debugging targets.
- `CMakeSettings.json`: this file is required to enable CMake integration in VS2017.
Use this file to configure the environment variables and the parameters passed to CMake to
generate the project.
- `.vs\launch.vs.json`: this is a user-specific file and it is not committed to the Git repo.
Use this file to configure the debugging targets.
For example:
{
@ -116,8 +107,8 @@ The developing experience is very similar than when using a solution file (Intel
"--keep-best",
"--seed 1111",
"--exponential-smoothing",
"--normalize=1",
"--beam-size=12",
"--normalize 1",
"--beam-size 12",
"--quiet-translation"
]
}
@ -125,12 +116,13 @@ The developing experience is very similar than when using a solution file (Intel
}
### 2. Create solution and project files for Visual Studio : `CreateVSProjects.bat`
If you have a previous version of Visual Studio, you will need to use CMake to generate the projects files.
If you have a previous version of Visual Studio, you will need to use CMake to generate the project
files.
The provided script `CreateVSProjects.bat` runs the dependency checks then invokes CMake with the right parameters to create the solutions for Visual Studio.
The provided script `CreateVSProjects.bat` runs the dependency checks then invokes CMake with the
right parameters to create the solutions for Visual Studio.
### 3. Use MSBuild : `BuildRelease.bat`
@ -140,61 +132,31 @@ The last alternative is to use the script `BuildRelease.bat` that will:
- Create the VS project files
- Invoke MSBuild on these projects to build the targets in Release.
<!--
This is interesting for developers, hiding away from users.
---
## Changes from the master branch
This part gives more information on all changes done in this PR. Refer to [this page](https://github.com/cedrou/marian-dev/commits/build_on_win) for commits.
## Known issues
1. __Fix Cuda error : Unsupported Visual Studio Version Error__
See above for justification and fixes
1. __Patch for CUDA 9.2 error: Unsupported Visual Studio Version Error__
2. __Fix VS compiler flags / Build in Release, with improved debug info__
Added VS specific compile and link flags
When using CUDA 9.2, the latest versions of Visual Studio 2017 are not officially supported by
CUDA. Two fixes are proposed:
- Downgrade Visual Studio to a supported version
- Edit the file `<CUDA install path>\include\crt\host_config.h` and change the line 131:
3. __Fix Warning: D9002: ignoring unknown option '-m64'__
This one is related to a compiler flag added while finding the package MKL that does not exists for MS compiler.
131 #if _MSC_VER < 1600 || _MSC_VER > 1914
4. __Fix marian::Backend, marian::cpu::Backend and marian::gpu::Backend conflicts__
There were name conflicts between the 3 `Backend` classes that confused the compiler:
> template instantiation resulted in unexpected function type of "void(Ptr\<marian::gpu::Backend\> backend, [...])" (the meaning of a name may have changed since the template declaration -- the type of the template is "void(Ptr\<marian::Backend\> backend, [...]").
into:
To solve this, I changed the declaration of 3 methods to specify the full name with namespace (`marian::Backend`, instead of `Backend`).
131 #if _MSC_VER < 1600 || _MSC_VER > 1915
5. __Fix error : identifier "CUDA_FLT_MAX" is undefined in device code__
`CUDA_FLT_MAX` is not seen by CUDA from the device code and I had to declare it as `__constant__`.
For more information, read this [nVidia forum](https://devtalk.nvidia.com/default/topic/1022648/cuda-setup-and-installation/cuda-9-unsupported-visual-studio-version-error/4)
From [StackOverflow](https://stackoverflow.com/questions/20111409/how-to-pass-structures-into-cuda-device#comment29972423_20112013):
> Undecorated constants get compiled into both host and device code with gcc based toolchains, but not with the Microsoft compiler.
2. __It does not compile with Boost 1.73 or newer__
6. __Fix fatal error C1019: unexpected #else__
There was preprocessor instructions (`#ifdef ... #else ... #endif`) in the middle of a call of a macro function (`CUDNN_CALL`), which is not allowed with MS compiler.
At the moment (version 1.9.26) SimpleWebSocketServer, a 3rd party library that Marian uses for
marian-server, does not support Boost newer than 1.72. Since vcpkg does not allow installing
specific library versions, you need to revert the `ports/boost*` directories to install an older Boost.
See `CheckDeps.bat` for an example.
7. __Fix mismatched class/struct forward declarations__
Microsoft's C++ name mangling makes a distinction between `class` and `struct` objects, so definitions and forward declaration must match.
See [this pdf](https://www.agner.org/optimize/calling_conventions.pdf), page 27, for more information.
Note that Boost is required only if you compile marian-server, i.e. if you set `COMPILE_SERVER` to
`TRUE` in CMakeSettings.json when compiling with CMake.
_Note_: This fix was invalidated by commit # from @frankseide
8. __Fix unresolved external due to a removed #include directive__
There was an include directive removed from MSVC compilation, but this prevented the build of the project.
I'm not sure why this was removed; the comment is:
#ifndef _WIN32 // TODO: remove this once I updated the Linux-side makefile
9. __Fix CUDA+MSVC incompatibility with Boost.Preprocessor__
The toolchain nvcc+msvc is not correctly handled in Boost.Preprocessor module. See [this issue](https://github.com/boostorg/preprocessor/issues/15). In the meantime, the recommended workaround is to disable Variadic Macro support in Boost.
I created a [PR](https://github.com/boostorg/preprocessor/pull/18) in the Boost repo on GitHub to fix this.
_Note_: The library sources have been fixed, but this fix is still needed until the next release of Boost.Preprocessor
10. __Provide implementation for mkstemp / Fix temporary file creation__
The code explicitely disabled the creation of temporary files because "mkstemp not available in Windows". In fact, `mktemp` and `unlink` are both implemented, but they don't work as expected. I used `tempnam` to replace `mkstemp`, and added the flag `_O_TEMPORARY` to the parameters of `open` to automatically delete the file when it is closed. If `unlinkEarly` is not set, I added a call to `remove` in the destructor to delete the file after its closure.
I also handled the case of the default value for the `base` parameter: the path `\tmp` doesnot exist on Windows, so it is replaced by the value of the `%TMP%` environment variable in `NormalizeTempPrefix`.
11. __Revert commit #2f8b093 + Fix copy/paste error while fixing #301 + restrict fix to MSVC compiler.__
cf [Issue #301](https://github.com/marian-nmt/marian-dev/issues/301) -->

142
vs/pathspec-boost-1.72.txt Executable file
View File

@ -0,0 +1,142 @@
ports/boost/
ports/boost-accumulators/
ports/boost-algorithm/
ports/boost-align/
ports/boost-any/
ports/boost-array/
ports/boost-asio/
ports/boost-assert/
ports/boost-assign/
ports/boost-atomic/
ports/boost-beast/
ports/boost-bimap/
ports/boost-bind/
ports/boost-build/
ports/boost-callable-traits/
ports/boost-chrono/
ports/boost-circular-buffer/
ports/boost-compatibility/
ports/boost-compute/
ports/boost-concept-check/
ports/boost-config/
ports/boost-container/
ports/boost-container-hash/
ports/boost-context/
ports/boost-contract/
ports/boost-conversion/
ports/boost-convert/
ports/boost-core/
ports/boost-coroutine/
ports/boost-coroutine2/
ports/boost-crc/
ports/boost-date-time/
ports/boost-detail/
ports/boost-di/
ports/boost-dll/
ports/boost-dynamic-bitset/
ports/boost-endian/
ports/boost-exception/
ports/boost-fiber/
ports/boost-filesystem/
ports/boost-flyweight/
ports/boost-foreach/
ports/boost-format/
ports/boost-function/
ports/boost-functional/
ports/boost-function-types/
ports/boost-fusion/
ports/boost-geometry/
ports/boost-gil/
ports/boost-graph/
ports/boost-graph-parallel/
ports/boost-hana/
ports/boost-heap/
ports/boost-histogram/
ports/boost-hof/
ports/boost-icl/
ports/boost-integer/
ports/boost-interprocess/
ports/boost-interval/
ports/boost-intrusive/
ports/boost-io/
ports/boost-iostreams/
ports/boost-iterator/
ports/boost-lambda/
ports/boost-lexical-cast/
ports/boost-locale/
ports/boost-local-function/
ports/boost-lockfree/
ports/boost-log/
ports/boost-logic/
ports/boost-math/
ports/boost-metaparse/
ports/boost-modular-build-helper/
ports/boost-move/
ports/boost-mp11/
ports/boost-mpi/
ports/boost-mpl/
ports/boost-msm/
ports/boost-multi-array/
ports/boost-multi-index/
ports/boost-multiprecision/
ports/boost-numeric-conversion/
ports/boost-odeint/
ports/boost-optional/
ports/boost-outcome/
ports/boost-parameter/
ports/boost-parameter-python/
ports/boost-phoenix/
ports/boost-poly-collection/
ports/boost-polygon/
ports/boost-pool/
ports/boost-predef/
ports/boost-preprocessor/
ports/boost-process/
ports/boost-program-options/
ports/boost-property-map/
ports/boost-property-tree/
ports/boost-proto/
ports/boost-ptr-container/
ports/boost-python/
ports/boost-qvm/
ports/boost-random/
ports/boost-range/
ports/boost-ratio/
ports/boost-rational/
ports/boost-regex/
ports/boost-safe-numerics/
ports/boost-scope-exit/
ports/boost-serialization/
ports/boost-signals/
ports/boost-signals2/
ports/boost-smart-ptr/
ports/boost-sort/
ports/boost-spirit/
ports/boost-stacktrace/
ports/boost-statechart/
ports/boost-static-assert/
ports/boost-system/
ports/boost-test/
ports/boost-thread/
ports/boost-throw-exception/
ports/boost-timer/
ports/boost-tokenizer/
ports/boost-tti/
ports/boost-tuple/
ports/boost-type-erasure/
ports/boost-type-index/
ports/boost-typeof/
ports/boost-type-traits/
ports/boost-ublas/
ports/boost-units/
ports/boost-unordered/
ports/boost-utility/
ports/boost-uuid/
ports/boost-variant/
ports/boost-variant2/
ports/boost-vcpkg-helpers/
ports/boost-vmd/
ports/boost-wave/
ports/boost-winapi/
ports/boost-xpressive/
ports/boost-yap/