Commit 55a7047f8a: merge with internal master
@ -20,12 +20,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||
- Support for CUDA 11.
|
||||
- General improvements and fixes for MPI handling, which was essentially non-functional before (syncing, random seeds, deadlocks during saving, validation, etc.)
|
||||
- Allow compiling with -DUSE_MPI=on and -DUSE_STATIC_LIBS=on, although MPI is still linked dynamically since it has many dependencies.
|
||||
- Fix building server with Boost 1.75
|
||||
- Missing implementation for the cos/tan expression operators
|
||||
|
||||
### Changed
|
||||
- Change compile options a la -DCOMPILE_CUDA_SM35 to -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL,
|
||||
-DCOMPILE_PASCAL, -DCOMPILE_VOLTA, -DCOMPILE_TURING and -DCOMPILE_AMPERE
|
||||
- Disable -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL by default.
|
||||
- Dropped support for legacy graph groups.
|
||||
- Developer documentation framework based on Sphinx+Doxygen+Breathe+Exhale
|
||||
- Expression graph documentation (#788)
|
||||
- Graph operators documentation (#801)
|
||||
|
||||
## [1.10.0] - 2021-02-06
|
||||
|
||||
|
@ -169,7 +169,7 @@ SHORT_NAMES = NO
|
||||
# description.)
|
||||
# The default value is: NO.
|
||||
|
||||
JAVADOC_AUTOBRIEF = NO
|
||||
JAVADOC_AUTOBRIEF = YES
|
||||
|
||||
# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
|
||||
# line (until the first dot) of a Qt-style comment as the brief description. If
|
||||
|
@ -118,7 +118,6 @@ ModelState::SetMarianConfigPath()
|
||||
// Set the Marian config path.
|
||||
std::string config_path("/var/azureml-app/");
|
||||
config_path.append(std::getenv("AZUREML_MODEL_DIR"));
|
||||
config_path.append("/nlxseq2seq/triton/nlxseq2seq/1/data/model/");
|
||||
config_path.append(config_filepath_str);
|
||||
marian_config_path_ = config_path;
|
||||
|
||||
@ -199,6 +198,16 @@ ModelInstanceState::ModelInstanceState(
|
||||
|
||||
extern "C" {
|
||||
|
||||
void
|
||||
handler(int sig) {
|
||||
void* array[30];
|
||||
|
||||
size_t size = backtrace(array, 30);
|
||||
|
||||
fprintf(stderr, "Error: signal %d, Exception info:\n", sig);
|
||||
backtrace_symbols_fd(array, size, STDERR_FILENO);
|
||||
}
|
||||
|
||||
TRITONSERVER_Error*
|
||||
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
|
||||
{
|
||||
@ -209,6 +218,9 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
|
||||
TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state))
|
||||
);
|
||||
|
||||
signal(SIGSEGV, handler);
|
||||
signal(SIGABRT, handler);
|
||||
|
||||
return nullptr; // success
|
||||
}
|
||||
|
||||
@ -308,7 +320,6 @@ TRITONBACKEND_ModelInstanceExecute(
|
||||
|
||||
std::vector<TRITONBACKEND_Input*> request_input;
|
||||
std::vector<int> request_batch_size;
|
||||
std::vector<std::string> inputs;
|
||||
std::string input_strings;
|
||||
|
||||
// Create a single response object for each request. If something
|
||||
@ -389,14 +400,13 @@ TRITONBACKEND_ModelInstanceExecute(
|
||||
}
|
||||
content_buffer.insert(
|
||||
content_buffer.end(), reinterpret_cast<const char*>(input_buffer) + 4,
|
||||
reinterpret_cast<const char*>(input_buffer) + buffer_byte_size - 4
|
||||
reinterpret_cast<const char*>(input_buffer) + buffer_byte_size
|
||||
);
|
||||
}
|
||||
|
||||
std::string s(content_buffer.begin(), content_buffer.end());
|
||||
int count = std::count(s.begin(), s.end(), '\n');
|
||||
request_batch_size.push_back(count + 1);
|
||||
inputs.push_back(s);
|
||||
content_buffer.clear();
|
||||
|
||||
if (input_strings.empty()) {
|
||||
@ -433,12 +443,16 @@ TRITONBACKEND_ModelInstanceExecute(
|
||||
if (output_content == nullptr) {
|
||||
output_content = pos;
|
||||
} else {
|
||||
strcat(output_content, "\n");
|
||||
strcat(output_content, pos);
|
||||
// Replace the null terminator of the previous sentence with a newline char
|
||||
*(pos - 1) = '\n';
|
||||
}
|
||||
// Move to next output content.
|
||||
if (p != nullptr) {
|
||||
pos = p + 1;
|
||||
} else {
|
||||
// Break if there is no output content left; even though batch_size > 0,
|
||||
// '\n' at the end may be processed by Marian.
|
||||
break;
|
||||
}
|
||||
batch_size--;
|
||||
}
|
||||
@ -567,4 +581,4 @@ TRITONBACKEND_ModelInstanceExecute(
|
||||
|
||||
} // extern "C"
|
||||
|
||||
}}} // namespace triton::backend::marian
|
||||
}}} // namespace triton::backend::marian
|
||||
|
@ -1,4 +1,9 @@
|
||||
#pragma once
|
||||
#include <stdio.h>
|
||||
#include <execinfo.h>
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DLLEXPORT extern "C" __declspec(dllexport)
|
||||
|
doc/.gitignore (new file, 4 lines)
@ -0,0 +1,4 @@
|
||||
api
|
||||
build
|
||||
doxygen
|
||||
venv
|
doc/Makefile (new file, 23 lines)
@ -0,0 +1,23 @@
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: clean help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
# Clean target as recommended by Exhale
|
||||
# https://exhale.readthedocs.io/en/latest/usage.html#optional-create-a-proper-clean-target
|
||||
clean:
|
||||
rm -rf doxygen/ api/
|
||||
@$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
doc/README.md (new file, 51 lines)
@ -0,0 +1,51 @@
|
||||
# Marian NMT code documentation and library API
|
||||
|
||||
This directory contains code documentation and library API for developers of Marian NMT.
|
||||
|
||||
The documentation is generated using
|
||||
[Sphinx](https://www.sphinx-doc.org/en/master/usage/quickstart.html) +
|
||||
[Breathe](https://breathe.readthedocs.io/en/latest/directives.html) +
|
||||
[Doxygen](http://www.doxygen.nl/manual/docblocks.html) +
|
||||
[Exhale](https://exhale.readthedocs.io/en/latest/usage.html).
|
||||
The documentation source is written in `.rst` or `.md` files with special directives that allow
referencing the C++ source code and its documentation. The source documents are then built into static
HTML pages.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
On Ubuntu 20.04, install the following packages:
|
||||
|
||||
sudo apt-get install python3 python3-pip python3-setuptools doxygen
|
||||
|
||||
Then set up a Python environment and install modules:
|
||||
|
||||
pip3 install virtualenv
|
||||
virtualenv venv -p python3
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
Documentation building should also work on Windows, but it has not been tested.
|
||||
|
||||
|
||||
## Generation
|
||||
|
||||
The documentation can be generated by running:
|
||||
|
||||
make html
|
||||
|
||||
The website will be generated into `build/html` and can be viewed by opening _index.html_ in your browser.
|
||||
|
||||
Directories:
|
||||
|
||||
- `build` - automatically generated output directory for the HTML documentation
|
||||
- `doxygen` - automatically generated Doxygen XML files
|
||||
- `api` - library API documentation automatically generated with Exhale
|
||||
- `.rst` and `.md` files in this directory and its subdirectories are documentation source files
|
||||
- `_static` - custom CSS and JavaScript files
|
||||
|
||||
|
||||
## Writing documentation
|
||||
|
||||
To be documented...
|
doc/_static/css/custom.css (new file, 4 lines)
@ -0,0 +1,4 @@
|
||||
.wy-body-for-nav > .wy-grid-for-nav > .wy-nav-side {
|
||||
border-bottom: 5px solid #28bbee;
|
||||
/*background-color: #494d55;*/
|
||||
}
|
doc/conf.py (new file, 120 lines)
@ -0,0 +1,120 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
import os
|
||||
import datetime
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = 'Marian NMT'
|
||||
copyright = '2021, Marian NMT Team'
|
||||
author = 'Marian NMT Team'
|
||||
|
||||
# The full version, including alpha/beta/rc tags
|
||||
# TODO: add GitHub commit hash to the version
|
||||
version_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'VERSION')
|
||||
with open(os.path.abspath(version_file)) as f:
|
||||
version = f.read().strip()
|
||||
release = version + ' ' + str(datetime.date.today())
|
||||
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
'sphinx.ext.imgmath',
|
||||
'sphinx.ext.todo',
|
||||
'breathe',
|
||||
'exhale',
|
||||
'recommonmark',
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = [
|
||||
'build',
|
||||
'doxygen',
|
||||
'venv',
|
||||
'README.md',
|
||||
]
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
htmlhelp_basename = 'marian'
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
html_css_files = ['css/custom.css']
|
||||
|
||||
# The base URL which points to the root of the HTML documentation
|
||||
html_baseurl = 'http://marian-nmt.github.io/docs/api'
|
||||
|
||||
|
||||
# -- Extension configuration -------------------------------------------------
|
||||
|
||||
breathe_projects = { 'marian': './doxygen/xml' }
|
||||
breathe_default_project = 'marian'
|
||||
|
||||
doxygen_config = """
|
||||
INPUT = ../src
|
||||
EXCLUDE += ../src/3rd_party
|
||||
EXCLUDE += ../src/tests
|
||||
EXCLUDE_PATTERNS = *.md *.txt
|
||||
FILE_PATTERNS += *.cu
|
||||
EXTENSION_MAPPING += cu=C++ inc=C++
|
||||
ENABLE_PREPROCESSING = YES
|
||||
JAVADOC_AUTOBRIEF = YES
|
||||
WARN_IF_UNDOCUMENTED = NO
|
||||
"""
|
||||
|
||||
exhale_args = {
|
||||
'containmentFolder' : './api',
|
||||
'rootFileName' : 'library_index.rst',
|
||||
'rootFileTitle' : 'Library API',
|
||||
'doxygenStripFromPath' : '..',
|
||||
'createTreeView' : True,
|
||||
'exhaleExecutesDoxygen' : True,
|
||||
'exhaleDoxygenStdin' : doxygen_config.strip(),
|
||||
}
|
||||
|
||||
primary_domain = 'cpp'
|
||||
highlight_language = 'cpp'
|
||||
|
||||
# A trick to include markdown files from outside the source directory using
|
||||
# 'mdinclude'. Warning: all other markdown files not included via 'mdinclude'
|
||||
# will be rendered using recommonmark as recommended by Sphinx
|
||||
from m2r import MdInclude
|
||||
|
||||
def setup(app):
|
||||
# from m2r to make `mdinclude` work
|
||||
app.add_config_value('no_underscore_emphasis', False, 'env')
|
||||
app.add_config_value('m2r_parse_relative_links', False, 'env')
|
||||
app.add_config_value('m2r_anonymous_references', False, 'env')
|
||||
app.add_config_value('m2r_disable_inline_math', False, 'env')
|
||||
app.add_directive('mdinclude', MdInclude)
|
doc/contributing.rst (new file, 1 line)
@ -0,0 +1 @@
|
||||
.. mdinclude:: ../CONTRIBUTING.md
|
doc/graph.md (new file, 406 lines)
@ -0,0 +1,406 @@
|
||||
# Expression graphs
|
||||
|
||||
The design of the deep learning framework in Marian is based on reverse-mode [auto-differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) (also known as backpropagation) with dynamic computation graphs.
|
||||
Computation graphs allow a great deal of freedom in network architectures, and they can deal with complicated structures like conditions and loops.
|
||||
The dynamic declaration, which means a new graph is created for each training instance (for a training example or a batch), is also advantageous.
|
||||
It allows handling of variably sized inputs, as well as the cases where the graph may change depending on the results of previous steps.
|
||||
Compared to static declaration, dynamic computation graphs can be expensive to create and optimise.
|
||||
Marian uses careful memory management to remove overhead in computation graph construction, and supports efficient execution on both CPU and GPU.
|
||||
The main implementation of the computation graph is under the [`src/graph`](api/dir_src_graph.html#dir-src-graph) directory.
|
||||
|
||||
Building blocks for graphs:
|
||||
|
||||
- [graph construction](#graph-construction)
|
||||
- [node types](#node-types)
|
||||
- [graph execution](#graph-execution)
|
||||
|
||||
## Graph construction
|
||||
|
||||
What is a computation graph?
|
||||
All the numerical computations are expressed as a computation graph.
|
||||
A computation graph (or graph in short) is a series of operations arranged into a graph of nodes.
|
||||
To put it simply, a graph is just an arrangement of nodes that represent what you want to do with the data.
|
||||
|
||||
**Example 1**
|
||||
|
||||
Suppose you want to calculate the expression: `z=x*y+sin(x)`.
|
||||
|
||||
The computation graph of this expression is something like Figure 1.
|
||||
|
||||
![fig1](images/graph_example1.jpg "Figure 1 An example of computation graph")
|
||||
|
||||
*Figure 1 An example of computation graph*
|
||||
|
||||
In Marian, the `ExpressionGraph` class is the main implementation of a computation graph.
|
||||
An `ExpressionGraph` object keeps a record of data (tensors) and all operations in a directed graph consisting of `Node` objects.
|
||||
A `Node` is the basic unit of a graph. It can be an operation (e.g., `dot()`) or a tensor.
Each operation in a graph is a `NaryNodeOp` (a subclass of the `Node` class).
Each operation defines its forward and backward steps.
Besides operations, a `Node` can also be a constant tensor (`ConstantNode`) or a parameter tensor (`ParamNode`).
|
||||
|
||||
To create a graph, we use the `New<>` shortcut in place of regular constructors:
|
||||
|
||||
```cpp
|
||||
// create a graph
|
||||
auto graph = New<ExpressionGraph>();
|
||||
```
|
||||
|
||||
After creating a graph, we also need to initialise the graph object with device options via `setDevice()` and workspace memory via `reserveWorkspaceMB()`; otherwise the program will crash.
|
||||
|
||||
```cpp
|
||||
// initialise graph with device options
|
||||
// here we specify device no. is 0
|
||||
// device type can be DeviceType::cpu or DeviceType::gpu
|
||||
graph->setDevice({0, DeviceType::cpu});
|
||||
// preallocate workspace memory (MB) for the graph
|
||||
graph->reserveWorkspaceMB(128);
|
||||
```
|
||||
The _workspace memory_ is the amount of memory available for the forward and backward steps of the training procedure.
It does not include the model size and optimizer parameters, which are allocated outside the workspace.
Hence you cannot allocate all device memory to the workspace.
|
||||
|
||||
To build a graph, Marian offers a set of shortcut functions that implement the common expression operators for a neural network (see [`src/graph/expression_operators.h`](api/program_listing_file_src_graph_expression_operators.h.html)), such as `affine()`.
These functions construct the corresponding operation nodes in the graph and link them with other nodes.
E.g., `affine()` constructs an `AffineNodeOp` node in the graph.
|
||||
Thus, building a graph turns into a simple task of defining expressions by using those functions.
|
||||
|
||||
**Building graph of Example 1 using Marian**
|
||||
|
||||
The following code is used to build the graph in Example 1 with inputs `x=2` and `y=3`.
|
||||
|
||||
```cpp
|
||||
// create and initialise a graph object
|
||||
auto graph = New<ExpressionGraph>();
|
||||
graph->setDevice({0, DeviceType::cpu});
|
||||
graph->reserveWorkspaceMB(8);
|
||||
// add input node x
|
||||
auto x = graph->constant({1,1}, inits::fromValue(2));
|
||||
// add input node y
|
||||
auto y = graph->constant({1,1}, inits::fromValue(3));
|
||||
// define expression
|
||||
auto mulOp = x*y;
|
||||
auto sinOp = sin(x);
|
||||
auto z = mulOp + sinOp;
|
||||
// You can also define this expression: auto z = x*y + sin(x);
|
||||
```
|
||||
|
||||
For the above example, `constant()` is used to construct a constant node (a tensor) in the graph as the input.
|
||||
We will give more details about this function in the next section [**Node types**](#node-types).
|
||||
The operators `*`, `+` and function `sin()` add corresponding operation nodes (i.e., `MultNodeOp` and `SinNodeOp`) in the graph.
|
||||
|
||||
To check the graph, Marian offers `graphviz()` function to generate graph layout in Graphviz format for visualisation.
|
||||
This visualisation might not be practical for real-size graphs due to an enormous number of nodes and layers.
|
||||
You can print the graph layout on console by running the following code:
|
||||
|
||||
```cpp
|
||||
// print the graph layout on console
|
||||
std::cout<<graph->graphviz()<<std::endl;
|
||||
```
|
||||
|
||||
**Graph visualisation of Example 1**
|
||||
|
||||
The resulting graph is shown in Figure 2. Here we use an online Graphviz editor [edotor](https://edotor.net/) to generate the graph (by pasting the output of `graphviz()`).
|
||||
|
||||
![fig2](images/example1_dot.png "Figure 2 Graph layout of Example 1")
|
||||
|
||||
*Figure 2 Graph layout of Example 1*
|
||||
|
||||
In Figure 2, there are two numbers (between the pair of parentheses) in each node.
|
||||
The first number indicates the node ID, and the second number specifies whether the node is trainable (0 means no; 1 means yes).
|
||||
We will cover the concept of *trainable* in [**ParamNode section**](#paramnode).
|
||||
|
||||
One thing to notice here is that Marian adopts dynamic computation graphs;
this means that the nodes are consumed once the forward or backward pass is performed.
Thus, we need to call the `graphviz()` function before performing the computation.
|
||||
|
||||
## Node types
|
||||
|
||||
As mentioned earlier, `Node` is the basic unit of a graph.
|
||||
Each `Node` defines its forward steps in `Node::forward()` and backward steps in `Node::backward()`.
|
||||
To access the resulting tensor from the forward pass, we can call `Node::val()`,
while `Node::grad()` returns the accumulated gradients (a tensor) from the backward pass.
|
||||
There are three main classes of Node in Marian: `ConstantNode`, `ParamNode` and `NaryNodeOp`.
|
||||
|
||||
### ConstantNode
|
||||
|
||||
The `ConstantNode` class is used to construct a constant node in the graph.
|
||||
A constant node is actually a constant tensor whose value is immutable during the training.
|
||||
A `ConstantNode` instance is usually used to construct the input layer.
|
||||
To construct a constant node in the graph, we can use `constant()` function in the `ExpressionGraph` class.
|
||||
We need to specify the shape and element type for the constant node.
|
||||
For the shape, we can initialise a `Shape` instance in the way of vector initialisation.
|
||||
E.g., `Shape shape={2,3};` defines a 2D matrix with `dim[0]`=2 and `dim[1]`=3.
|
||||
The element type must be one of the values stored in `Type` enumeration.
|
||||
`Type` lists all supported data types in Marian, e.g., `Type::float16`.
If the type is not specified, the default type of the graph is used.
The default type is usually `Type::float32` unless you change it with `setDefaultElementType()`.
|
||||
|
||||
```cpp
|
||||
// construct a constant node in the graph with default type
|
||||
auto x = graph->constant({N, NUM_FEATURES}, inits::fromVector(inputData));
|
||||
```
|
||||
|
||||
For the above example, the shape of the constant node is `{N, NUM_FEATURES}`, and the value of the constant node is initialised from a vector `inputData`.
|
||||
`inits::fromVector()` returns a `NodeInitializer` which is a functor used to initialise a tensor by copying from the given vector.
|
||||
More functions used to initialise a node can be found in [`src/graph/node_initializers.h`](api/namespace_marian__inits.html#namespace-marian-inits) file.
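For reference, a few commonly used initializers are shown below. This is a hand-picked sketch that assumes `inits::zeros()` and `inits::ones()` initializers analogous to the `inits::fromValue()` and `inits::uniform()` calls used elsewhere in this guide; the exact set available may vary between Marian versions.

```cpp
// A small selection of node initializers from marian::inits
auto zeros  = graph->constant({2, 3}, inits::zeros());              // all zeros
auto ones   = graph->constant({2, 3}, inits::ones());               // all ones
auto filled = graph->constant({2, 3}, inits::fromValue(3.14f));     // a single value
auto random = graph->constant({2, 3}, inits::uniform(-0.1f, 0.1f)); // uniform random values
```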
|
||||
Marian also provides some shortcut functions to construct special constant nodes, such as `ones()` and `zeros()`:
|
||||
|
||||
```cpp
|
||||
// construct a constant node with 1
|
||||
auto ones = graph->ones({10,10});
|
||||
// construct a constant node with 0
|
||||
auto zeros = graph->zeros({10,10});
|
||||
```
|
||||
|
||||
### ParamNode
|
||||
|
||||
`ParamNode` is used to store model parameters whose value can be changed during the training, such as weights and biases.
|
||||
In addition to the shape and the element type, we need to specify whether a `ParamNode` object is _trainable_ or not.
|
||||
If a parameter node is _trainable_, then its value will be tracked and updated during the training procedure.
|
||||
For a `ParamNode`, the default value of `trainable_` is `true`.
|
||||
We can set whether a parameter node is trainable with the `Node::setTrainable()` function.
|
||||
To construct a parameter node in the graph, we use the `param()` function in the `ExpressionGraph` class.
|
||||
For a parameter node, we need to specify its name.
|
||||
|
||||
```cpp
|
||||
// construct a parameter node called W1 in the graph
|
||||
auto W1 = graph->param("W1", {NUM_FEATURES, 5}, inits::uniform(-0.1f, 0.1f));
|
||||
```
|
||||
|
||||
The parameter node `W1` has a shape of `{NUM_FEATURES, 5}`, and is initialised with random numbers from the uniform distribution `Uniform(-0.1, 0.1)`.
|
||||
|
||||
### NaryNodeOp
|
||||
|
||||
`NaryNodeOp` is the base class that defines the operations in a graph.
|
||||
It mainly contains unary and binary operators.
|
||||
Each `NaryNodeOp` defines its forward operations in `Node::forwardOps()` and backward operations in `Node::backwardOps()`.
|
||||
In the current version of Marian, we provide a set of common operations (inherited from `NaryNodeOp`) used to build a neural network,
|
||||
such as `AffineNodeOp` (affine transformation), `CrossEntropyNodeOp` (cross-entropy loss function) and `TanhNodeOp` (tanh activation function).
|
||||
As mentioned earlier, Marian implements a set of APIs that can easily add operations to the graph.
|
||||
E.g., we can use `affine()` to perform affine transformation and then `tanh()` to perform tanh activation function on the results:
|
||||
|
||||
```cpp
|
||||
// perform affine transformation: x*W1+b
|
||||
// and then perform tanh activation function
|
||||
auto h = tanh(affine(x, W1, b1));
|
||||
```
|
||||
|
||||
In the above example, `affine()` and `tanh()` actually add `AffineNodeOp` and `TanhNodeOp` nodes to the graph.
|
||||
More shortcut functions used to add operations to the graph can be found in the [`src/graph/expression_operators.h`](api/program_listing_file_src_graph_expression_operators.h.html) file.
|
||||
|
||||
## Graph execution
|
||||
|
||||
Once you finish building a graph by adding all the nodes, now you can perform the real computation.
|
||||
|
||||
### Forward pass
|
||||
|
||||
The forward pass refers to the calculation process.
|
||||
It traverses through all nodes from the input layer (leaves) to the output layer (root).
|
||||
To perform the forward pass, you can call the function `forward()`. The `forward()` function mainly does two things:
|
||||
|
||||
- allocates memory for each node (`Node::allocate()`)
- computes the new tensor for each node by performing the required operations (`Node::forward()`); the resulting tensor is stored in the `val_` attribute of each node.
|
||||
|
||||
**Forward pass of Example 1**
|
||||
|
||||
To run the forward pass of Example 1, you can run the following code:
|
||||
|
||||
```cpp
|
||||
// Perform the forward pass on the nodes of the graph
|
||||
graph->forward();
|
||||
// get the computation result of z
|
||||
std::vector<float> w;
|
||||
z->val()->get(w);
|
||||
std::cout<<"z="<<w[0]<<std::endl;
|
||||
// The output is: z=6.9093
|
||||
```
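This value can be checked by hand: with `x=2` and `y=3` (radians), `z = x*y + sin(x) = 6 + sin(2) ≈ 6 + 0.9093 = 6.9093`.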
|
||||
|
||||
### Backward pass
|
||||
|
||||
The backward pass refers to the process of computing the output error.
|
||||
It traverses through all *trainable* nodes from the output layer to the input layer.
|
||||
You can call `backward()` to perform the backward pass.
|
||||
The `backward()` function mainly computes the gradients using the chain rule:
|
||||
|
||||
- allocates memory and initialises gradients for each *trainable* node
- computes the gradients based on the backward steps (`Node::backwardOps()`) of each node, and stores them in the `adj_` attribute of each node
- using the chain rule, propagates the gradients all the way back to the input layer
|
||||
|
||||
We also provide a shortcut function `backprop()` which performs first the forward pass and then the backward pass on the nodes of the graph:
|
||||
|
||||
```cpp
|
||||
// Perform backpropagation on the graph
|
||||
graph->backprop();
|
||||
// This function is equal to the following code:
|
||||
/*
|
||||
graph->forward();
|
||||
graph->backward();
|
||||
*/
|
||||
```
|
||||
|
||||
**Backward pass of modified Example 1**
|
||||
|
||||
As shown in Figure 2, there is no trainable node in the graph of Example 1;
|
||||
this means we cannot perform the backward pass on this graph.
|
||||
To demonstrate the backward pass, we modify Example 1 by changing the constant node `x` to a parameter node (change `constant()` to `param()`).
|
||||
Here is the modification:
|
||||
|
||||
```cpp
|
||||
// add parameter node x
|
||||
auto x = graph->param("x", {1,1}, inits::fromValue(2));
|
||||
```
|
||||
|
||||
The resulting graph is also different as displayed in Figure 3.
|
||||
|
||||
|
||||
![fig3](images/example1_dot2.png "Figure 3 Graph layout of modified Example 1")
|
||||
|
||||
*Figure 3 Graph layout of modified Example 1*
|
||||
|
||||
To perform the backward pass of modified Example 1, you can run the following code:
|
||||
|
||||
```cpp
|
||||
// Perform the backward pass on the trainable nodes of the graph
|
||||
graph->backward();
|
||||
// get the gradient of x node
|
||||
std::vector<float> b;
|
||||
x->grad()->get(b);
|
||||
std::cout<<"dz/dx="<<b[0]<<std::endl;
|
||||
// The output is: dz/dx=2.58385
|
||||
```
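This agrees with the analytic gradient: `dz/dx = y + cos(x) = 3 + cos(2) ≈ 3 - 0.4162 = 2.5838`.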
|
||||
|
||||
### Optimiser
|
||||
|
||||
After the backward pass, we obtain the gradients of the leaves.
|
||||
However, the job is not done yet.
|
||||
To train a model, we need to update the model parameters according to the gradients.
|
||||
This comes to how we define the loss function and optimiser for the graph.
|
||||
|
||||
A loss function is used to calculate the model error between the predicted value and the actual value.
|
||||
The goal is to minimise this error during training.
|
||||
In a graph, the loss function is also represented as a group of node(s).
|
||||
You can also use the operators provided in [`expression_operators.h`](api/program_listing_file_src_graph_expression_operators.h.html) file to define the loss function.
|
||||
E.g., Marian offers `cross_entropy()` function to compute the cross-entropy loss between true labels and predicted labels.
|
||||
|
||||
**Define a loss function for modified Example 1**
|
||||
|
||||
Suppose we know the actual value of `z` is 6 with `y = 3`, and `x` is the parameter we would like to learn from the model.
|
||||
The loss function we choose here is the absolute error:
|
||||
|
||||
```cpp
|
||||
// pass the actual value to the model
|
||||
auto actual = graph->constant({1,1}, inits::fromValue(6));
|
||||
// define loss function
|
||||
auto loss = abs(actual-z);
|
||||
```
|
||||
|
||||
The graph is changed to Figure 4.
|
||||
|
||||
![fig4](images/example1_dot3.png "Figure 4 Graph layout of modified Example 1 with loss function")
|
||||
|
||||
*Figure 4 Graph layout of modified Example 1 with loss function*
|
||||
|
||||
The purpose of the optimiser is to adjust the variables to fit the data.
|
||||
In Marian, there are three built-in optimiser classes: `Sgd`, `Adagrad` and `Adam`.
|
||||
`Sgd` is an optimiser based on [stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent).
|
||||
For each iteration, it updates the parameter `w` according to the rule of `w = w - learning_rate * gradient`.
|
||||
`Adagrad` implements [Adagrad algorithm](https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf),
|
||||
an optimiser with parameter-specific learning rates, which are adapted relative to how frequently a parameter gets updated during training.
|
||||
`Adam` is an implementation of the [Adam algorithm](https://arxiv.org/abs/1412.6980),
|
||||
a stochastic gradient descent method that is based on adaptive estimation of first-order and second-order moments.
|
||||
We use `Optimizer<>` to set up an optimiser with the learning rate:
|
||||
|
||||
```cpp
|
||||
// Choose optimizer (Sgd, Adagrad, Adam) and initial learning rate
|
||||
auto opt = Optimizer<Adam>(0.01);
|
||||
```
|
||||
|
||||
After an iteration of backpropagation, we can call `update()` function to update the parameters:
|
||||
|
||||
```cpp
|
||||
// update parameters in the graph
|
||||
opt->update(graph);
|
||||
```
|
||||
|
||||
**Set up an optimiser for modified Example 1**
|
||||
|
||||
Continuing with the modified Example 1, we choose `Sgd` as the optimiser and update the parameter `x`:
|
||||
|
||||
```cpp
|
||||
// set up Sgd optimiser with 0.005 learning rate
|
||||
auto opt = Optimizer<Sgd>(0.005);
|
||||
// update parameters
|
||||
opt->update(graph);
|
||||
// get the new value of x
|
||||
std::vector<float> v;
|
||||
x->val()->get(v);
|
||||
std::cout<<"x="<<v[0]<<std::endl;
|
||||
// The output is: x=1.98708
|
||||
```
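This matches the update rule given above: at `z ≈ 6.9093` the loss `|6 - z|` has slope 1 with respect to `z`, so the gradient of the loss with respect to `x` is `dz/dx ≈ 2.58385`, and `x ← x - 0.005 * 2.58385 ≈ 1.98708`.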
|
||||
### Debugging
|
||||
For debugging, we can call `debug()` to print node parameters. The `debug()` function has to be called prior to graph execution.
|
||||
Once a node is marked for debugging, its value (resulting tensor) and the gradient will be printed out during the forward and backward pass.
|
||||
It is also recommended to turn on the Marian logger by calling `createLoggers()` for more information.
|
||||
|
||||
**Debugging for modified Example 1**
|
||||
|
||||
Suppose we want to check the results of node `x` during the computation. We can call `debug()` to mark node `x` for debugging.
|
||||
```cpp
|
||||
// mark node x for debugging with logging message "Parameter x"
|
||||
debug(x, "Parameter x");
|
||||
```
|
||||
The output is shown as follows with `createLoggers()`:
|
||||
```
|
||||
[2021-02-16 15:10:51] [memory] Reserving 256 B, device gpu0
|
||||
[2021-02-16 15:10:51] Debug: Parameter x op=param
|
||||
[2021-02-16 15:10:51] shape=1x1 size=1 type=float32 device=gpu0 ptr=140505547538432 bytes=256
|
||||
min: 2.00000000 max: 2.00000000 l2-norm: 2.00000000
|
||||
[[ 2.00000000 ]]
|
||||
|
||||
[2021-02-16 15:10:51] [memory] Reserving 256 B, device gpu0
|
||||
[2021-02-16 15:10:51] Debug Grad: Parameter x op=param
|
||||
[2021-02-16 15:10:51] shape=1x1 size=1 type=float32 device=gpu0 ptr=140505547538944 bytes=256
|
||||
min: 2.58385324 max: 2.58385324 l2-norm: 2.58385324
|
||||
[[ 2.58385324 ]]
|
||||
```
|
||||
|
||||
### More advanced
|
||||
|
||||
Looking at graph execution in more detail, a graph keeps track of all the `Node` objects in its `nodesForward_` and `nodesBackward_` lists.
|
||||
`nodesForward_` contains all nodes used for the forward pass and `nodesBackward_` contains all trainable nodes used for the backward pass.
|
||||
All the tensor objects for a graph are stored in its `tensors_` attribute.
|
||||
`tensors_` is a shared pointer holding memory and nodes for a graph.
|
||||
Since each `Node` can result in new tensors, this attribute is used to allocate memory for new tensors during the forward and backward pass.
|
||||
This `tensors_` attribute gets cleared before a new graph is built.
|
||||
Another important attribute in `ExpressionGraph` is `paramsByElementType_`.
|
||||
This attribute holds memory and nodes that correspond to graph parameters.
|
||||
You can call `params()` function in a graph to get all the parameter objects:
|
||||
|
||||
```cpp
|
||||
// return the Parameters object related to the graph
|
||||
// The Parameters object holds the whole set of the parameter nodes.
|
||||
graph->params();
|
||||
```
|
||||
|
||||
In addition, Marian provides APIs that support gradient checkpointing.
This method trades compute for memory: it reruns a forward-pass segment for each checkpoint segment during the backward pass.
|
||||
Currently, Marian only supports setting checkpoint nodes manually by calling `Node::markCheckpoint()` or `checkpoint()`.
|
||||
To enable the gradient-checkpointing mode for a graph, we use `setCheckpointing()`:
|
||||
|
||||
```cpp
|
||||
// enable gradient-checkpointing for a graph
|
||||
graph->setCheckpointing(true);
|
||||
```
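As a minimal sketch (assuming `checkpoint()` simply marks its argument as a checkpoint node and returns it, which is not guaranteed by this guide), a checkpoint can be placed while building the expression:

```cpp
// Hypothetical usage: mark the output of a layer as a gradient checkpoint,
// so the segment leading up to it is recomputed during the backward pass.
auto h = checkpoint(tanh(affine(x, W1, b1)));
```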
|
||||
|
||||
We can also save and load the parameters of a graph in Marian.
|
||||
We can call `save()` to save all parameters in the graph into a file (`.npz` or `.bin` format).
|
||||
The function `load()` can load all model parameters to the graph (either from an array of `io::Items`, a file or a buffer).
|
||||
|
||||
```cpp
|
||||
// specify the filename
|
||||
std::string filename = "my_model.npz";
|
||||
// save all the parameters into a file
|
||||
graph->save(filename);
|
||||
// load model from a file
|
||||
graph->load(filename);
|
||||
```
|
doc/images/example1_dot.png (new binary file, 5.5 KiB)
doc/images/example1_dot2.png (new binary file, 6.1 KiB)
doc/images/example1_dot3.png (new binary file, 9.4 KiB)
doc/images/example2.png (new binary file, 24 KiB)
doc/images/graph_example1.jpg (new binary file, 7.9 KiB)
doc/index.rst (new file, 47 lines)
@ -0,0 +1,47 @@
|
||||
Welcome to Marian's documentation!
|
||||
==================================
|
||||
|
||||
|buildgpu| |buildcpu| |tests| |release| |license|
|
||||
|
||||
Marian is an efficient and self-contained Neural Machine Translation framework with an integrated
|
||||
automatic differentiation engine based on dynamic computation graphs, written entirely in C++.
|
||||
|
||||
This is developer documentation. User documentation is available at https://marian-nmt.github.io/docs/
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
graph
|
||||
operators
|
||||
|
||||
api/library_index
|
||||
|
||||
contributing
|
||||
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
|
||||
|
||||
.. |buildgpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-10.1.svg?label=CUDAC%20Build
|
||||
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-dev/
|
||||
:alt: GPU build status
|
||||
|
||||
.. |buildcpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cpu.svg?label=CPU%20Build
|
||||
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cpu/
|
||||
:alt: CPU build status
|
||||
|
||||
.. |tests| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-regression-tests.svg?label=Tests
|
||||
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-regression-tests/
|
||||
:alt: Tests status
|
||||
|
||||
.. |release| image:: https://img.shields.io/github/release/marian-nmt/marian.svg?label=Release
|
||||
:target: https://github.com/marian-nmt/marian/releases
|
||||
:alt: Latest release
|
||||
|
||||
.. |license| image:: https://img.shields.io/badge/License-MIT-blue.svg
|
||||
:target: ../LICENSE.md
|
||||
:alt: License: MIT
|
doc/make.bat (new file, 35 lines)
@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=.
|
||||
set BUILDDIR=build
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.http://sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
doc/operators.md (new file, 553 lines)
@ -0,0 +1,553 @@
|
||||
# Operations in the Expression Graph
|
||||
|
||||
Operations are responsible for manipulating the elements of an expression graph.
|
||||
In Marian, many useful operations have already been implemented and can be found in
the code documentation. The provided operations cover simple arithmetic, logical
comparisons and common mathematical functions, as well as tensor manipulation,
for example `slice` or `reshape`, and aggregations such as `sum` or `minimum`.
Finally, other routines useful in building neural networks, such as activation
functions, are also available.
|
||||
|
||||
There are several necessary components required to implement an operation in
|
||||
Marian's expression graph. The highest-level component is the Expression
|
||||
Operator, responsible for setting up the Node Operator and adding it to the
|
||||
graph. Next, this Node Operator describes the nature of the forward and backward
|
||||
operation to be performed. These operations are implemented using some
|
||||
combination of Functional Operators (element-wise) and Tensor Operators.
|
||||
|
||||
This overview aims to explain what each of the different operator components does,
how they fit together and where to go to make changes, so that, equipped with this
knowledge, you are able to add new functionality to Marian.
|
||||
|
||||
## Operator Structure
|
||||
|
||||
The central component in the graph is the `Chainable<Tensor>` object. This
|
||||
object provides the abstract interface necessary to interact with elements in
|
||||
the computation graph. The details of this interface can be found in
|
||||
[/src/graph/chainable.h](api/file_src_graph_chainable.h.html). Note that the
|
||||
template parameter corresponds to the underlying data structure, which in Marian
|
||||
is the `Tensor`. Therefore, for convenience, the type `Expr` is defined:
|
||||
|
||||
```cpp
|
||||
typedef IPtr<Chainable<Tensor>> Expr;
|
||||
```
|
||||
|
||||
The implementation of the different operator components are divided across
|
||||
several files:
|
||||
|
||||
- Expression Operator
|
||||
- [/src/graph/expression_operators.h](api/file_src_graph_expression_operators.h.html)
|
||||
- [/src/graph/expression_operators.cpp](api/file_src_graph_expression_operators.cpp.html)
|
||||
- Node Operator
|
||||
- [/src/graph/node_operators_unary.h](api/file_src_graph_node_operators_unary.h.html)
|
||||
- [/src/graph/node_operators_binary.h](api/file_src_graph_node_operators_binary.h.html)
|
||||
- [/src/graph/node_operators_tuple.h](api/file_src_graph_node_operators_tuple.h.html)
|
||||
- Functional Operator
|
||||
- [/src/functional/operators.h](api/file_src_functional_operators.h.html)
|
||||
- Tensor operation
|
||||
- [/src/tensors/tensor_operators.h](api/file_src_tensors_tensor_operators.h.html)
|
||||
- [/src/tensors/cpu/tensor_operators.cpp](api/file_src_tensors_cpu_tensor_operators.cpp.html)
|
||||
- [/src/tensors/gpu/tensor_operators.cu](api/file_src_tensors_gpu_tensor_operators.cu.html)
|
||||
- Declared Specialization
|
||||
- [/src/tensors/gpu/element.inc](api/program_listing_file_src_tensors_gpu_element.inc.html)
|
||||
- [/src/tensors/gpu/add.inc](api/program_listing_file_src_tensors_gpu_add.inc.html)
|
||||
- [/src/tensors/gpu/add_all.inc](api/program_listing_file_src_tensors_gpu_add_all.inc.html)
|
||||
|
||||
To understand how the different components are inter-linked, we'll look at each
|
||||
of them in turn.
|
||||
|
||||
|
||||
## Expression Operator
|
||||
|
||||
The expression operator is the user-facing method used when building a graph. It
|
||||
is responsible for constructing the corresponding Node Operation and inserting
|
||||
it into the expression graph. To accommodate these core requirements, the
|
||||
function `Expression` is able to perform both actions in generality:
|
||||
|
||||
```cpp
|
||||
template <class T, typename... Args>
|
||||
Expr Expression(Args&&... args) {
|
||||
auto e = Expr(new T(std::forward<Args>(args)...));
|
||||
return e->graph()->add(e);
|
||||
}
|
||||
```
|
||||
|
||||
This helper-function simplifies the definition of many expression operators. For
|
||||
example, the implementation of the expression operator `sin(x)` is simply:
|
||||
|
||||
```cpp
|
||||
// src/graph/expression_operators.h
|
||||
Expr sin(Expr x);
|
||||
|
||||
// src/graph/expression_operators.cpp
|
||||
Expr sin(Expr x) {
|
||||
return Expression<SinNodeOp>(x);
|
||||
}
|
||||
```
|
||||
|
||||
However, implementations may perform actions beyond the core functionality
|
||||
alone. Taking `sum` as an example
|
||||
|
||||
```cpp
|
||||
Expr sum(Expr a, int ax) {
|
||||
if(a->shape()[ax] == 1) {
|
||||
return a;
|
||||
}
|
||||
return Expression<ReduceNodeOp>(a, ax, ReduceNodeOpCode::sum);
|
||||
}
|
||||
```
|
||||
|
||||
The trivial operation is handled without needing to construct a node operation.
|
||||
This example also demonstrates a non-trivial construction of `ReduceNodeOp`,
|
||||
which is capable of performing differing reduction operations depending on
|
||||
instantiation.
|
||||
|
||||
Going further, an expression operator may be defined in terms of existing
|
||||
expressions. Operators such as `weighted_average` are composed of three
|
||||
different expression operator calls: `scalar_product`, `sum`, and `operator/`.
|
||||
|
||||
```cpp
|
||||
Expr weighted_average(Expr in, Expr weights, int ax) {
|
||||
auto p = scalar_product(in, weights, ax);
|
||||
auto s = sum(weights, ax);
|
||||
return p / s;
|
||||
}
|
||||
```
|
||||
|
||||
While useful, composition at this level may be less efficient than lower-level
|
||||
implementations.
|
||||
|
||||
|
||||
## Node Operator
|
||||
|
||||
The `Node` subclass of `Chainable<Tensor>` provides concrete implementations for
|
||||
much of the abstract interface, while subclasses of `Node` enable different node
|
||||
behaviours. In the context of operations, the relevant derived class is
|
||||
`NaryNodeOp`, which is the base class used for Node Operators. This subclass provides
an implementation focused on performing general N-ary operations. However, many
|
||||
common operations are unary and, for convenience, a further specialization,
|
||||
`UnaryNodeOp`, exists to simplify their definition.
|
||||
|
||||
The purpose of the Node Operator is to define the forward and backward behaviour
|
||||
of the operation. The forward operation performs the desired operation while the
|
||||
backward operation updates the gradients. These behaviours are written in terms
|
||||
of `NodeOps`, where a `NodeOp` is a wrapper to define a capturing lambda
|
||||
function. Explicitly these are defined as:
|
||||
|
||||
```cpp
|
||||
// src/graph/chainable.h
|
||||
#define NodeOp(op) [=]() { op; }
|
||||
typedef std::vector<std::function<void()>> NodeOps;
|
||||
```
|
||||
|
||||
Each `NodeOp` is written as a function in terms of the value (`val_`), gradient
|
||||
(`adj_`) of the current node, and its children, via `child()`. The values and
|
||||
gradients of the n<sup>th</sup> child node are accessed via the interfaces
|
||||
`child(n)->val()` and `child(n)->grad()`, respectively. NodeOps are executed in
|
||||
order when running the graph forwards and backwards, as this snippet from `Node`
|
||||
demonstrates
|
||||
|
||||
```cpp
|
||||
// Node in src/graph/node.h
|
||||
virtual void runForward(const NodeOps& ops) {
|
||||
for(auto&& op : ops)
|
||||
op();
|
||||
}
|
||||
|
||||
virtual void runBackward(const NodeOps& ops) {
|
||||
size_t i = 0;
|
||||
for(auto&& op : ops)
|
||||
if(child(i++)->trainable())
|
||||
op();
|
||||
}
|
||||
```
|
||||
|
||||
In the backward operation it is **crucial** that the `NodeOp` responsible for
|
||||
propagating a gradient to `child(i)` is the i<sup>th</sup> element of the
|
||||
NodeOps vector. The requirement that the child associated with the NodeOp be
|
||||
trainable means that an out-of-position NodeOp may not be run. To represent no
|
||||
operation a `nullptr` can be passed as a NodeOp.
|
||||
|
||||
A typical node operator has the functionality demonstrated in the following
|
||||
snippet.
|
||||
|
||||
```cpp
|
||||
// outline of a node op
|
||||
struct MyNodeOp : public NaryNodeOp {
  MyNodeOp(Expr a)
      : NaryNodeOp({a}, newShape(...), newType(...)) {}

  Shape newShape(...) {}  // optional
  Type newType(...) {}    // optional

  const std::string type() override { return "my_node_op"; }
  virtual size_t hash() override {}          // potentially required
  virtual bool equal(Expr node) override {}  // potentially required

  NodeOps forwardOps() override {}
  NodeOps backwardOps() override {}
};
```
|
||||
|
||||
This outline describes a node operator that takes a single argument `a`. The
|
||||
shape and type of the node would be determined by the result of `newShape` and
|
||||
`newType` when constructing the `NaryNodeOp`. These functions represent any
|
||||
custom logic used to determine the shape and type of the node. As indicated in
|
||||
this example code, these are optional and, when omitted, calling
|
||||
`NaryNodeOp({a})` would result in a node with the same shape and type as `a`.
|
||||
The `type()` method returns the friendly name for the node. Note that the
|
||||
[ONNX](https://onnx.ai)
|
||||
[interface](api/program_listing_file_src_onnx_expression_graph_onnx_serialization.cpp.html)
|
||||
maintains a mapping of these friendly names to their ONNX representation. In the
|
||||
absence of any member variables the `hash()` and `equal()` methods can be
|
||||
omitted, and defer to their `NaryNodeOp` definition. However, if such variables
|
||||
exist then `hash()` should implement a hashed representation and `equal()`
|
||||
should provide the necessary conditions to consider nodes equivalent. Finally,
|
||||
the operations of the node are defined in `forwardOps()` and `backwardOps()`.
|
||||
|
||||
Continuing with the example of `sin(x)`, the code responsible for implementing
|
||||
the behaviour is
|
||||
|
||||
```cpp
|
||||
// src/graph/node_operators_unary.h
|
||||
struct SinNodeOp : public UnaryNodeOp {
|
||||
SinNodeOp(Expr x) : UnaryNodeOp(x) {}
|
||||
|
||||
NodeOps forwardOps() override {
|
||||
using namespace functional;
|
||||
return {NodeOp(Element(_1 = sin(_2), val_, child(0)->val()))};
|
||||
}
|
||||
|
||||
NodeOps backwardOps() override {
|
||||
using namespace functional;
|
||||
return {NodeOp(Add(_1 * cos(_2), child(0)->grad(), adj_, child(0)->val()))};
|
||||
}
|
||||
|
||||
const std::string type() override { return "sin"; }
|
||||
};
|
||||
```
|
||||
|
||||
In this code, the constructor trivially initialises the `UnaryNodeOp`, passing
|
||||
the expression `x` as its input. This propagates up to `NaryNodeOp` and becomes
|
||||
`child(0)` of the node. The shape and type of the `SinNodeOp` are equivalent to
those of `x`. The lack of any member variables allows the `hash()` and `equal()`
|
||||
methods to be omitted. The friendly name for this node is the string `sin`. The
|
||||
forward and backward implementation are accomplished using a single NodeOp each.
|
||||
|
||||
### Forward operation
|
||||
|
||||
The forward NodeOp calls the tensor operator `Element`, which executes the
|
||||
element-wise operation described by the functor:
|
||||
|
||||
```cpp
|
||||
_1 = sin(_2)
|
||||
```
|
||||
|
||||
The placeholders `_1`, `_2` are enabled by code in
|
||||
[/src/functional](api/dir_src_functional.html) and interoperate with the
|
||||
functional operators. In the call to `Element`, `val_` is assigned to `_1` and
|
||||
`child(0)->val()` to `_2`. Therefore, this has the action of setting the
|
||||
elements of this node to the result obtained by applying `sin` to the elements
|
||||
of `child(0)`.
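Conceptually, and ignoring broadcasting, strides and the GPU path that the real implementation handles, the call behaves like the following sketch:

```cpp
// Conceptual sketch of Element(_1 = sin(_2), val_, child(0)->val()):
// apply sin to every element of the child's value and write the result into val_.
for(size_t i = 0; i < val_->size(); ++i)
  val_->data()[i] = std::sin(child(0)->val()->data()[i]);
```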
|
||||
|
||||
### Backward Operation
|
||||
|
||||
The backward NodeOp is responsible for backpropagation of the gradients via
|
||||
reverse-mode automatic differentiation. In this example, where `y = sin(x)`,
|
||||
this corresponds to evaluating
|
||||
|
||||
```
|
||||
dJ/dx += dJ/dy * dy/dx, dy/dx = cos(x)
|
||||
```
|
||||
|
||||
This is realised using the tensor operator `Add` with the functor
|
||||
|
||||
```cpp
|
||||
_1 * cos(_2)
|
||||
```
|
||||
|
||||
In the call to `Add`, `adj_` is assigned to `_1` and `child(0)->val()` to `_2`.
|
||||
Therefore, this functor represents `dJ/dy * dy/dx`: the product of the gradient
|
||||
at the current node and the gradient of the operation. This value is then added
|
||||
to the gradient of the child `child(0)->grad()` as required.
|
||||
|
||||
### Shape and Type Changes
|
||||
|
||||
The `newShape` and `newType` methods are just a suggestion of how custom logic
|
||||
may be encapsulated where needed. However, in practice, many operations do not
|
||||
require a change in shape or type. In these instances, the node inherits the
|
||||
broadcasted shape of its children as well as their common type. An important
|
||||
feature of the type deduction in `NaryNodeOp::commonType()` is that it
|
||||
guarantees that all child nodes are of the same type.
|
||||
|
||||
There are few operations in Marian that require a type specification. Where they
|
||||
do exist, they are often simple as the desired type is explicitly provided, or
|
||||
is trivially deduced. An example of this is `CastNodeOp`
|
||||
|
||||
```cpp
|
||||
// CastNodeOp in src/graph/node_operators_unary.h
|
||||
CastNodeOp(Expr a, Type type) : UnaryNodeOp(a, type) {}
|
||||
```
|
||||
|
||||
The desired type is set explicitly in construction. A slightly different example
|
||||
is that of `CSRDotNodeOp`. It has several child nodes which are a mixture of
|
||||
`DataType` and `IndexType` and therefore do not share a common type. The
|
||||
solution is to explicitly specify the relevant children to
|
||||
`NaryNodeOp::commonType({...})`.
|
||||
|
||||
Shape modifying operations are more common. A simple example is the class of
|
||||
operations performed by `ReduceNodeOp` which involve an aggregation process
|
||||
along one axis of the Tensor. The output shape is determined by
|
||||
|
||||
```cpp
|
||||
// ReduceNodeOp in src/graph/node_operators_unary.h
|
||||
Shape newShape(Expr a, int axis) {
|
||||
Shape shape = a->shape();
|
||||
axis_ = shape.axis(axis);
|
||||
|
||||
shape.set(axis_, 1);
|
||||
return shape;
|
||||
}
|
||||
```
|
||||
|
||||
The output shape is the same as the input but with the processed axis reduced
|
||||
to a single element. Other use cases include transpose and slicing operations,
|
||||
as well as tensor products.
|
||||
|
||||
|
||||
## Functional Operator
|
||||
|
||||
As the NodeOps are evaluated, they encounter the underlying datatype of the
|
||||
`Tensor`. At this stage, type-specific intrinsic functions are required. These
|
||||
intrinsics are implemented in the templated struct `Ops<ElementType>`, with a
|
||||
specialization required for each type. The current required types are:
|
||||
- float
|
||||
- double
|
||||
- float32x4 (see `src/3rd_party/sse_mathfun.h`)
|
||||
- float32x8 (see `src/3rd_party/avx_mathfun.h`)
|
||||
- half (see `cuda_fp16.h` in the CUDA Math API)
|
||||
|
||||
Further details are available in
|
||||
[/src/common/types.h](api/file_src_common_types.h.html).
|
||||
|
||||
Returning to the example of `sin(x)`, the specialization for `float` and
|
||||
`double` requires
|
||||
|
||||
```cpp
|
||||
// src/functional/operators.h
|
||||
// in namespace marian::functional
|
||||
template <typename T>
|
||||
struct Ops {
|
||||
static HOST_DEVICE_INLINE T sin(const T&) { ABORT("Unknown type"); }
|
||||
};
|
||||
|
||||
// Specialization for float
|
||||
template <>
|
||||
struct Ops<float> {
|
||||
static HOST_DEVICE_INLINE float sin(const float& x) { return sinf(x); }
|
||||
};
|
||||
|
||||
// Specialization for double
|
||||
template <>
|
||||
struct Ops<double> {
|
||||
static HOST_DEVICE_INLINE double sin(const double& x) { return std::sin(x); }
|
||||
};
|
||||
```
|
||||
|
||||
The remaining specializations can be seen in
|
||||
[/src/functional/operators.h](api/file_src_functional_operators.h.html). Note
|
||||
that the general template must produce a runtime abort.
|
||||
|
||||
The final component of the functional operator is to call the macro that enables
|
||||
interoperability with the framework of
|
||||
[/src/functional](api/dir_src_functional.html). For a unary operator, this is
|
||||
the macro `UNARY`.
|
||||
|
||||
```cpp
|
||||
UNARY(Sin, sin, Ops<ElementType>::sin(x));
|
||||
```
|
||||
|
||||
where template parameter `ElementType` **must** be used. There are equivalent
|
||||
macros for `BINARY` and `TERNARY` Ops.
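For instance, a binary operator follows the same pattern. The sketch below assumes `Ops<ElementType>` also provides a two-argument `add` intrinsic, analogous to `sin` above:

```cpp
// Hypothetical binary example: x and y are the two ElementType arguments
// forwarded to the type-specific intrinsic.
BINARY(Plus, operator+, Ops<ElementType>::add(x, y));
```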
|
||||
|
||||
|
||||
## Tensor Operator
|
||||
|
||||
Tensor operations use less abstracted interfaces to interact with the Tensors,
|
||||
often working with the Tensor data directly. They also rely on BLAS (Basic
|
||||
Linear Algebra Subprograms) libraries to accelerate these operations, as well as
libraries containing device-specific optimisations. These libraries include:
|
||||
|
||||
- CPU
|
||||
- CBLAS / OpenBLAS
|
||||
- FBGEMM
|
||||
- INTGEMM
|
||||
- MKL
|
||||
- GPU
|
||||
- CUDA (cuBLAS)
|
||||
|
||||
An important subtlety is that while the CPU-focused libraries use a row-major
|
||||
representation, the cuBLAS library (GPU) instead uses a column-major
|
||||
representation.
|
||||
|
||||
Furthermore, the OpenMPI and OpenMP libraries are employed for parallelisation.
|
||||
Macros provided in
|
||||
[/src/common/definitions.h](api/file_src_common_definitions.h.html) locally
|
||||
enable faster floating-point math in supported compilers.
|
||||
|
||||
```cpp
|
||||
MARIAN_FFAST_MATH_BEGIN
|
||||
// ffmath code
|
||||
MARIAN_FFAST_MATH_END
|
||||
```
|
||||
|
||||
The usual caveats apply when enabling `fast_math`, and can be found in
|
||||
[/src/common/definitions.h](api/file_src_common_definitions.h.html)
|
||||
|
||||
Tensor operators are declared in
[/src/tensors/tensor_operators.h](api/file_src_tensors_tensor_operators.h.html);
these are device-agnostic functions that call the relevant device-specific
implementation. The CPU- and GPU-specific implementations are defined in the
`cpu` namespace in [/src/tensors/cpu/](api/dir_src_tensors_cpu.html) and the
`gpu` namespace in [/src/tensors/gpu/](api/dir_src_tensors_gpu.html). Therefore
a typical operator defers to an implementation in the device-specific namespace.

```cpp
void TensorOp(marian::Tensor out, marian::Tensor in) {
#ifdef CUDA_FOUND
  if(out->getBackend()->getDeviceId().type == DeviceType::gpu)
    gpu::TensorOp(out, in);
  else
#endif
    cpu::TensorOp(out, in);
}
```

When compiled with GPU support, this function dispatches a call to the
|
||||
implementation that corresponds to the backend device type configured in the
|
||||
graph (either GPU or CPU). Without GPU support, only the CPU implementation is
|
||||
available.
|
||||
|
||||
Many operations are covered by three general tensor operators: `Element`,
|
||||
`Aggregate` and `Prod`. The `Element` operator applies a function element-wise
|
||||
across an arbitrary number of input tensors and stores the result in the output
|
||||
tensor. The `Aggregate` operator also applies a function element-wise across its
|
||||
inputs, but instead aggregates the results in the output via a given aggregation
|
||||
function. A common aggregation function used is addition, which is the basis of
|
||||
the `Add` and `Reduce` operators. Finally, `Prod` deals with products of
|
||||
tensors. This operator performs a general matrix multiplication with the
|
||||
underlying implementation relying on the libraries mentioned above.
|
||||
|
||||
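The snippet below is a minimal sketch of how such a call typically looks,
assuming `out`, `a` and `b` are `marian::Tensor` objects of compatible shape;
`_1` binds to the output and `_2`, `_3` to the inputs of the functional
expression:

```cpp
// Sketch: element-wise out[i] = a[i] + exp(b[i]) using the placeholders from
// marian::functional; the functor is applied at every element position.
using namespace marian::functional;
Element(_1 = _2 + exp(_3), out, a, b);
```
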
Specialized operators exist to manipulate tensors beyond the cases covered
above, such as transposition and concatenation. These operators may even
be expressed in terms of existing tensor operators.

Furthermore, for complicated multi-operation computations, performance gains and
memory improvements may be realised by implementing a tensor operator for that
specific purpose. An example of this is `softmax`, which could be implemented
using multiple expression operators (`exp`, `sum`), but is instead implemented
directly as a tensor operator (see the sketch below). These optimized
implementations may be device-specific.

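As a rough sketch of that trade-off at the expression level (assuming `x` is an
`Expr`; numerical-stability details such as max subtraction are omitted):

```cpp
// Composed from generic expression operators: builds several intermediate nodes.
Expr composed = exp(x) / sum(exp(x), /*axis=*/-1);
// Dedicated operator: lowers to a single device-specific softmax tensor operator.
Expr fused = softmax(x);
```
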
## Declared Specialization

The operations performed in the forward and backward methods of a NodeOp require
their GPU templates to be explicitly declared. When a new specialization is
introduced without being explicitly instantiated, it will cause a link error at
compilation:

```
.../src/tensors/tensor_operators.h:41: undefined reference to `void marian::gpu::Element<marian::functional::Assign< ... > ( ... )'
```

To fix these undefined references, we must explicitly add the specialization to
the `.inc` files of [/src/tensors/gpu/](api/dir_src_tensors_gpu.html). Each
`.inc` file is included at the end of its corresponding `.cu` file, ensuring
that the specialization is compiled (see the include sketch below).

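Concretely, the corresponding `.cu` file ends with an include of its `.inc`
file, roughly as follows (path shown for the element case):

```cpp
// At the end of src/tensors/gpu/element.cu: compile the explicit template
// instantiations listed in the .inc file together with the CUDA kernels.
#include "tensors/gpu/element.inc"
```
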
The undefined references should be added to the `.inc` file that corresponds to
the header file that contains the declaration of the missing functions.

The file [element.inc](api/file_src_tensors_gpu_element.inc.html) contains the
specializations of the function defined in
[element.h](api/file_src_tensors_gpu_element.h.html):

```cpp
// src/tensors/gpu/element.h
template <class Functor, class... Tensors>
void Element(Functor functor, Tensor out, Tensors... tensors);
```

Similarly, [add.inc](api/file_src_tensors_gpu_add.inc.html) contains the
specializations for functions matching either of the two signatures in
[add.h](api/file_src_tensors_gpu_add.h.html):

```cpp
// src/tensors/gpu/add.h
template <class Functor, class... Tensors>
void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors);

template <class Functor, class AggFunctor, class... Tensors>
void Aggregate(Functor functor, float initAgg, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... tensors);
```

Finally, [add_all.inc](api/file_src_tensors_gpu_add_all.inc.html) contains the
specializations for [add_all.h](api/file_src_tensors_gpu_add_all.h.html), which
are several versions of:

```cpp
// src/tensors/gpu/add_all.h
template <typename T, typename AccType, class Functor, class AggFunctor>
void AggregateAll(Ptr<Allocator> allocator,
                  Functor functor,
                  AccType aggInit,
                  AggFunctor aggFunctor,
                  AccType scale,
                  Tensor out,
                  const Tensor in1);
```

However, for [add_all.h](api/file_src_tensors_gpu_add_all.h.html), there is an
additional type dependence in the first template parameter, which requires two
entries:

```cpp
marian::gpu::AggregateAll< float, ... >( ... );
marian::gpu::AggregateAll< __half, ... >( ... ); // for COMPILE_FP16
```

where the `__half` specialization is related to half-precision floats and should
be added to the `COMPILE_FP16` preprocessor block.

The simplest method to add the correct specialization is to take the compilation
error output and extract the needed signature. To extract the signature:

1. Replace up to, and including, "undefined reference to `" with "template"
2. Replace the final ' with a semicolon

To conform with definitions in the codebase, we should replace
`IntrusivePtr<marian::TensorBase>` with its typedef `marian::Tensor`. Note that,
as these files are included in the `marian::gpu` namespace and explicitly use the
`marian::functional` namespace, it is also possible to omit both of these
prefixes. Typically, the namespace prefix of the specialized function is removed
as well. Following these rules for the example of `SinNodeOp` results in the
following entries:

**element**
```cpp
template void Element<Assign<Var<1>, UnaryFunctor<elem::Sin, Assignee<2> > >, marian::Tensor >(Assign<Var<1>, UnaryFunctor<elem::Sin, Assignee<2> > >, marian::Tensor, marian::Tensor);
```

**add**
```cpp
template void Add<BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,class marian::Tensor,class marian::Tensor >(BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,float,class marian::Tensor,class marian::Tensor,class marian::Tensor);
```

**add_all**
```cpp
template void AggregateAll<float,float,BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,BinaryFunctor<elem::Plus,Assignee<1>,Assignee<2> > >(std::shared_ptr<marian::Allocator>,BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,float,BinaryFunctor<elem::Plus,Assignee<1>,Assignee<2> >,float,marian::Tensor,marian::Tensor,marian::Tensor);

#if COMPILE_FP16
template void AggregateAll<__half,float,BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,BinaryFunctor<elem::Plus,Assignee<1>,Assignee<2> > >(std::shared_ptr<marian::Allocator>,BinaryFunctor<elem::Mult,Assignee<1>,UnaryFunctor<elem::Cos,Assignee<2> > >,float,BinaryFunctor<elem::Plus,Assignee<1>,Assignee<2> >,float,marian::Tensor,marian::Tensor,marian::Tensor);
#endif
```

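For orientation, the `add` and `add_all` entries above are exactly what the
backward step of `SinNodeOp` instantiates: it accumulates `adjoint * cos(x)` into
the child's gradient. A minimal sketch of that call, assuming the usual NodeOp
member names `adj_` and `child(0)`:

```cpp
// Sketch: the functor _1 * cos(_2) corresponds to the
// BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::Cos, Assignee<2>>>
// that appears in the entries above.
using namespace marian::functional;
Add(_1 * cos(_2), child(0)->grad(), adj_, child(0)->val());
```
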
doc/requirements.txt
Normal file
@ -0,0 +1,6 @@
sphinx==2.4.4
breathe==4.13.0
exhale
sphinx_rtd_theme
recommonmark
m2r

@ -127,6 +127,7 @@ IPtr<T> INew(Ptr<T> p) {
  return IPtr<T>(p);
}

/// enum class DeviceType: defines which device is used for computation
enum class DeviceType : size_t { gpu = 0, cpu = 1 };

struct DeviceId {

@ -28,6 +28,14 @@ struct Slice // Python-like slice/index descriptor
};
typedef std::vector<Slice> Slices;

/**
 * Shape class mainly defines the shape or dimensionality of the node.
 * Basically, Shape is a wrapper of a std::vector. Its size is the number of
 * dimension. E.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3.
 * WHen the index is negative, the real index is size() + index.
 * It implements most common functions demanded by operations, e.g., resize(),
 * slice(), and broadcast().
 */
struct Shape {
private:
  std::vector<int> shape_;

@ -143,7 +143,7 @@ do { \
    default: ABORT("Unknown type {}", type); \
  } \
} while(0)

/// namespace marian
namespace marian {

// small struct to enable templating based on types use for packing
@ -290,36 +290,37 @@ constexpr inline size_t operator+(size_t val, TypeClass typeClass) {
|
||||
}
|
||||
|
||||
// @TODO: rename to ElementType when things become stable, so it's easier to review
|
||||
/// enum class Type: stores all supported data type in Marian
|
||||
enum class Type : size_t {
|
||||
int8 = TypeClass::signed_type + 1u,
|
||||
int16 = TypeClass::signed_type + 2u,
|
||||
int32 = TypeClass::signed_type + 4u,
|
||||
int64 = TypeClass::signed_type + 8u,
|
||||
int8 = TypeClass::signed_type + 1u, ///< int8 type
|
||||
int16 = TypeClass::signed_type + 2u, ///< int16 type
|
||||
int32 = TypeClass::signed_type + 4u, ///< int32 type
|
||||
int64 = TypeClass::signed_type + 8u, ///< int64 type
|
||||
|
||||
uint8 = TypeClass::unsigned_type + 1u,
|
||||
uint16 = TypeClass::unsigned_type + 2u,
|
||||
uint32 = TypeClass::unsigned_type + 4u,
|
||||
uint64 = TypeClass::unsigned_type + 8u,
|
||||
uint8 = TypeClass::unsigned_type + 1u, ///< uint8 type
|
||||
uint16 = TypeClass::unsigned_type + 2u, ///< uint16 type
|
||||
uint32 = TypeClass::unsigned_type + 4u, ///< uint32 type
|
||||
uint64 = TypeClass::unsigned_type + 8u, ///< uint64 type
|
||||
|
||||
float16 = TypeClass::float_type + 2u,
|
||||
float32 = TypeClass::float_type + 4u,
|
||||
float64 = TypeClass::float_type + 8u,
|
||||
float16 = TypeClass::float_type + 2u, ///< float16 type
|
||||
float32 = TypeClass::float_type + 4u, ///< float32 type
|
||||
float64 = TypeClass::float_type + 8u, ///< float64 type
|
||||
|
||||
packed16 = TypeClass::packed_type + 2u, // special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint16) is meaningless.
|
||||
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, // special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, // special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
packed16 = TypeClass::packed_type + 2u, ///< special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint16) is meaningless.
|
||||
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, ///< special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, ///< special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
|
||||
intgemm8 = TypeClass::intgemm_type + 1u, // Int8 quantized (not packed) matrices for intgemm
|
||||
intgemm16 = TypeClass::intgemm_type + 2u, // Int16 quantized (not packed) matrices for intgemm
|
||||
intgemm8 = TypeClass::intgemm_type + 1u, ///< Int8 quantized (not packed) matrices for intgemm
|
||||
intgemm16 = TypeClass::intgemm_type + 2u, ///< Int16 quantized (not packed) matrices for intgemm
|
||||
|
||||
intgemm8ssse3 = TypeClass::intgemm_type + 1u + TypeClass::ssse3_type, ///< Int8 quantized and packed (ssse3) matrices for intgemm
|
||||
intgemm8avx2 = TypeClass::intgemm_type + 1u + TypeClass::avx2_type, ///< Int8 quantized and packed (avx2) matrices for intgemm
|
||||
intgemm8avx512 = TypeClass::intgemm_type + 1u + TypeClass::avx512_type, ///< Int8 quantized and packed (avx512) matrices for intgemm
|
||||
intgemm8avx512vnni = TypeClass::intgemm_type + 1u + TypeClass::avx512_type + 4096u, ///< Int8 quantized and packed (avx512) matrices for intgemm. VNNI algorithm
|
||||
|
||||
intgemm8ssse3 = TypeClass::intgemm_type + 1u + TypeClass::ssse3_type, // Int8 quantized and packed (ssse3) matrices for intgemm
|
||||
intgemm8avx2 = TypeClass::intgemm_type + 1u + TypeClass::avx2_type, // Int8 quantized and packed (avx2) matrices for intgemm
|
||||
intgemm8avx512 = TypeClass::intgemm_type + 1u + TypeClass::avx512_type, // Int8 quantized and packed (avx512) matrices for intgemm
|
||||
intgemm8avx512vnni = TypeClass::intgemm_type + 1u + TypeClass::avx512_type + 4096u, // Int8 quantized and packed (avx512) matrices for intgemm. VNNI algorithm
|
||||
|
||||
intgemm16sse2 = TypeClass::intgemm_type + 2u + TypeClass::sse2_type, // Int16 quantized and packed (sse2) matrices for intgemm
|
||||
intgemm16avx2 = TypeClass::intgemm_type + 2u + TypeClass::avx2_type, // Int16 quantized and packed (avx2) matrices for intgemm
|
||||
intgemm16avx512 = TypeClass::intgemm_type + 2u + TypeClass::avx512_type, // Int16 quantized and packed (avx512) matrices for intgemm
|
||||
intgemm16sse2 = TypeClass::intgemm_type + 2u + TypeClass::sse2_type, ///< Int16 quantized and packed (sse2) matrices for intgemm
|
||||
intgemm16avx2 = TypeClass::intgemm_type + 2u + TypeClass::avx2_type, ///< Int16 quantized and packed (avx2) matrices for intgemm
|
||||
intgemm16avx512 = TypeClass::intgemm_type + 2u + TypeClass::avx512_type, ///< Int16 quantized and packed (avx512) matrices for intgemm
|
||||
};
|
||||
|
||||
static inline size_t operator&(TypeClass typeClass, Type type) {
|
||||
|
@ -39,6 +39,12 @@ struct BinaryFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Macro to set up unary-functions from marian::functional::Ops.
|
||||
* @param name name for the struct
|
||||
* @param name2 callable typedef
|
||||
* @param func function wrapped
|
||||
*/
|
||||
#define UNARY(name, name2, func) \
|
||||
namespace elem { \
|
||||
struct name { \
|
||||
@ -55,6 +61,12 @@ struct BinaryFunctor {
|
||||
} \
|
||||
static inline name<Capture> name2(Capture x) { return name<Capture>(x); }
|
||||
|
||||
/**
|
||||
* Macro to set up binary-functions from marian::functional::Ops.
|
||||
* @param name name for the struct
|
||||
* @param name2 callable typedef
|
||||
* @param func function wrapped
|
||||
*/
|
||||
#define BINARY(name, name2, func) \
|
||||
namespace elem { \
|
||||
struct name { \
|
||||
@ -95,6 +107,12 @@ struct TernaryFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Macro to set up ternary-functions from marian::functional::Ops.
|
||||
* @param name name for the struct
|
||||
* @param name2 callable typedef
|
||||
* @param func function wrapped
|
||||
*/
|
||||
#define TERNARY(name, name2, func) \
|
||||
namespace elem { \
|
||||
struct name { \
|
||||
|
@ -30,7 +30,7 @@ Expr ExpressionGraph::add(Expr node) {
|
||||
} else {
|
||||
node->setId(count_++);
|
||||
|
||||
// record in foward graph
|
||||
// record in forward graph
|
||||
nodesForward_.push_back(node);
|
||||
|
||||
// record in backward graph if training, and keep track of roots
|
||||
@ -143,6 +143,11 @@ void ExpressionGraph::forward(std::list<Expr>& forwardTape, bool finalPass) {
|
||||
if(inferenceOnly_)
|
||||
v->children().clear();
|
||||
|
||||
// If checkpointing is disabled, keep the memory for forward signals for all nodes.
|
||||
// If checkpointing is enabled:
|
||||
// (a) In the forward pass before the backward pass, free the memory for the nodes in the subtape to save memory.
|
||||
// (b) In the forward calls during the backward pass, keep the memory in the current subtape to accelerate
|
||||
// gradient computation.
|
||||
if(checkpointing_ && !finalPass) {
|
||||
auto subtape = v->getSubtape();
|
||||
if(subtape) {
|
||||
@ -171,12 +176,14 @@ void ExpressionGraph::backward(bool reset, float clipValue) {
|
||||
ABORT("Aborting");
|
||||
}
|
||||
|
||||
// allocates memory and initialises gradients for parameters
|
||||
for(auto kvParams : paramsByElementType_) {
|
||||
kvParams.second->allocateBackward();
|
||||
if(reset)
|
||||
kvParams.second->set_zero_adjoint();
|
||||
}
|
||||
|
||||
// for top nodes: allocates memory and initialise gradients to 1
|
||||
for(auto&& v : topNodes_)
|
||||
v->init_dependent();
|
||||
|
||||
@ -186,13 +193,16 @@ void ExpressionGraph::backward(bool reset, float clipValue) {
|
||||
|
||||
bool firstNaN = true;
|
||||
while(!nodesBackward_.empty()) {
|
||||
auto v = nodesBackward_.back();
|
||||
nodesBackward_.pop_back();
|
||||
auto v = nodesBackward_.back(); // return the last element
|
||||
nodesBackward_.pop_back(); // remove the last element
|
||||
|
||||
// for non-top nodes: allocates memory and initialises gradients to 0
|
||||
for(auto&& child : v->children())
|
||||
if(child->trainable() && child->type() != "param")
|
||||
child->set_zero_adjoint();
|
||||
|
||||
// if using gradient checkpointing,
|
||||
// recompute the forward pass from checkpoint to the root
|
||||
if(checkpointing_ && v->getSubtape()) {
|
||||
forward(*v->getSubtape(), /*finalPass=*/true);
|
||||
}
|
||||
|
@ -16,9 +16,18 @@
|
||||
|
||||
namespace marian {
|
||||
|
||||
/**
|
||||
* Create an expression node of any type, and pass all
|
||||
* arguments to any available constructor.
|
||||
* E.g., to create a ConstantNode uses `Expression<ConstantNode>(...)`.
|
||||
*/
|
||||
template <class T, typename... Args>
|
||||
Expr Expression(Args&&... args);
|
||||
|
||||
/**
|
||||
* The whole tensor set in the graph.
|
||||
* Holds all tensor objects (memory and nodes) for a graph.
|
||||
*/
|
||||
class Tensors {
|
||||
private:
|
||||
Ptr<TensorAllocator> tensors_;
|
||||
@ -27,8 +36,8 @@ private:
|
||||
typedef std::unordered_map<size_t, std::vector<WExpr>> WeakMemory;
|
||||
typedef std::unordered_map<size_t, std::vector<Expr>> Memory;
|
||||
|
||||
Ptr<WeakMemory> shortterm_;
|
||||
Ptr<Memory> longterm_;
|
||||
Ptr<WeakMemory> shortterm_; // holds all nodes for a graph
|
||||
Ptr<Memory> longterm_; // holds memoized nodes
|
||||
|
||||
public:
|
||||
Tensors(Ptr<Backend> backend)
|
||||
@ -112,97 +121,145 @@ public:
|
||||
|
||||
typedef std::map<Type, Ptr<Parameters>> ElementTypeParamsMap; // keep it sorted, hence map not unordered map
|
||||
|
||||
/**
|
||||
* Main implementation of a computation graph.
|
||||
* Keeps a record of data (tensors) and all operations. Each operation in a computation graph is a Node.
|
||||
* Each Node defines its forward and backward steps.
|
||||
*/
|
||||
class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
|
||||
size_t count_{0};
|
||||
size_t count_{0}; // counter for nodes in the graph; hold current node index
|
||||
|
||||
std::unordered_set<Expr> topNodes_; // current set of roots. In the end, all but one must have been consumed.
|
||||
std::unordered_set<Expr> topNodes_; // current set of roots. In the end, all but one must have been consumed
|
||||
|
||||
protected: // (these are protected, not private, for ONNX exporting)
|
||||
std::list<Expr> nodesForward_;
|
||||
std::list<Expr> nodesBackward_;
|
||||
std::list<Expr> nodesForward_; ///< contains all nodes used for forward()
|
||||
std::list<Expr> nodesBackward_; ///< contains trainable nodes used for backward()
|
||||
|
||||
// Holds memory and expressions that correspond to temporary expressions.
|
||||
// This gets cleared before a new graph is built.
|
||||
/**
|
||||
* A shared pointer to the tensor objects in the graph.
|
||||
* Holds memory and nodes that corresponds to tensors in a graph.
|
||||
* Since operations will result in new tensors, this attribute is used
|
||||
* to allocate memory for new tensors during forward() and backward().
|
||||
* This gets cleared before a new graph is built.
|
||||
*/
|
||||
Ptr<Tensors> tensors_;
|
||||
private:
|
||||
|
||||
std::unordered_map<size_t, std::vector<Expr>> memoized_;
|
||||
|
||||
Type defaultElementType_{Type::float32}; // Type used for storing parameters, currently all parameters have to have the same type
|
||||
Type defaultElementType_{Type::float32}; // Type used for storing parameters, currently all parameters have to have the same type
|
||||
|
||||
bool inferenceOnly_{false};
|
||||
bool inferenceOnly_{false}; // a flag holds whether the graph is used for inference only
|
||||
|
||||
bool checkpointing_{false}; // use gradient checkpointing if true
|
||||
bool checkpointing_{false}; // use gradient checkpointing if true
|
||||
|
||||
bool reloaded_{false};
|
||||
bool reloaded_{false}; // a flag holds whether the graph is reloaded: reloaded is true if the graph loads parameters by load() function.
|
||||
|
||||
bool throwNaN_{false};
|
||||
bool throwNaN_{false}; // a flag holds whether the graph throws a NaN exception
|
||||
|
||||
protected:
|
||||
// Delete, copy and move constructors
|
||||
ExpressionGraph(const ExpressionGraph&) = delete;
|
||||
ExpressionGraph(ExpressionGraph&&) = delete;
|
||||
|
||||
// Holds memory and expressions that correspond to graph parameters
|
||||
// Now we can have multiple types of parameters in a separate parameters object per value type.
|
||||
// This is currently only accessible through private functions during loading, will abort during training
|
||||
// when params() is called (e.g. optimizer) and there is more or other types than the default parameter type.
|
||||
// Currently the only usecase is inference. Trying to access params() for non-default parameter type is going
|
||||
// to abort. Inference does not need to access a whole set of parameters.
|
||||
/**
|
||||
* A map holds memory and nodes that corresponds to graph parameters.
|
||||
* The key is Type and the mapped value is a set of parameter objects with corresponding type.
|
||||
* Now we can have multiple types of parameters in a separate parameters object per value type.
|
||||
* This is currently only accessible through private functions during loading, will abort during training
|
||||
* when params() is called (e.g. optimizer) and there is more or other types than the default parameter type.
|
||||
* Currently the only usecase is inference. Trying to access params() for non-default parameter type is going
|
||||
* to abort. Inference does not need to access a whole set of parameters.
|
||||
*/
|
||||
ElementTypeParamsMap paramsByElementType_;
|
||||
Ptr<Backend> backend_;
|
||||
|
||||
std::string namespace_;
|
||||
Ptr<Backend> backend_; ///< a shared pointer to the backend for the graph
|
||||
std::string namespace_; ///< a string defines the namespace of the graph. Each graph has its own unique namespace.
|
||||
|
||||
public:
|
||||
/** @brief Constructs a new expression graph
|
||||
*
|
||||
* Constructor should be used as New<ExpressionGraph>()
|
||||
*/
|
||||
/** Constructs a new expression graph. Constructor should be used as New<ExpressionGraph>(). */
|
||||
ExpressionGraph(bool inference = false);
|
||||
|
||||
/** Destructor. Clear everything related to the graph except memoized nodes. */
|
||||
virtual ~ExpressionGraph() {
|
||||
clear();
|
||||
for(auto kvParams : paramsByElementType_)
|
||||
kvParams.second->clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set device options used to run the graph.
|
||||
* @param deviceId a struct type which stores device no. (size_t)
|
||||
* and device type (DeviceType::cpu or DeviceType::gpu)
|
||||
* @param device a pointer to the device
|
||||
*/
|
||||
virtual void setDevice(DeviceId deviceId = {0, DeviceType::gpu},
|
||||
Ptr<Device> device = nullptr);
|
||||
|
||||
/**
|
||||
* Get device info for the graph.
|
||||
* @return deviceId a struct type which stores device no. (size_t)
|
||||
* and device type (DeviceType::cpu or DeviceType::gpu)
|
||||
*/
|
||||
DeviceId getDeviceId() { return backend_->getDeviceId(); }
|
||||
|
||||
/**
|
||||
* Get backend pointer for the graph.
|
||||
* @return Ptr<Backend> pointer to backend
|
||||
*/
|
||||
Ptr<Backend> getBackend() { return backend_; }
|
||||
|
||||
/** Set whether the graph is used for inference only */
|
||||
void setInference(bool inference) { inferenceOnly_ = inference; }
|
||||
|
||||
/** Check whether the graph is used for inference only (true) or not */
|
||||
bool isInference() { return inferenceOnly_; }
|
||||
|
||||
/**
|
||||
* Set whether the graph uses gradient checkpointing.
|
||||
* <a href="https://github.com/cybertronai/gradient-checkpointing">Gradient Checkpointing</a>
|
||||
* works by trading compute for memory, which reruns a forward-pass segment for each checkpoint segment during backward.
|
||||
*/
|
||||
void setCheckpointing(bool checkpointing) { checkpointing_ = checkpointing; }
|
||||
|
||||
/** Check whether the graph uses gradient checkpointing or not */
|
||||
bool isCheckpointing() { return checkpointing_; }
|
||||
|
||||
/**
|
||||
* Set namespace (std::string) for the graph.
|
||||
* Each graph has its own unique namespace, which is used to form the name of a parameter object.
|
||||
*/
|
||||
void switchParams(const std::string& newNamespace) {
|
||||
namespace_ = newNamespace;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy all parameter objects from one graph to current graph.
|
||||
* @param graph a pointer to a graph object
|
||||
*/
|
||||
virtual void copyParams(Ptr<ExpressionGraph> graph) {
|
||||
for(auto p : *graph->params())
|
||||
param(p->name(), p->shape(), inits::fromTensor(p->val()), p->value_type());
|
||||
forward(); // this will allocate parameters, execute the intializers and therefore copy parameter values
|
||||
forward(); // this will allocate parameters, execute the initializers and therefore copy parameter values
|
||||
}
|
||||
|
||||
/**
|
||||
* Preallocate workspace memory (MB) for the graph.
|
||||
* Sets the size of the memory available for the forward and backward step of the training procedure.
|
||||
* This does not include model size and optimizer parameters that are allocated outsize workspace.
|
||||
*/
|
||||
void reserveWorkspaceMB(size_t num) {
|
||||
size_t bytes = num * 1024 * 1024 - 1;
|
||||
tensors_->reserve(bytes);
|
||||
}
|
||||
|
||||
/** Copy tensor objects from one graph to current graph */
|
||||
void reuseWorkspace(Ptr<ExpressionGraph> graph) {
|
||||
tensors_ = graph->tensors_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Performs backpropogation on this expression graph.
|
||||
*
|
||||
* Backpropogation is implemented by performing first the forward pass and
|
||||
* Performs backpropagation on this expression graph.
|
||||
* Backpropagation is implemented by performing first the forward pass and
|
||||
* then the backward pass of algorithmic differentiation (AD) on the nodes of
|
||||
* the graph.
|
||||
*/
|
||||
@ -211,6 +268,12 @@ public:
|
||||
backward();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform one backpropagation process on the graph to test
|
||||
* whether the graph workspace fits into a given workspace memory.
|
||||
* This function is used for searching the maximum batch size
|
||||
* that fits into given workspace memory.
|
||||
*/
|
||||
bool fits() {
|
||||
try {
|
||||
tensors_->throwAtReallocation(true);
|
||||
@ -223,19 +286,50 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the memory allocated for a tensor object contains a NaN or infinite value.
|
||||
* @param t a Tensor object
|
||||
* @param isNaN a bool type holds the result whether the tensor contains a NaN value (pass by reference)
|
||||
* @param isInf a bool type holds the result whether the tensor contains a infinite value (pass by reference)
|
||||
*/
|
||||
void checkNaN(Tensor t, bool& isNaN, bool& isInf);
|
||||
|
||||
/**
|
||||
* Perform the forward pass on the nodes of the graph.
|
||||
* The forward pass refers to the calculation process.
|
||||
* It traverses through all nodes from input layer to output layer.
|
||||
*/
|
||||
void forward() {
|
||||
for(auto kvParams : paramsByElementType_)
|
||||
kvParams.second->allocateForward();
|
||||
forwardNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the forward pass without memory allocation for parameters.
|
||||
* Helper function for forward().
|
||||
*/
|
||||
void forwardNext();
|
||||
|
||||
/**
|
||||
* Perform forward pass on a given nodes with finalPass flag.
|
||||
* Helper function for forward() and backward().
|
||||
* @param forwardTape a pointer to the nodes used for forward pass
|
||||
* @param finalPass a bool type which controls whether nodes should be freed with gradient-checkpointing
|
||||
*/
|
||||
void forward(std::list<Expr>& forwardTape, bool finalPass);
|
||||
|
||||
/**
|
||||
* Perform the backward pass on the trainable nodes of the graph.
|
||||
* The back pass refers to the process of computing the output error.
|
||||
* It traverses through all nodes from output layer to input layer.
|
||||
*/
|
||||
void backward(bool reset = true, float clipValue = 0.f);
|
||||
|
||||
/**
|
||||
* Generate graph layout in Graphviz format for visualisation.
|
||||
* @return a string presenting graph layout in Graphviz format (dot)
|
||||
*/
|
||||
std::string graphviz() {
|
||||
std::stringstream ss;
|
||||
ss << "digraph ExpressionGraph {" << std::endl;
|
||||
@ -253,6 +347,10 @@ public:
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/**
|
||||
* Write graph layout in Graphviz format to a file.
|
||||
* @param filename a string type specifies filename that writes the graph layout
|
||||
*/
|
||||
void graphviz(const std::string& filename) {
|
||||
std::ofstream dot(filename);
|
||||
dot << graphviz();
|
||||
@ -345,6 +443,18 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Construct a parameter node in the graph.
|
||||
* @param pname a string type holds the name of the parameter node
|
||||
* @param shape a struct type defines the shape of the parameter tensor
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param init a pointer to a NodeInitializer object, e.g., inits::zeros()
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
* @param fixed a bool type specifies whether the parameter object is fixed (not trainable) or not.
|
||||
* The default value is false which means the parameter is trainable.
|
||||
* @return a pointer to the parameter node
|
||||
*/
|
||||
Expr param(const std::string& pname,
|
||||
const Shape& shape,
|
||||
const Ptr<inits::NodeInitializer>& init,
|
||||
@ -354,6 +464,17 @@ public:
|
||||
return param(pname, shape, init, elementType, fixed, /*typeSpecified=*/true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a parameter node in the graph without a specified type, and
|
||||
* the type is set to defaultElementType_.
|
||||
* @param pname a string type holds the name of the parameter node
|
||||
* @param shape a struct type defines the shape of the parameter tensor
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param init a pointer to a NodeInitializer object, e.g., inits::zeros()
|
||||
* @param fixed a bool type specifies whether the parameter object is fixed (not trainable) or not.
|
||||
* The default value is false which means the parameter is trainable.
|
||||
* @return a pointer to the parameter node
|
||||
*/
|
||||
Expr param(const std::string& pname,
|
||||
const Shape& shape,
|
||||
const Ptr<inits::NodeInitializer>& init,
|
||||
@ -362,28 +483,59 @@ public:
|
||||
return param(pname, shape, init, defaultElementType_, fixed, /*typeSpecified=*/false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node in the graph without a specified type, and
|
||||
* the type is set to defaultElementType_.
|
||||
* @param shape a struct type defines the shape of the constant tensor
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param init a pointer to a NodeInitializer object, e.g., inits::zeros()
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
* @return a pointer to the constant node
|
||||
*/
|
||||
Expr constant(const Shape& shape,
|
||||
const Ptr<inits::NodeInitializer>& init,
|
||||
Type elementType) {
|
||||
return Expression<ConstantNode>(shared_from_this(), shape, init, elementType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node in the graph without a specified type, and
|
||||
* the type is set to defaultElementType_.
|
||||
* @param shape a struct type defines the shape of the constant tensor
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param init a pointer to a NodeInitializer object, e.g., inits::zeros()
|
||||
* @return a pointer to the constant node
|
||||
*/
|
||||
Expr constant(const Shape& shape,
|
||||
const Ptr<inits::NodeInitializer>& init) {
|
||||
return Expression<ConstantNode>(shared_from_this(), shape, init, defaultElementType_);
|
||||
}
|
||||
|
||||
// @TODO: add version with iterators
|
||||
// shortcut to turn vector of indices to integer tensor, to be used with operators
|
||||
// like rows or select
|
||||
/**
|
||||
* Turn vector of indices to integer tensor.
|
||||
* A shortcut version to turn vector of indices to integer tensor, to be used with operators
|
||||
* like rows() or index_select()
|
||||
* @param indicesVector a vector of IndexType (uint32_t) specifies the indexes
|
||||
*/
|
||||
Expr indices(const std::vector<IndexType>& indicesVector) {
|
||||
return constant({(int)indicesVector.size()},
|
||||
inits::fromVector(indicesVector),
|
||||
Type::uint32);
|
||||
}
|
||||
// this version sets up the shape such that the indices are in a given axis
|
||||
// Use this if you want to pass these indices to gather().
|
||||
// indexee shape = (3, 2, 5, 2); axis = 1 -> resulting shape = (1, size of indicesVector, 1, 1)
|
||||
|
||||
/**
|
||||
* Specify the indexes of elements to be taken from a tensor.
|
||||
* This version sets up the shape such that the indices are in a given axis.
|
||||
* Use this if you want to pass these indices to gather().
|
||||
* E.g., indexee shape = (3, 2, 5, 2); axis = 1 -> resulting shape = (1, size of indicesVector, 1, 1):
|
||||
* - The size of the resulting shape is the same as that of the indexee; here is 4.
|
||||
* - The shape of the specified axis is equal to the size of given indicesVector.
|
||||
* - The shapes of the rest axes are filled with 1.
|
||||
* @param indicesVector a vector of IndexType (uint32_t) specifies the indexes
|
||||
* @param indexee the source tensor that we want to select elements from
|
||||
* @param axis specifies the axis that we want to collect along
|
||||
*/
|
||||
Expr indices(const std::vector<IndexType>& indicesVector, Expr indexee, int axis = -1) {
|
||||
Shape shape;
|
||||
shape.resize(indexee->shape().size());
|
||||
@ -393,24 +545,70 @@ public:
|
||||
Type::uint32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node filled with `1`.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
*/
|
||||
Expr ones(const Shape& shape, Type elementType) {
|
||||
return constant(shape, inits::ones(), elementType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node filled with `1` without a specified type,
|
||||
* and the type is set to defaultElementType_.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
*/
|
||||
Expr ones(const Shape& shape) {
|
||||
return constant(shape, inits::ones(), defaultElementType_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node filled with `0`.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
*/
|
||||
Expr zeros(const Shape& shape, Type elementType) {
|
||||
return constant(shape, inits::zeros(), elementType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a constant node filled with `0` without a specified type,
|
||||
* and the type is set to defaultElementType_.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
*/
|
||||
Expr zeros(const Shape& shape) {
|
||||
return constant(shape, inits::zeros(), defaultElementType_);
|
||||
}
|
||||
|
||||
// prob = dropProb, e.g. 0.1 means 90% of values are kept
|
||||
/**
|
||||
* Construct a dropout mask (a tensor of 0 and 1).
|
||||
* @param dropProb a float type specifies the dropout probability.
|
||||
* E.g., dropProb=0.1 means 90% of values are kept.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
*/
|
||||
Expr dropoutMask(float dropProb, const Shape& shape, Type elementType);
|
||||
|
||||
/**
|
||||
* Construct a dropout mask (a tensor of 0 and 1) without a specified type,
|
||||
* and the type is set to defaultElementType_.
|
||||
* @param dropProb a float type specifies the dropout probability.
|
||||
* E.g., dropProb=0.1 means 90% of values are kept.
|
||||
* @param shape a struct type defines the shape of the constant dataset
|
||||
* e.g., shape={2,3} means 2D matrix with dim[0]=2 and dim[1]=3
|
||||
*/
|
||||
Expr dropoutMask(float dropProb, const Shape& shape);
|
||||
|
||||
/**
|
||||
* Get the parameter object by name.
|
||||
* @param name a string specifies the name of the parameter object
|
||||
*/
|
||||
Expr get(std::string name) {
|
||||
if(!namespace_.empty())
|
||||
name = namespace_ + "::" + name;
|
||||
@ -419,6 +617,11 @@ public:
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the parameter object by name and type.
|
||||
* @param name a string specifies the name of the parameter object
|
||||
* @param elementType a scoped enumerator (enum class) defines the element type, e.g., Type::float16
|
||||
*/
|
||||
Expr get(std::string name, Type specifiedElementType) {
|
||||
if(!namespace_.empty())
|
||||
name = namespace_ + "::" + name;
|
||||
@ -427,6 +630,10 @@ public:
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the Parameters object related to the graph.
|
||||
* The Parameters object holds the whole set of the parameter nodes.
|
||||
*/
|
||||
Ptr<Parameters>& params() {
|
||||
// There are no parameter objects, that's weird.
|
||||
ABORT_IF(paramsByElementType_.empty(), "No parameter object has been created");
|
||||
@ -441,6 +648,10 @@ public:
|
||||
return it->second;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set default element type for the graph.
|
||||
* The default value is used if some node type is not specified.
|
||||
*/
|
||||
void setDefaultElementType(Type defaultElementType) {
|
||||
ABORT_IF(!paramsByElementType_.empty() && defaultElementType != defaultElementType_,
|
||||
"Parameter objects already exist, cannot change default type from {} to {}",
|
||||
@ -448,33 +659,58 @@ public:
|
||||
defaultElementType_ = defaultElementType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default element type for the graph.
|
||||
*/
|
||||
Type getDefaultElementType() { return defaultElementType_; }
|
||||
|
||||
/**
|
||||
* Add a expression node to the graph.
|
||||
* @param node a pointer to a expression node
|
||||
*/
|
||||
Expr add(Expr node);
|
||||
|
||||
/**
|
||||
* Allocate memory for the forward pass of the given node.
|
||||
* @param node a pointer to a expression node
|
||||
*/
|
||||
void allocateForward(Expr node) {
|
||||
if(tensors_)
|
||||
tensors_->allocateForward(node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate memory for the backward pass of the given node.
|
||||
* @param node a pointer to a expression node
|
||||
*/
|
||||
void allocateBackward(Expr node) {
|
||||
if(tensors_)
|
||||
tensors_->allocateBackward(node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free the memory for a tensor object.
|
||||
* @param tensor a reference to the tensor object
|
||||
*/
|
||||
void free(const Tensor& tensor) {
|
||||
if(tensors_)
|
||||
tensors_->free(tensor);
|
||||
}
|
||||
|
||||
// Returns the memory allocator of the graph workspace, allocates row unstructured memory (but 256-byte aligned)
|
||||
/**
|
||||
* Returns the memory allocator of the graph workspace.
|
||||
* Allocates raw unstructured memory (but 256-byte aligned).
|
||||
*/
|
||||
Ptr<Allocator> allocator() { return tensors_->getAllocator(); } // @TODO: rename this to getAllocator();
|
||||
|
||||
// Returns the tensor allocator of the graph workspace, different from above as proper tensor objects are allocated
|
||||
/**
|
||||
* Returns the tensor allocator of the graph workspace.
|
||||
* Different from allocator() as proper tensor objects are allocated.
|
||||
*/
|
||||
Ptr<TensorAllocator> getTensorAllocator() { return tensors_->getTensorAllocator(); }
|
||||
|
||||
/** Clear everything apart from parameters and memoized nodes */
|
||||
void clear() {
|
||||
// clear everything apart from parameters and memoized nodes
|
||||
count_ = 0;
|
||||
nodesForward_.clear();
|
||||
nodesBackward_.clear();
|
||||
@ -484,13 +720,17 @@ public:
|
||||
tensors_->clear();
|
||||
}
|
||||
|
||||
/** Set the flag value whether the graph is reloaded (true) or not */
|
||||
void setReloaded(bool reloaded) { reloaded_ = reloaded; }
|
||||
|
||||
/** Set the flag value whether the graph throws a NaN exception (true) or not */
|
||||
void setThrowNaN(bool throwNaN) { throwNaN_ = throwNaN; }
|
||||
|
||||
/** Get the flag value whether the graph throws a NaN exception (true) or not */
|
||||
bool getThrowNaN() { return throwNaN_; }
|
||||
|
||||
public:
|
||||
// loading from array of io::Items
|
||||
/** Load model (mainly parameter objects) from array of io::Items */
|
||||
void load(std::vector<io::Item>& ioItems, bool markReloaded = true) {
|
||||
setReloaded(false);
|
||||
for(auto& item : ioItems) {
|
||||
@ -509,18 +749,24 @@ public:
|
||||
setReloaded(true);
|
||||
}
|
||||
|
||||
/** Load model by filename */
|
||||
void load(const std::string& name, bool markReloaded = true) {
|
||||
LOG(info, "Loading model from {}", name);
|
||||
auto items = io::loadItems(name);
|
||||
load(items, markReloaded);
|
||||
}
|
||||
|
||||
/** Load model from buffer (a file pointer) */
|
||||
void load(const void* ptr, bool markReloaded = true) {
|
||||
LOG(info, "Loading model from buffer at {}", ptr);
|
||||
auto items = io::loadItems(ptr);
|
||||
load(items, markReloaded);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn the model (given a file pointer) into a memory-mapped type
|
||||
* by converting all the parameter object to memory-mapped version, i.e., MappedParameters.
|
||||
*/
|
||||
void mmap(const void* ptr, bool markReloaded = true) {
|
||||
ABORT_IF(backend_->getDeviceId().type != DeviceType::cpu || !inferenceOnly_,
|
||||
"Memory mapping only supported for CPU inference mode");
|
||||
@ -543,7 +789,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// pre-populate parameters by type
|
||||
for(auto& item : items) {
|
||||
auto it1 = paramsByElementType_.find(item.type);
|
||||
@ -558,9 +803,19 @@ public:
|
||||
}
|
||||
|
||||
public:
|
||||
// convert all parameters into an array of io::Item elements, for saving
|
||||
/**
|
||||
* Convert all parameters into an array of io::Item elements, for saving.
|
||||
* @param ioItems an array of io::Item elements
|
||||
* @param saveElementType the element type for saving
|
||||
*/
|
||||
void save(std::vector<io::Item>& ioItems, Type saveElementType = Type::float32);
|
||||
|
||||
/**
|
||||
* Save all parameters into a file (.npz or .bin).
|
||||
* @param name a string specifies the filename
|
||||
* @param meta a string specifies the name of io::Item elements. If not specified, the parameter name is reserved.
|
||||
* @param saveElementType the element type for saving
|
||||
*/
|
||||
void save(const std::string& name, const std::string& meta = "", Type saveElementType = Type::float32) {
|
||||
std::vector<io::Item> ioItems;
|
||||
save(ioItems, saveElementType);
|
||||
|
@ -72,6 +72,14 @@ Expr sin(Expr a) {
  return Expression<SinNodeOp>(a);
};

Expr cos(Expr a) {
  return Expression<CosNodeOp>(a);
};

Expr tan(Expr a) {
  return Expression<TanNodeOp>(a);
};

Expr swish(Expr a) {
  return Expression<SwishNodeOp>(a);
}

File diff suppressed because it is too large
@ -27,11 +27,6 @@ void Node::free() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialization for backward step of top node
|
||||
* in computation graph. Allocates memory and sets gradient
|
||||
* to 1 (df/df == 1).
|
||||
*/
|
||||
void Node::init_dependent() {
|
||||
if(!adj_) {
|
||||
graph()->allocateBackward(this);
|
||||
@ -39,12 +34,6 @@ void Node::init_dependent() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialization for backward step of any non-top node
|
||||
* in computation graph. Allocates memory and sets gradient
|
||||
* to 0 for further accumulation of gradients from all
|
||||
* parents.
|
||||
*/
|
||||
void Node::set_zero_adjoint() {
|
||||
if(!adj_) {
|
||||
graph()->allocateBackward(this);
|
||||
|
@ -28,13 +28,13 @@ protected:
|
||||
std::vector<Expr> children_;
|
||||
|
||||
Weak<ExpressionGraph> graph_;
|
||||
Shape shape_{1, 1, 1, 1};
|
||||
Type valueType_{Type::float32};
|
||||
Shape shape_{1, 1, 1, 1}; // defines the dimensionality of the node (for tensors)
|
||||
Type valueType_{Type::float32}; // defines the element type of the node (for tensors)
|
||||
|
||||
std::string name_{"none"};
|
||||
|
||||
Tensor val_{nullptr};
|
||||
Tensor adj_{nullptr};
|
||||
Tensor val_{nullptr}; // the resulting new tensor in forward pass
|
||||
Tensor adj_{nullptr}; // the accumulated gradients (a tensor) in backward pass
|
||||
|
||||
bool markedForDebug_{false};
|
||||
std::string debugMessage_;
|
||||
@ -105,9 +105,19 @@ public:
|
||||
virtual void free() override;
|
||||
|
||||
virtual void init() override {};
|
||||
|
||||
/**
|
||||
* Initialization for backward step of top node
|
||||
* in computation graph. Allocates memory and sets gradient
|
||||
* to 1 (df/df == 1).
|
||||
*/
|
||||
virtual void init_dependent() override;
|
||||
|
||||
/**
|
||||
* Initialization for backward step of any non-top node
|
||||
* in computation graph. Allocates memory and sets gradient
|
||||
* to 0 for further accumulation of gradients from all
|
||||
* parents.
|
||||
*/
|
||||
virtual void set_zero_adjoint() override;
|
||||
|
||||
virtual Tensor& val() override { return val_; };
|
||||
|
@ -98,9 +98,10 @@ Ptr<NodeInitializer> glorotUniform(bool fanIn, bool fanOut, float scalingFactor)
|
||||
return fromLambda([fanIn, fanOut, scalingFactor](Tensor t) {
|
||||
float scale = sqrtf(6.0f / (t->shape()[-2] + t->shape()[-1]));
|
||||
if(fanIn && !fanOut)
|
||||
scale = sqrtf(3.0f / t->shape()[-2]); // results in columns of matrix to be ~unit length
|
||||
scale = sqrtf(3.0f / t->shape()[-2]); // fanIn mode: the scale of tensor is adapted by the input variance
|
||||
// results in columns of matrix to be ~unit range
|
||||
if(!fanIn && fanOut)
|
||||
scale = sqrtf(3.0f / t->shape()[-1]);
|
||||
scale = sqrtf(3.0f / t->shape()[-1]); // fanOut mode: the scale of tensor is adapted by the output variance
|
||||
|
||||
scale *= scalingFactor;
|
||||
|
||||
@ -112,9 +113,9 @@ Ptr<NodeInitializer> glorotNormal(bool fanIn, bool fanOut, float scalingFactor)
|
||||
return fromLambda([fanIn, fanOut, scalingFactor](Tensor t) {
|
||||
float scale = sqrtf(2.0f / (t->shape()[-2] + t->shape()[-1]));
|
||||
if(fanIn && !fanOut)
|
||||
scale = sqrtf(1.0f / t->shape()[-2]);
|
||||
scale = sqrtf(1.0f / t->shape()[-2]); // fanIn mode: the scale of tensor is adapted by the input variance
|
||||
if(!fanIn && fanOut)
|
||||
scale = sqrtf(1.0f / t->shape()[-1]);
|
||||
scale = sqrtf(1.0f / t->shape()[-1]); // fanOut mode: the scale of tensor is adapted by the output variance
|
||||
|
||||
scale *= scalingFactor;
|
||||
|
||||
@ -170,7 +171,7 @@ Ptr<NodeInitializer> fromWord2vec(const std::string& file,
|
||||
bool normalize /*= false*/) {
|
||||
return fromLambda([file, dimVoc, dimEmb, normalize](Tensor t) {
|
||||
auto embs = Word2VecReader().read(file, dimVoc, dimEmb);
|
||||
if(normalize) {
|
||||
if(normalize) { // scaling to unit length:
|
||||
float norm = 0;
|
||||
for(auto e : embs)
|
||||
norm += e * e;
|
||||
|
@ -11,17 +11,18 @@
|
||||
namespace marian {
|
||||
|
||||
class ExpressionGraph; // Forward declaration
|
||||
|
||||
/**
|
||||
* The namespace inits.
|
||||
* Declare class NodeInitializer and all the available functions to initialise a node.
|
||||
*/
|
||||
namespace inits {
|
||||
|
||||
/**
|
||||
* Base class for specialized NodeInitializers.
|
||||
*
|
||||
* A NodeInitializer is a functor that is associated with parameters
|
||||
* and constants, and is invoked on a tensor during node intialization.
|
||||
* You need to override NodeIntializer::apply(Tensor) with your own
|
||||
* functionality or use a fromLambda intializer.
|
||||
*
|
||||
* and constants, and is invoked on a tensor during node initialization.
|
||||
* You need to override NodeInitializer::apply(Tensor) with your own
|
||||
* functionality or use a fromLambda initializer.
|
||||
* See node_initializers.cpp for examples.
|
||||
*/
|
||||
class NodeInitializer {
|
||||
@ -35,155 +36,242 @@ public:
|
||||
};
|
||||
|
||||
/**
|
||||
* Use a lambda function of form [](Tensor t) { do something with t } to initalize tensor
|
||||
* Use a lambda function of form [](Tensor t) { do something with t } to initialize tensor.
|
||||
* @param func functor
|
||||
*/
|
||||
Ptr<NodeInitializer> fromLambda(std::function<void(Tensor)>&& func);
|
||||
|
||||
/**
|
||||
* Use a lambda function of form [](Tensor t) { do something with t } to initalize tensor
|
||||
* Create temporary tensor of Type intermediateType first, initialize and then copy/convert to actual Tensor
|
||||
* Useful for functions that can only operate on a specific type of tensor
|
||||
* Use a lambda function of form [](Tensor t) { do something with t } to initialize tensor.
|
||||
* Create temporary tensor of Type intermediateType first, initialize and then copy/convert to actual Tensor.
|
||||
* Useful for functions that can only operate on a specific type of tensor.
|
||||
*/
|
||||
Ptr<NodeInitializer> fromLambda(std::function<void(Tensor)>&& func, Type intermediateType);
|
||||
|
||||
/**
|
||||
* Initialize tensor with given value
|
||||
*
|
||||
* Creates a NodeInitializer that will intialize the given tensor
|
||||
* Initialize tensor with given value.
|
||||
* Creates a NodeInitializer that will initialize the given tensor
|
||||
* with `value`. Works with any underlying numeric tensor type.
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> fromValue(float value);
|
||||
|
||||
/**
|
||||
* Fill tensor with `0`
|
||||
*
|
||||
* Creates a NodeInitializer that will intialize the given tensor
|
||||
* Fill tensor with `0`.
|
||||
* Creates a NodeInitializer that will initialize the given tensor
|
||||
* with `0`. Works with any underlying numeric tensor type.
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
static Ptr<NodeInitializer> zeros() { return fromValue(0.0f); }
|
||||
|
||||
/**
|
||||
* Fill tensor with `1`
|
||||
*
|
||||
* Creates a NodeInitializer that will intialize the given tensor
|
||||
* Fill tensor with `1`.
|
||||
* Creates a NodeInitializer that will initialize the given tensor
|
||||
* with `1`. Works with any underlying numeric tensor type.
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
static Ptr<NodeInitializer> ones() { return fromValue(1.0f); }
|
||||
|
||||
/**
|
||||
* Set diagonal of two dimensional quadratic matrix to `value`.
|
||||
*
|
||||
* Sets all values of the tensor to 0 and intializes the diagonal with
|
||||
* Sets all values of the tensor to 0 and initializes the diagonal with
|
||||
* the given `value`. If no value is specified `1` is used by default.
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> eye(float value = 1.f);
|
||||
|
||||
/**
|
||||
* Intialize tensor with normally distributed random numbers
|
||||
*
|
||||
* Be default this generates floating point numbers from the
|
||||
* Initialize tensor with normally distributed random numbers.
|
||||
* By default this generates floating point numbers from the
|
||||
* normal distribution Normal(0, 1) unless specified differently.
|
||||
*
|
||||
* If compiled with `CUDA`, `marian` will use the `cuRand` library
|
||||
* for both, GPU and CPU computation. The random sequences generated
|
||||
* are the same on both devices.
|
||||
*
|
||||
* If `marian` is compiled without `CUDA`, a random generator
|
||||
* from the C++ standard library is used. These random generators
|
||||
* do not have the same random sequences.
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> normal(float mean = 0.f, float stddev = 1.f);
|
||||
|
||||
/**
|
||||
* Intialize tensor with uniformly distributed random numbers
|
||||
*
|
||||
* Be default this generates floating point numbers from the
|
||||
* Initialize tensor with uniformly distributed random numbers.
|
||||
* By default this generates floating point numbers from the
|
||||
* uniform distribution Uniform(0, 1) unless specified differently.
|
||||
*
|
||||
* If compiled with `CUDA`, `marian` will use the `cuRand` library
|
||||
* for both, GPU and CPU computation. The random sequences generated
|
||||
* are the same on both devices.
|
||||
*
|
||||
* If `marian` is compiled without `CUDA`, a random generator
|
||||
* from the C++ standard library is used. These random generators
|
||||
* do not have the same random sequences.
|
||||
*
|
||||
* @param a the lower bound of interval
|
||||
* @param b the upper bound of interval
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> uniform(float a = 0.f, float b = 1.f);
|
||||
|
||||
// @TODO: add documentation
|
||||
/**
|
||||
* Initialize tensor with random numbers from Bernoulli Distribution.
|
||||
* The Bernoulli distribution is the discrete probability distribution of
|
||||
* a random variable which takes value `1` with probability p, and
|
||||
* value `0` with probability (1-p).
|
||||
* By default this function generates a tensor of 0 and 1 with probability p
|
||||
* if bernoulli(p) is called. We offer `scale` and `shift` parameters which
|
||||
* can map {0,1} to {0,1}*`scale`+`shift`.
|
||||
* E.g., bernoulli(tensor, 0.5f, 2.f, -1.f) where p=0.5f, scale=2.f, shift=-1.f.
|
||||
* {0,1} is mapped to {0,1}*2+(-1)= {-1,1}. It generates a tensor composed of
|
||||
* 50% of 1 and 50% of -1.
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> bernoulli(float p, float scale = 1.f, float shift = 0.f);
|
||||
|
||||
// @TODO: add documentation
/**
 * Initialize tensor with random numbers from the Glorot uniform distribution.
 * The <a href=http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>Glorot uniform</a>,
 * also called Xavier uniform, is designed to keep the scale of
 * the gradients roughly the same in all layers.
 * This function offers three variants (modes).
 * The values of the tensor are sampled from Uniform(-x*scale, x*scale):
 * - when fanIn=false and fanOut=false (by default):
 *   x = sqrt(6 / (in + out))
 * - when fanIn=true and fanOut=false (fanIn mode):
 *   x = sqrt(3 / in)
 * - when fanIn=false and fanOut=true (fanOut mode):
 *   x = sqrt(3 / out)
 * where `in` is the number of input units in the tensor and `out` is the number of output units.
 * `scale` is used to change the range of the uniform distribution.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> glorotUniform(bool fanIn = false, bool fanOut = false, float scale = 1.f);

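A typical use is initializing a weight matrix so that its fan-in and fan-out enter the formula above (illustrative sketch, not part of this diff):

    // 512-in, 2048-out projection, x = sqrt(6 / (512 + 2048))
    auto W = graph->param("W1", {512, 2048}, inits::glorotUniform());
    // fan-in mode only, x = sqrt(3 / 512)
    auto V = graph->param("V1", {512, 2048}, inits::glorotUniform(/*fanIn=*/true, /*fanOut=*/false));
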
// @TODO: add documentation
/**
 * Initialize tensor with random numbers from the Glorot normal distribution.
 * Similar to glorotUniform(), but adopts a normal distribution instead of
 * a uniform distribution.
 * This function offers three variants (modes).
 * The values of the tensor are sampled from a normal distribution with
 * mean 0 and standard deviation x*scale:
 * - when fanIn=false and fanOut=false (by default):
 *   x = sqrt(2 / (in + out))
 * - when fanIn=true and fanOut=false (fanIn mode):
 *   x = sqrt(1 / in)
 * - when fanIn=false and fanOut=true (fanOut mode):
 *   x = sqrt(1 / out)
 * where `in` is the number of input units in the tensor and `out` is the number of output units.
 * `scale` is used to change the width of the normal distribution.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> glorotNormal(bool fanIn = false, bool fanOut = false, float scale = 1.f);

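Usage mirrors glorotUniform() (illustrative sketch, not part of this diff):

    // standard deviation sqrt(2 / (512 + 2048))
    auto W = graph->param("W2", {512, 2048}, inits::glorotNormal());
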
// @TODO: add documentation
Ptr<NodeInitializer> dropout(float dropoutProbabilty);
/**
 * Initialize a dropout mask (a tensor of 0s and 1s) with the given dropout probability.
 * <a href=https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf>Dropout</a>
 * is a technique proposed to prevent neural networks from overfitting.
 * @param dropoutProbability a float that defines the dropout probability.
 * E.g., dropoutProbability=0.1 means 90% of values are kept.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> dropout(float dropoutProbability);

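A sketch of building a dropout mask and applying it to an activation of matching shape (illustrative, not part of this diff; whether the kept values are additionally rescaled is left to the actual implementation):

    // ~10% of the mask entries are 0, ~90% are 1
    auto mask = graph->constant({64, 512}, inits::dropout(0.1f));
    auto h    = hidden * mask;  // element-wise multiplication zeroes out the dropped units
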
/**
 * Intialize with gumbel noise, i.e. -log(-log(u)) where u ~ Uniform(0 + eps, 1 - eps)
 *
 * Initialize with Gumbel noise, i.e. -log(-log(u)) where u ~ Uniform(0 + eps, 1 - eps).
 * @param eps a small epsilon that protects against log(0)
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> gumbel(float eps = 1e-5f);

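Gumbel noise is typically added to logits for Gumbel-max style sampling (illustrative sketch, not part of this diff; `logits` is assumed to be an existing expression node):

    // perturb the logits with Gumbel noise; taking the per-row argmax of the
    // perturbed values draws a sample from the softmax distribution
    auto noise     = graph->constant(logits->shape(), inits::gumbel());
    auto perturbed = logits + noise;
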
// @TODO: add documentation
/**
 * Initialize tensor by *copying* from the given vector.
 * Creates a NodeInitializer that will initialize the tensor
 * by *copying* the values from the given vector.
 * @param v vector
 * @return A NodeInitializer
 */
template <typename T>
Ptr<NodeInitializer> fromVector(const std::vector<T>& v);

/**
 * Initialize tensor by *moving* from the given vector.
 * Creates a NodeInitializer that will initialize the tensor by *moving* the values
 * from the given vector into this tensor; the given vector may be emptied.
 * This version is the <a href=https://en.cppreference.com/w/cpp/language/reference>
 * rvalue reference</a> overload.
 * @param v vector
 * @return A NodeInitializer
 */
template <typename T>
Ptr<NodeInitializer> fromVector(std::vector<T>&& v);

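A sketch of both overloads (illustrative, not part of this diff):

    std::vector<float> vals = {1.f, 2.f, 3.f, 4.f};
    // copying overload: vals stays usable afterwards
    auto a = graph->constant({2, 2}, inits::fromVector(vals));
    // moving overload: vals may be left empty afterwards
    auto b = graph->constant({2, 2}, inits::fromVector(std::move(vals)));
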
// @TODO: add documentation
/**
 * Initialize tensor from a given sparse vector.
 * Creates a NodeInitializer that will initialize the tensor from a given
 * sparse vector (stored in a std::pair). The resulting tensor is first filled
 * with `1e-6` (a small placeholder value), then the entries at the given
 * indices are set to the given values.
 * @param v the sparse vector stored in a `std::pair`:
 * - the first object (v.first) holds the indices (in a vector)
 * - the second object (v.second) holds the corresponding values (in a vector).
 * This means the value of the resulting tensor at index v.first[i] is v.second[i].
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> fromSparseVector(std::pair<std::vector<size_t>, std::vector<float>>& v);

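A sketch of the expected std::pair layout (illustrative, not part of this diff):

    // indices {0, 5, 9} get values {1.0, 0.5, -2.0}; all other entries keep the 1e-6 placeholder
    std::pair<std::vector<size_t>, std::vector<float>> sparse{{0, 5, 9}, {1.0f, 0.5f, -2.0f}};
    auto s = graph->constant({1, 10}, inits::fromSparseVector(sparse));
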
// @TODO: add documentation
/**
 * Initialize tensor by copying from the given io::Item.
 * Creates a NodeInitializer that will initialize the tensor by copying the values
 * from the given io::Item. If this io::Item is a memory-mapped item, then the
 * function will set the memory region pointing to this item. If this io::Item is
 * a regular item, then the function will copy the values from this item.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> fromItem(const io::Item& item);

// @TODO: add documentation
/**
 * Initialize tensor by copying from the given tensor.
 * Creates a NodeInitializer that will initialize the tensor
 * by copying the values from the given tensor.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> fromTensor(Tensor tensor);

// @TODO: add documentation
/**
 * Initialize tensor from a file.
 * Creates a NodeInitializer that will initialize the tensor
 * by copying the values from the given file. This function is
 * mainly used for loading embedding vectors from a file.
 * @param file filename
 * @param dimVoc the number of words in the vocabulary
 * @param dimEmb the length of embedding vectors
 * @param normalize a flag indicating whether the values are normalized.
 * Here we adopt the method of <a
 * href=https://en.wikipedia.org/wiki/Feature_scaling#Scaling_to_unit_length>
 * scaling to unit length</a>, i.e., dividing each element by the Euclidean length of the vector.
 * @return A NodeInitializer
 */
Ptr<NodeInitializer> fromWord2vec(const std::string& file,
                                  int dimVoc,
                                  int dimEmb,
                                  bool normalize = false);

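A sketch of loading pre-trained embeddings as the initial value of an embedding matrix (illustrative, not part of this diff; the path and dimensions are placeholders):

    // vocabulary of 32000 entries, 512-dimensional vectors, length-normalized on load
    auto E = graph->param("Wemb", {32000, 512},
                          inits::fromWord2vec("embeddings.w2v.txt", 32000, 512, /*normalize=*/true));
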
/**
 * Computes Google's sinusoidal position embeddings.
 * Computes Google's Transformer-style sinusoidal position embeddings
 * starting from position 'start' taking into account batch and time
 * dimensions of the tensor.
 *
 * Expected tensor layout {-2: time, -1: model}
 *
 * Usually gets later reshaped to {time, 1, model} and
 * added with a broadcast to learned embeddings. Positional
 * embeddings are the same for each batch entry and change
 * over time steps.
 * dimensions of the tensor. Expected tensor layout {-2: time, -1: model}.
 * Usually gets later reshaped to {time, 1, model} and added with a broadcast
 * to learned embeddings. Positional embeddings are the same for each batch
 * entry and change over time steps.
 */
Ptr<NodeInitializer> sinusoidalPositionEmbeddings(int start);

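A sketch of the reshape-and-broadcast pattern described above (illustrative, not part of this diff; maxLen, dimEmb and wordEmb are placeholders):

    // {maxLen, dimEmb} table of sinusoidal embeddings, starting at position 0
    auto pos = graph->constant({maxLen, dimEmb}, inits::sinusoidalPositionEmbeddings(0));
    // broadcast-add to word embeddings of shape {maxLen, batch, dimEmb}
    auto embWithPos = wordEmb + reshape(pos, {maxLen, 1, dimEmb});
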
/**
 * Computes a random rotation matrix for LSH hashing. This is part
 * of a hash function. The values are orthonormal and computed via
 * Computes a random rotation matrix for LSH hashing.
 * This is part of a hash function. The values are orthonormal and computed via
 * QR decomposition. Same seed results in same random rotation.
 */
Ptr<NodeInitializer> randomRotation(size_t seed = Config::seed);

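A minimal sketch (illustrative, not part of this diff; the shape is a placeholder for whatever the LSH code expects):

    // reproducible rotation: the same seed yields the same orthonormal matrix
    auto R = graph->constant({dimEmb, nBits}, inits::randomRotation(/*seed=*/1234));
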
/**
 * Computes the equivalent of Python's range().
 * Computes a range from begin to end-1, like Python's range().
 * The constant being initialized must have one dimension that matches
 * the number of elements being generated, while any other dimension must be 1.

@ -5,7 +5,13 @@
#include "tensors/tensor.h"

namespace marian {

/**
 * A constant node for the graph.
 * A constant node holds a tensor whose value is immutable
 * during training. ConstantNode instances are usually
 * used as inputs. To construct a constant node in the
 * graph, we use the constant() function of the ExpressionGraph class.
 */
struct ConstantNode : public Node {
  ConstantNode(Ptr<ExpressionGraph> graph,
               const Shape& shape,
@ -35,7 +41,13 @@ private:
  Ptr<inits::NodeInitializer> init_;
  bool initialized_;
};

/**
 * A parameter node for the graph.
 * A parameter node is used to store model parameters whose values can be
 * changed during training, such as weights and biases. To construct
 * a parameter node in the graph, we use the param() function of the
 * ExpressionGraph class.
 */
struct ParamNode : public Node {
  ParamNode(Ptr<ExpressionGraph> graph,
            const Shape& shape,

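A short sketch of how the two node types are typically created via ExpressionGraph, as described in the comments above (illustrative, not part of this diff; names, shapes and maskValues are placeholders):

    // a trainable parameter node (its value is updated by the optimizer)
    auto W = graph->param("W", {512, 512}, inits::glorotUniform());
    // a constant node (its value never changes during training)
    auto mask = graph->constant({1, 512}, inits::fromVector(maskValues));
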
@ -646,7 +646,7 @@ struct CosNodeOp : public UnaryNodeOp {
    return {NodeOp(Add(_1 * -sin(_2), child(0)->grad(), adj_, child(0)->val()))};
  }

  const std::string type() override { return "sin"; }
  const std::string type() override { return "cos"; }
};

struct TanNodeOp : public UnaryNodeOp {
@ -662,7 +662,7 @@ struct TanNodeOp : public UnaryNodeOp {
    return {NodeOp(Add(_1 / sqr(cos(_2)), child(0)->grad(), adj_, child(0)->val()))};
  }

  const std::string type() override { return "sin"; }
  const std::string type() override { return "tan"; }
};

struct SqrtNodeOp : public UnaryNodeOp {

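These backward NodeOps implement the standard derivatives d/dx cos(x) = -sin(x) and d/dx tan(x) = 1/cos(x)^2; the type() strings are corrected from the copy-pasted "sin". A minimal sketch of the corresponding expression operators (illustrative, not part of this diff; x is assumed to be an existing expression node):

    // element-wise trigonometric ops on an expression node x
    auto y = cos(x);
    auto z = tan(x);
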
@ -37,3 +37,5 @@ template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functio
template void marian::gpu::Add<marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase> >(marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::gpu::Aggregate<marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, IntrusivePtr<marian::TensorBase> >(marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, float, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,class IntrusivePtr<class marian::TensorBase>,class IntrusivePtr<class marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,float,class IntrusivePtr<class marian::TensorBase>,class IntrusivePtr<class marian::TensorBase>,class IntrusivePtr<class marian::TensorBase>);
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, float, marian::Tensor, marian::Tensor, marian::Tensor);
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, float, marian::Tensor, marian::Tensor, marian::Tensor);
@ -37,6 +37,9 @@ template void marian::AggregateAll<float, float, marian::functional::BinaryFunct
template void marian::AggregateAll<float, float, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<float,float,marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,marian::functional::BinaryFunctor<marian::functional::elem::Plus,marian::functional::Assignee<1>,marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>,marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,float,marian::functional::BinaryFunctor<marian::functional::elem::Plus,marian::functional::Assignee<1>,marian::functional::Assignee<2> >,float,IntrusivePtr<marian::TensorBase>,IntrusivePtr<marian::TensorBase>,IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<float, float, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, float, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<float, float, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
template void marian::AggregateAll<float, float, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
#if COMPILE_FP16
template void AggregateAll<__half, float, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<1>>, Assignee<2>>, BinaryFunctor<elem::Plus, Assignee<1>, Assignee<2>>>(std::shared_ptr<Allocator>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<1>>, Assignee<2>>, float, BinaryFunctor<elem::Plus, Assignee<1>, Assignee<2>>, float, marian::Tensor, marian::Tensor, marian::Tensor);
template void AggregateAll<__half, float, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Div, Capture, Assignee<1>>>, Assignee<2>>, BinaryFunctor<elem::Plus, Assignee<1>, Assignee<2>>>(std::shared_ptr<Allocator>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Div, Capture, Assignee<1>>>, Assignee<2>>, float, BinaryFunctor<elem::Plus, Assignee<1>, Assignee<2>>, float, marian::Tensor, marian::Tensor, marian::Tensor);
@ -75,4 +78,6 @@ template void marian::AggregateAll<__half, float, marian::functional::BinaryFunc
template void marian::AggregateAll<__half, float, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<__half,float,marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,marian::functional::BinaryFunctor<marian::functional::elem::Plus,marian::functional::Assignee<1>,marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>,marian::functional::BinaryFunctor<marian::functional::elem::Mult,marian::functional::Assignee<1>,marian::functional::UnaryFunctor<marian::functional::elem::Cos,marian::functional::Assignee<2> > >,float,marian::functional::BinaryFunctor<marian::functional::elem::Plus,marian::functional::Assignee<1>,marian::functional::Assignee<2> >,float,IntrusivePtr<marian::TensorBase>,IntrusivePtr<marian::TensorBase>,IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<__half, float, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::Assignee<1> >, float, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::AggregateAll<__half, float, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Neg, marian::functional::UnaryFunctor<marian::functional::elem::Sin, marian::functional::Assignee<2> > > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
template void marian::AggregateAll<__half, float, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> > >(std::shared_ptr<marian::Allocator>, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqr, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > > >, float, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
#endif
@ -68,6 +68,8 @@ template void marian::gpu::Element<marian::functional::Assign<marian::functional
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Floor, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<2>, marian::functional::Capture> >, marian::functional::Capture> >, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::Capture> > > >, IntrusivePtr<marian::TensorBase> >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Floor, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<2>, marian::functional::Capture> >, marian::functional::Capture> >, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::Capture> > > >, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Pow, marian::functional::Capture, marian::functional::Assignee<1> >, marian::functional::Capture>, marian::functional::UnaryFunctor<marian::functional::elem::Sgn, marian::functional::Assignee<2> > > >, IntrusivePtr<marian::TensorBase> >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Pow, marian::functional::Capture, marian::functional::Assignee<1> >, marian::functional::Capture>, marian::functional::UnaryFunctor<marian::functional::elem::Sgn, marian::functional::Assignee<2> > > >, IntrusivePtr<marian::TensorBase>, IntrusivePtr<marian::TensorBase>);
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sgn, marian::functional::Assignee<1> >, marian::functional::Capture>, marian::functional::BinaryFunctor<marian::functional::elem::Pow, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Clip, marian::functional::UnaryFunctor<marian::functional::elem::Floor, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::Capture> >, marian::functional::Capture> >, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::Capture> > >, marian::functional::Capture> > > >>(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sgn, marian::functional::Assignee<1> >, marian::functional::Capture>, marian::functional::BinaryFunctor<marian::functional::elem::Pow, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Clip, marian::functional::UnaryFunctor<marian::functional::elem::Floor, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Abs, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<1>, marian::functional::Capture> >, marian::functional::Capture> >, marian::functional::UnaryFunctor<marian::functional::elem::Log, marian::functional::Capture> > >, marian::functional::Capture> > > >, IntrusivePtr<marian::TensorBase>);
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > >, marian::Tensor >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Cos, marian::functional::Assignee<2> > >, marian::Tensor, marian::Tensor);
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Tan, marian::functional::Assignee<2> > >, marian::Tensor >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Tan, marian::functional::Assignee<2> > >, marian::Tensor, marian::Tensor);
// How to add new specializations:
// When you use a new specialization, it will cause a link error of this form (example):
// .../src/tensors/tensor_operators.h:41: undefined reference to `void marian::gpu::Element<marian::functional::Assign< ... > ( ... )'
@ -21,6 +21,12 @@ namespace io {
struct Item;
}

/**
 * Main implementation of a <a href="https://en.wikipedia.org/wiki/Tensor">tensor</a>,
 * a multi-dimensional matrix containing elements of a single data type.
 * TensorBase contains the data, the data type, a pointer to the
 * memory region, the shape, backend info and other attributes.
 */
class TensorBase {
  MemoryPiece::PtrType memory_;
  Shape shape_;