mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
Merged PR 23094: Adapt --cost-scaling to more stable setting
This PR sets the default parameters for cost-scaling to 8.f 10000 1.f 8.f, i.e., when scaling, scale by 8 and do not try to automatically scale up or down. This seems more stable than the variable cost-scaling with larger numbers that was the default before.
This commit is contained in:
parent
310d2f42f6
commit
16bfa0c913
@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||
- Scripts using PyYAML now use `safe_load`; see https://msg.pyyaml.org/load
|
||||
|
||||
### Changed
|
||||
- Set default parameters for cost-scaling to 8.f 10000 1.f 8.f, i.e., when scaling, scale by 8 and do not try to automatically scale up or down. This seems more stable.
|
||||
- Make guided-alignment faster via sparse memory layout, add alignment points for EOS, remove losses other than ce.
|
||||
- Changed minimal C++ standard to C++-17
|
||||
- Faster LSH top-k search on CPU
|
||||
|
@ -32,7 +32,7 @@ void ConfigParser::addAliases(cli::CLIWrapper& cli) {
|
||||
if(mode_ == cli::mode::training) {
|
||||
config["precision"] = std::vector<std::string>({"float16", "float32"}); // inference type, optimization type, save type
|
||||
// scaling factor, frequency, multiplier at increase, minimum scaling factor
|
||||
config["cost-scaling"] = std::vector<std::string>({"256.f", "1000", "2.f", "256.f"});
|
||||
config["cost-scaling"] = std::vector<std::string>({"8.f", "10000", "1.f", "8.f"});
|
||||
} else {
|
||||
config["precision"] = std::vector<std::string>({"float16"}); // for inference we do not need the other types
|
||||
}
|
||||
|
@ -534,7 +534,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
|
||||
// mixed precision training
|
||||
cli.add<bool>("--fp16",
|
||||
"Shortcut for mixed precision training with float16 and cost-scaling, "
|
||||
"corresponds to: --precision float16 float32 --cost-scaling 256.f 1000 2.f 256.f");
|
||||
"corresponds to: --precision float16 float32 --cost-scaling 8.f 10000 1.f 8.f");
|
||||
cli.add<std::vector<std::string>>("--precision",
|
||||
"Mixed precision training for forward/backward pass and optimizaton. "
|
||||
"Defines types for: forward/backward pass, optimization.",
|
||||
@ -542,7 +542,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
|
||||
cli.add<std::vector<std::string>>("--cost-scaling",
|
||||
"Dynamic cost scaling for mixed precision training: "
|
||||
"scaling factor, frequency, multiplier, minimum factor")
|
||||
->implicit_val("256.f 1000 2.f 256.f");
|
||||
->implicit_val("8.f 10000 1.f 8.f");
|
||||
cli.add<size_t>("--gradient-norm-average-window",
|
||||
"Window size over which the exponential average of the gradient norm is recorded (for logging and scaling). "
|
||||
"After this many updates about 90% of the mass of the exponential average comes from these updates",
|
||||
|
Loading…
Reference in New Issue
Block a user