Fix nasty bug in BPE position encoding

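Example: sid=1313136, left=37596, right=37597 produces 18446744071878513373, which is incorrect: the upper 32 bits, which should hold sid, come out as all ones because `l << 16` overflows into the sign bit of a 32-bit value and is then sign-extended when widened to 64 bits.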
This commit is contained in:
Vadim Markovtsev 2023-05-18 19:39:30 +02:00 committed by GitHub
parent f2219b53e2
commit 3805cbb616
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -71,7 +71,9 @@ class Trainer : public TrainerInterface {
CHECK_GE(r, 0);
CHECK_LE(l, std::numeric_limits<uint16_t>::max());
CHECK_LE(r, std::numeric_limits<uint16_t>::max());
const uint64_t n = (static_cast<uint64_t>(sid) << 32 | (l << 16 | r));
const uint64_t n = (static_cast<uint64_t>(sid) << 32) |
(static_cast<uint64_t>(l) << 16) |
r;
return n;
}