Print errors by default in WASM build (#343)

* Remove BadHTML exception in favour of ABORT macro
   `ABORT()` gives us readable error messages, even when exception support is disabled.
* Control marian exception global setting in tests through fixture
* WASM: construct BlockingService with critical logging by default
   This log level is only used by ABORT()

See also:
- mozilla/firefox-translations#65
- mozilla/firefox-translations#68
- mozilla/firefox-translations#70
- mozilla/firefox-translations#56
This commit is contained in:
Jelmer 2022-02-09 12:54:36 +00:00 committed by GitHub
parent 6b2a855234
commit 80bd4e7651
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 52 additions and 44 deletions

View File

@ -10,6 +10,17 @@
using namespace marian::bergamot;
using marian::string_view;
// Test fixture that switches marian's global "throw exception on abort"
// setting on while a fixture instance is alive, and puts back whatever value
// was active before once the instance is destroyed. Intended to be used as the
// fixture class of TEST_CASE_METHOD so individual tests no longer flip the
// global themselves.
class MarianThrowsExceptionsFixture {
 protected:
  // Remembers the current global setting, then enables throwing.
  MarianThrowsExceptionsFixture() : prev_(marian::getThrowExceptionOnAbort()) {
    marian::setThrowExceptionOnAbort(true);
  }

  // Restores the setting captured at construction time.
  ~MarianThrowsExceptionsFixture() { marian::setThrowExceptionOnAbort(prev_); }

  // A copy would carry the same saved value and restore it a second time when
  // it is destroyed, possibly undoing a change made in between. The guard is
  // therefore neither copyable nor assignable.
  MarianThrowsExceptionsFixture(MarianThrowsExceptionsFixture const &) = delete;
  MarianThrowsExceptionsFixture &operator=(MarianThrowsExceptionsFixture const &) = delete;

 private:
  bool prev_;  // value of the global setting before this fixture changed it
};
// Streams a pair of byte ranges as "(first,second)" so that failed test
// comparisons can print both values.
std::ostream &operator<<(std::ostream &out, std::pair<ByteRange, ByteRange> const &b) {
  out << '(' << b.first;
  out << ',' << b.second;
  out << ')';
  return out;
}
@ -76,9 +87,7 @@ TEST_CASE("Ignore HTML if process_markup is false") {
CHECK(response.source.text == html_code);
}
TEST_CASE("Abort if alignments are missing") {
marian::setThrowExceptionOnAbort(true);
TEST_CASE_METHOD(MarianThrowsExceptionsFixture, "Abort if alignments are missing") {
std::string input("<p>hello <b>world</b></p>\n");
HTML html(std::move(input), true);
@ -108,9 +117,7 @@ TEST_CASE("Abort if alignments are missing") {
"Response object does not contain alignments. TranslationModel or ResponseOptions is misconfigured?");
}
TEST_CASE("Abort if alignments are misconfigured") {
marian::setThrowExceptionOnAbort(true);
TEST_CASE_METHOD(MarianThrowsExceptionsFixture, "Abort if alignments are misconfigured") {
std::string input("<p>hello <b>world</b></p>\n");
HTML html(std::move(input), true);

View File

@ -47,32 +47,6 @@ size_t countPrefixWhitespaces(string_view const &input) {
return size;
}
// Formatters used for exception messages combined with format()
std::ostream &operator<<(std::ostream &out, HTML::Tag const *tag) {
if (tag == nullptr) return out << "[nullptr]";
switch (tag->type) {
case HTML::Tag::ELEMENT:
return out << '<' << tag->name << tag->attributes << '>';
case HTML::Tag::VOID_ELEMENT:
return out << '<' << tag->name << tag->attributes << "/>";
case HTML::Tag::COMMENT:
return out << "<!--" << tag->data << "-->";
case HTML::Tag::PROCESSING_INSTRUCTION:
return out << "<?" << tag->data << "?>";
case HTML::Tag::WHITESPACE:
return out << "[inserted space]";
}
return out << "[Unknown tag type]";
}
// Streams every entry of `tags` in order, separated by single spaces, with no
// leading or trailing separator.
std::ostream &operator<<(std::ostream &out, HTML::Taint const &tags) {
  bool needSeparator = false;
  for (auto const &entry : tags) {
    if (needSeparator) out << ' ';
    needSeparator = true;
    out << entry;
  }
  return out;
}
// Very simple replacement for std::format introduced in C++20.
// This overload takes only the template and no values to substitute, so the
// template is handed back unchanged.
std::string format(std::string const &formatTemplate) {
  std::string result(formatTemplate);
  return result;
}
@ -270,6 +244,32 @@ size_t debugCountTokens(AnnotatedText const &text) {
namespace marian::bergamot {
// Formatters used for exception messages combined with format()
std::ostream &operator<<(std::ostream &out, HTML::Tag const *tag) {
if (tag == nullptr) return out << "[nullptr]";
switch (tag->type) {
case HTML::Tag::ELEMENT:
return out << '<' << tag->name << tag->attributes << '>';
case HTML::Tag::VOID_ELEMENT:
return out << '<' << tag->name << tag->attributes << "/>";
case HTML::Tag::COMMENT:
return out << "<!--" << tag->data << "-->";
case HTML::Tag::PROCESSING_INSTRUCTION:
return out << "<?" << tag->data << "?>";
case HTML::Tag::WHITESPACE:
return out << "[inserted space]";
}
return out << "[Unknown tag type]";
}
// Streams every entry of `tags` in order, separated by single spaces, with no
// leading or trailing separator.
std::ostream &operator<<(std::ostream &out, HTML::Taint const &tags) {
  bool needSeparator = false;
  for (auto const &entry : tags) {
    if (needSeparator) out << ' ';
    needSeparator = true;
    out << entry;
  }
  return out;
}
HTML::HTML(std::string &&source, bool process_markup, Options &&options) : options_(std::move(options)) {
if (!process_markup) return;
@ -288,7 +288,7 @@ HTML::HTML(std::string &&source, bool process_markup, Options &&options) : optio
while (!stop) {
switch (scanner.next()) {
case markup::Scanner::TT_ERROR:
throw BadHTML("HTML parse error");
ABORT("HTML parse error");
case markup::Scanner::TT_EOF:
stop = true;
@ -354,10 +354,10 @@ HTML::HTML(std::string &&source, bool process_markup, Options &&options) : optio
// bit of "<img/>", then completely ignore it.
if (contains(options_.voidTags, std::string(scanner.tag()))) break;
if (stack.empty()) throw BadHTML(format("Encountered more closing tags ({}) than opening tags", scanner.tag()));
ABORT_IF(stack.empty(), "Encountered more closing tags ({}) than opening tags", scanner.tag());
if (stack.back()->name != scanner.tag())
throw BadHTML(format("Encountered unexpected closing tag </{}>, stack is {}", scanner.tag(), stack));
ABORT_IF(stack.back()->name != scanner.tag(), "Encountered unexpected closing tag </{}>, stack is {}",
scanner.tag(), stack);
// What to do with "<u></u>" case, where tag is immediately closed
// so it never makes it into the taint of any of the spans? This adds
@ -407,11 +407,11 @@ HTML::HTML(std::string &&source, bool process_markup, Options &&options) : optio
break;
default:
throw BadHTML("Unsupported scanner token type");
ABORT("Unsupported scanner token type");
}
}
if (!stack.empty()) throw BadHTML(format("Not all tags were closed: {}", stack));
ABORT_IF(!stack.empty(), "Not all tags were closed: {}", stack);
// Add a trailing span (that's empty) to signify all closed tags.
spans_.emplace_back(Span{source.size(), source.size(), stack});

View File

@ -14,11 +14,6 @@ namespace bergamot {
struct Response;
/// Exception thrown by the HTML handling code when the input markup cannot be
/// processed (e.g. a parse error or unbalanced tags). It carries only the
/// message passed to the constructor, which is available through what().
class BadHTML : public std::runtime_error {
 public:
  /// @param what human-readable description of the problem
  explicit BadHTML(std::string const &what)
      : std::runtime_error{what} {
  }
};
class HTML {
public:
struct Options {

View File

@ -75,9 +75,15 @@ EMSCRIPTEN_BINDINGS(blocking_service_config) {
// aggregate-batching etc.
}
// Factory registered as the smart-pointer constructor of the BlockingService
// binding. The caller's config is left untouched: the service is built from a
// private copy whose logger level is forced to "critical".
std::shared_ptr<BlockingService> BlockingServiceFactory(const BlockingService::Config& config) {
  BlockingService::Config criticalConfig(config);
  criticalConfig.logger.level = "critical";
  return std::make_shared<BlockingService>(criticalConfig);
}
EMSCRIPTEN_BINDINGS(blocking_service) {
class_<BlockingService>("BlockingService")
.constructor<BlockingService::Config>()
.smart_ptr_constructor("BlockingService", &BlockingServiceFactory)
.function("translate", &BlockingService::translateMultiple)
.function("translateViaPivoting", &BlockingService::pivotMultiple);