mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-08 12:19:37 +03:00
LibMarkdown: Handle delimiter flanking with punctuation
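This patch handles the following two rules: 1) A left-flanking delimiter run is either (a) not followed by a Unicode punctuation character, or (b) followed by a Unicode punctuation character and preceded by Unicode whitespace or a Unicode punctuation character; right-flanking delimiter runs are handled symmetrically. 2) A _ delimiter run can close emphasis if it is right-flanking and either (a) not part of a left-flanking delimiter run or (b) part of a left-flanking delimiter run followed by a Unicode punctuation character; symmetrically, it can open emphasis if it is left-flanking and either (a) not part of a right-flanking delimiter run or (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.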
This commit is contained in:
parent
d28459fb11
commit
af5a07399e
Notes:
sideshowbarker
2024-07-18 04:10:45 +09:00
Author: https://github.com/petelliott Commit: https://github.com/SerenityOS/serenity/commit/af5a07399ee Pull-request: https://github.com/SerenityOS/serenity/pull/9928 Reviewed-by: https://github.com/BenWiederhake ✅ Reviewed-by: https://github.com/alimpfard
@ -169,12 +169,31 @@ Text Text::parse(StringView const& str)
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decides whether the delimiter run occupying str[start..end] (both bounds
// inclusive) is flanking in the direction given by `dir`:
//   dir = +1 examines the character just after the run (left-flanking test),
//   dir = -1 examines the character just before the run (right-flanking test).
//
// The run is flanking when the neighbouring character on the `dir` side
// exists, is not whitespace, and is either
//   (a) not punctuation, or
//   (b) punctuation, while the character on the opposite side of the run is
//       missing (start/end of input), whitespace, or punctuation.
//
// Returns true if the run is flanking in that direction, false otherwise.
static bool flanking(StringView const& str, size_t start, size_t end, int dir)
{
    // The <ctype.h> classifiers have undefined behaviour for negative
    // arguments other than EOF. Bytes of multi-byte UTF-8 sequences are
    // negative when `char` is signed, so every byte is routed through
    // `unsigned char` before being classified.
    ssize_t next = ((dir > 0) ? end : start) + dir;
    if (next < 0 || next >= static_cast<ssize_t>(str.length()))
        return false;

    auto const next_ch = static_cast<unsigned char>(str[next]);
    if (isspace(next_ch))
        return false;

    if (!ispunct(next_ch))
        return true;

    // The neighbour is punctuation: the run only counts as flanking if the
    // opposite side is the edge of the input, whitespace, or punctuation.
    ssize_t prev = ((dir > 0) ? start : end) - dir;
    if (prev < 0 || prev >= static_cast<ssize_t>(str.length()))
        return true;

    auto const prev_ch = static_cast<unsigned char>(str[prev]);
    return isspace(prev_ch) || ispunct(prev_ch);
}
|
||||||
|
|
||||||
Vector<Text::Token> Text::tokenize(StringView const& str)
|
Vector<Text::Token> Text::tokenize(StringView const& str)
|
||||||
{
|
{
|
||||||
Vector<Token> tokens;
|
Vector<Token> tokens;
|
||||||
StringBuilder current_token;
|
StringBuilder current_token;
|
||||||
|
|
||||||
auto flush_token = [&](bool left_flanking, bool right_flanking, bool is_run) {
|
auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
|
||||||
if (current_token.is_empty())
|
if (current_token.is_empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -182,11 +201,17 @@ Vector<Text::Token> Text::tokenize(StringView const& str)
|
|||||||
current_token.build(),
|
current_token.build(),
|
||||||
left_flanking,
|
left_flanking,
|
||||||
right_flanking,
|
right_flanking,
|
||||||
|
punct_before,
|
||||||
|
punct_after,
|
||||||
is_run,
|
is_run,
|
||||||
});
|
});
|
||||||
current_token.clear();
|
current_token.clear();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
auto flush_token = [&]() {
|
||||||
|
flush_run(false, false, false, false, false);
|
||||||
|
};
|
||||||
|
|
||||||
for (size_t offset = 0; offset < str.length(); ++offset) {
|
for (size_t offset = 0; offset < str.length(); ++offset) {
|
||||||
auto has = [&](StringView const& seq) {
|
auto has = [&](StringView const& seq) {
|
||||||
if (offset + seq.length() > str.length())
|
if (offset + seq.length() > str.length())
|
||||||
@ -197,9 +222,9 @@ Vector<Text::Token> Text::tokenize(StringView const& str)
|
|||||||
|
|
||||||
auto expect = [&](StringView const& seq) {
|
auto expect = [&](StringView const& seq) {
|
||||||
VERIFY(has(seq));
|
VERIFY(has(seq));
|
||||||
flush_token(false, false, false);
|
flush_token();
|
||||||
current_token.append(seq);
|
current_token.append(seq);
|
||||||
flush_token(false, false, false);
|
flush_token();
|
||||||
offset += seq.length() - 1;
|
offset += seq.length() - 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -209,7 +234,7 @@ Vector<Text::Token> Text::tokenize(StringView const& str)
|
|||||||
current_token.append(str[offset + 1]);
|
current_token.append(str[offset + 1]);
|
||||||
++offset;
|
++offset;
|
||||||
} else if (ch == '*' || ch == '_' || ch == '`') {
|
} else if (ch == '*' || ch == '_' || ch == '`') {
|
||||||
flush_token(false, false, false);
|
flush_token();
|
||||||
|
|
||||||
char delim = ch;
|
char delim = ch;
|
||||||
size_t run_offset;
|
size_t run_offset;
|
||||||
@ -217,15 +242,15 @@ Vector<Text::Token> Text::tokenize(StringView const& str)
|
|||||||
current_token.append(str[run_offset]);
|
current_token.append(str[run_offset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool left_flanking = run_offset < str.length() && !isspace(str[run_offset]);
|
flush_run(flanking(str, offset, run_offset - 1, +1),
|
||||||
bool right_flanking = offset > 0 && !isspace(str[offset - 1]);
|
flanking(str, offset, run_offset - 1, -1),
|
||||||
flush_token(left_flanking, right_flanking, true);
|
offset > 0 && ispunct(str[offset - 1]),
|
||||||
|
run_offset < str.length() && ispunct(str[run_offset]),
|
||||||
|
true);
|
||||||
offset = run_offset - 1;
|
offset = run_offset - 1;
|
||||||
|
|
||||||
} else if (ch == '\n') {
|
} else if (has("\n")) {
|
||||||
flush_token(false, false, false);
|
expect("\n");
|
||||||
current_token.append(ch);
|
|
||||||
flush_token(false, false, false);
|
|
||||||
} else if (has("[")) {
|
} else if (has("[")) {
|
||||||
expect("[");
|
expect("[");
|
||||||
} else if (has("![")) {
|
} else if (has("![")) {
|
||||||
@ -238,7 +263,7 @@ Vector<Text::Token> Text::tokenize(StringView const& str)
|
|||||||
current_token.append(ch);
|
current_token.append(ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
flush_token(false, false, false);
|
flush_token();
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,7 +301,7 @@ NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator
|
|||||||
|
|
||||||
bool Text::can_open(Token const& opening)
|
bool Text::can_open(Token const& opening)
|
||||||
{
|
{
|
||||||
return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && !opening.right_flanking);
|
return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Text::can_close_for(Token const& opening, Text::Token const& closing)
|
bool Text::can_close_for(Token const& opening, Text::Token const& closing)
|
||||||
@ -287,7 +312,7 @@ bool Text::can_close_for(Token const& opening, Text::Token const& closing)
|
|||||||
if (opening.run_length() != closing.run_length())
|
if (opening.run_length() != closing.run_length())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && !closing.left_flanking && closing.right_flanking);
|
return (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
|
||||||
}
|
}
|
||||||
|
|
||||||
NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
|
NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
|
||||||
|
@ -111,6 +111,8 @@ private:
|
|||||||
// definition, see the CommonMark spec.
|
// definition, see the CommonMark spec.
|
||||||
bool left_flanking;
|
bool left_flanking;
|
||||||
bool right_flanking;
|
bool right_flanking;
|
||||||
|
bool punct_before;
|
||||||
|
bool punct_after;
|
||||||
// is_run indicates that this token is a 'delimiter run'. A delimiter
|
// is_run indicates that this token is a 'delimiter run'. A delimiter
|
||||||
// run occurs when several of the same syntactical character ('`', '_',
|
// run occurs when several of the same syntactical character ('`', '_',
|
||||||
// or '*') occur in a row.
|
// or '*') occur in a row.
|
||||||
|
Loading…
Reference in New Issue
Block a user