CHANGED: Preserve leading non-indent space in fenced code blocks

This commit is contained in:
Fletcher T. Penney 2017-07-23 17:40:20 -04:00
parent c9bbd14db6
commit 78ab69648a
7 changed files with 1478 additions and 939 deletions

View File

@ -471,6 +471,7 @@ enum token_types {
TEXT_EMPTY, TEXT_EMPTY,
TEXT_HASH, TEXT_HASH,
TEXT_LINEBREAK, TEXT_LINEBREAK,
TEXT_LINEBREAK_SP,
TEXT_NL, TEXT_NL,
TEXT_NL_SP, TEXT_NL_SP,
TEXT_NUMBER_POSS_LIST, TEXT_NUMBER_POSS_LIST,

File diff suppressed because it is too large Load Diff

View File

@ -97,7 +97,7 @@ int scan(Scanner * s, const char * stop) {
// NL with single space -- this would otherwise be wrapped into a TEXT_PLAIN // NL with single space -- this would otherwise be wrapped into a TEXT_PLAIN
NL ' ' / [^ \t\r\n] { return TEXT_NL_SP; } NL ' ' / [^ \t\r\n] { return TEXT_NL_SP; }
TEXT_LINEBREAK ' ' / [^ \t\r\n] { return TEXT_LINEBREAK_SP; }
"{{TOC}}" { return TOC; } "{{TOC}}" { return TOC; }

View File

@ -374,26 +374,30 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
const char * source = e->dstr->str; const char * source = e->dstr->str;
token * t = NULL; token * t = NULL;
token * first_child = line->child;
short temp_short; short temp_short;
size_t scan_len; size_t scan_len;
// Skip non-indenting space // Skip non-indenting space
if (line->child->type == NON_INDENT_SPACE) { if (first_child->type == NON_INDENT_SPACE) {
token_remove_first_child(line); //token_remove_first_child(line);
} else if (line->child->type == TEXT_PLAIN && line->child->len == 1) { first_child = first_child->next;
if (source[line->child->start] == ' ') { } else if (first_child->type == TEXT_PLAIN && first_child->len == 1) {
token_remove_first_child(line); if (source[first_child->start] == ' ') {
//token_remove_first_child(line);
first_child = first_child->next;
} }
} }
if (line->child == NULL) { if (first_child == NULL) {
line->type = LINE_EMPTY; line->type = LINE_EMPTY;
return; return;
} }
switch (line->child->type) { switch (first_child->type) {
case INDENT_TAB: case INDENT_TAB:
if (line_is_empty(line->child)) { if (line_is_empty(first_child)) {
line->type = LINE_EMPTY; line->type = LINE_EMPTY;
e->allow_meta = false; e->allow_meta = false;
} else { } else {
@ -403,7 +407,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
case INDENT_SPACE: case INDENT_SPACE:
if (line_is_empty(line->child)) { if (line_is_empty(first_child)) {
line->type = LINE_EMPTY; line->type = LINE_EMPTY;
e->allow_meta = false; e->allow_meta = false;
} else { } else {
@ -423,7 +427,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case ANGLE_RIGHT: case ANGLE_RIGHT:
line->type = LINE_BLOCKQUOTE; line->type = LINE_BLOCKQUOTE;
line->child->type = MARKER_BLOCKQUOTE; first_child->type = MARKER_BLOCKQUOTE;
break; break;
case BACKTICK: case BACKTICK:
@ -432,10 +436,10 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
} }
scan_len = scan_fence_end(&source[line->child->start]); scan_len = scan_fence_end(&source[first_child->start]);
if (scan_len) { if (scan_len) {
switch (line->child->len) { switch (first_child->len) {
case 3: case 3:
line->type = LINE_FENCE_BACKTICK_3; line->type = LINE_FENCE_BACKTICK_3;
break; break;
@ -451,10 +455,10 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
} else { } else {
scan_len = scan_fence_start(&source[line->child->start]); scan_len = scan_fence_start(&source[first_child->start]);
if (scan_len) { if (scan_len) {
switch (line->child->len) { switch (first_child->len) {
case 3: case 3:
line->type = LINE_FENCE_BACKTICK_START_3; line->type = LINE_FENCE_BACKTICK_START_3;
break; break;
@ -482,7 +486,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
} }
if (scan_definition(&source[line->child->start])) { if (scan_definition(&source[first_child->start])) {
line->type = LINE_DEFINITION; line->type = LINE_DEFINITION;
} }
@ -494,12 +498,12 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case HASH4: case HASH4:
case HASH5: case HASH5:
case HASH6: case HASH6:
if (scan_atx(&source[line->child->start])) { if (scan_atx(&source[first_child->start])) {
line->type = (line->child->type - HASH1) + LINE_ATX_1; line->type = (first_child->type - HASH1) + LINE_ATX_1;
line->child->type = (line->type - LINE_ATX_1) + MARKER_H1; first_child->type = (line->type - LINE_ATX_1) + MARKER_H1;
// Strip trailing whitespace from '#' sequence // Strip trailing whitespace from '#' sequence
line->child->len = line->child->type - MARKER_H1 + 1; first_child->len = first_child->type - MARKER_H1 + 1;
// Strip trailing '#' sequence if present // Strip trailing '#' sequence if present
if (line->child->tail->type == TEXT_NL) { if (line->child->tail->type == TEXT_NL) {
@ -522,7 +526,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
case HTML_COMMENT_START: case HTML_COMMENT_START:
if (!line->child->next || !line->child->next->next) { if (!first_child->next || !first_child->next->next) {
line->type = LINE_START_COMMENT; line->type = LINE_START_COMMENT;
} else { } else {
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
@ -531,7 +535,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
case HTML_COMMENT_STOP: case HTML_COMMENT_STOP:
if (!line->child->next || !line->child->next->next) { if (!first_child->next || !first_child->next->next) {
line->type = LINE_STOP_COMMENT; line->type = LINE_STOP_COMMENT;
} else { } else {
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
@ -540,19 +544,19 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
case TEXT_NUMBER_POSS_LIST: case TEXT_NUMBER_POSS_LIST:
switch (source[line->child->next->start]) { switch (source[first_child->next->start]) {
case ' ': case ' ':
case '\t': case '\t':
line->type = LINE_LIST_ENUMERATED; line->type = LINE_LIST_ENUMERATED;
line->child->type = MARKER_LIST_ENUMERATOR; first_child->type = MARKER_LIST_ENUMERATOR;
switch (line->child->next->type) { switch (first_child->next->type) {
case TEXT_PLAIN: case TEXT_PLAIN:
// Strip whitespace between bullet and text // Strip whitespace between bullet and text
while (char_is_whitespace(source[line->child->next->start])) { while (char_is_whitespace(source[first_child->next->start])) {
line->child->next->start++; first_child->next->start++;
line->child->next->len--; first_child->next->len--;
} }
break; break;
@ -560,7 +564,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case INDENT_SPACE: case INDENT_SPACE:
case INDENT_TAB: case INDENT_TAB:
case NON_INDENT_SPACE: case NON_INDENT_SPACE:
t = line->child; t = first_child;
while (t->next && ((t->next->type == INDENT_SPACE) || while (t->next && ((t->next->type == INDENT_SPACE) ||
(t->next->type == INDENT_TAB) || (t->next->type == INDENT_TAB) ||
@ -575,7 +579,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
default: default:
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
line->child->type = TEXT_PLAIN; first_child->type = TEXT_PLAIN;
break; break;
} }
@ -584,7 +588,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case EQUAL: case EQUAL:
// Could this be a setext heading marker? // Could this be a setext heading marker?
if (scan_setext(&source[line->child->start])) { if (scan_setext(&source[first_child->start])) {
line->type = LINE_SETEXT_1; line->type = LINE_SETEXT_1;
} else { } else {
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
@ -594,7 +598,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case DASH_N: case DASH_N:
case DASH_M: case DASH_M:
if (scan_setext(&source[line->child->start])) { if (scan_setext(&source[first_child->start])) {
line->type = LINE_SETEXT_2; line->type = LINE_SETEXT_2;
break; break;
} }
@ -602,14 +606,14 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case STAR: case STAR:
case UL: case UL:
// Could this be a horizontal rule? // Could this be a horizontal rule?
t = line->child->next; t = first_child->next;
temp_short = line->child->len; temp_short = first_child->len;
while (t) { while (t) {
switch (t->type) { switch (t->type) {
case DASH_N: case DASH_N:
case DASH_M: case DASH_M:
if (t->type == line->child->type) { if (t->type == first_child->type) {
t = t->next; t = t->next;
if (t) { if (t) {
@ -624,7 +628,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case STAR: case STAR:
case UL: case UL:
if (t->type == line->child->type) { if (t->type == first_child->type) {
t = t->next; t = t->next;
temp_short++; temp_short++;
} else { } else {
@ -668,7 +672,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
break; break;
} }
if (line->child->type == UL) { if (first_child->type == UL) {
// Revert to plain for this type // Revert to plain for this type
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
break; break;
@ -676,29 +680,29 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
// If longer than 1 character, then it can't be a list marker, so it's a // If longer than 1 character, then it can't be a list marker, so it's a
// plain line // plain line
if (line->child->len > 1) { if (first_child->len > 1) {
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
break; break;
} }
case PLUS: case PLUS:
if (!line->child->next) { if (!first_child->next) {
// TODO: Should this be an empty list item instead?? // TODO: Should this be an empty list item instead??
line->type = LINE_PLAIN; line->type = LINE_PLAIN;
} else { } else {
switch (source[line->child->next->start]) { switch (source[first_child->next->start]) {
case ' ': case ' ':
case '\t': case '\t':
line->type = LINE_LIST_BULLETED; line->type = LINE_LIST_BULLETED;
line->child->type = MARKER_LIST_BULLET; first_child->type = MARKER_LIST_BULLET;
switch (line->child->next->type) { switch (first_child->next->type) {
case TEXT_PLAIN: case TEXT_PLAIN:
// Strip whitespace between bullet and text // Strip whitespace between bullet and text
while (char_is_whitespace(source[line->child->next->start])) { while (char_is_whitespace(source[first_child->next->start])) {
line->child->next->start++; first_child->next->start++;
line->child->next->len--; first_child->next->len--;
} }
break; break;
@ -706,7 +710,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
case INDENT_SPACE: case INDENT_SPACE:
case INDENT_TAB: case INDENT_TAB:
case NON_INDENT_SPACE: case NON_INDENT_SPACE:
t = line->child; t = first_child;
while (t->next && ((t->next->type == INDENT_SPACE) || while (t->next && ((t->next->type == INDENT_SPACE) ||
(t->next->type == INDENT_TAB) || (t->next->type == INDENT_TAB) ||
@ -821,7 +825,7 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
if ((line->type == LINE_PLAIN) && if ((line->type == LINE_PLAIN) &&
!(e->extensions & EXT_COMPATIBILITY)) { !(e->extensions & EXT_COMPATIBILITY)) {
// Check if this is a potential table line // Check if this is a potential table line
token * walker = line->child; token * walker = first_child;
while (walker != NULL) { while (walker != NULL) {
if (walker->type == PIPE) { if (walker->type == PIPE) {
@ -1014,12 +1018,24 @@ token * mmd_tokenize_string(mmd_engine * e, size_t start, size_t len, bool stop_
break; break;
case TEXT_NL_SP: case TEXT_NL_SP:
t = token_new(TEXT_NL, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start - 1)); case TEXT_LINEBREAK_SP:
case TEXT_LINEBREAK: case TEXT_LINEBREAK:
case TEXT_NL: case TEXT_NL:
// We hit the end of a line // We hit the end of a line
if (type != TEXT_NL_SP) switch (type) {
t = token_new(type, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start)); case TEXT_NL_SP:
t = token_new(TEXT_NL, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start) - 1);
break;
case TEXT_LINEBREAK_SP:
t = token_new(TEXT_LINEBREAK, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start) - 1);
break;
default:
t = token_new(type, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start));
break;
}
token_append_child(line, t); token_append_child(line, t);
@ -1043,13 +1059,23 @@ token * mmd_tokenize_string(mmd_engine * e, size_t start, size_t len, bool stop_
} }
} }
if (type == TEXT_NL_SP) { switch (type) {
line = token_new(0, s.cur - e->dstr->str - 1, 0); case TEXT_NL_SP:
t = token_new(NON_INDENT_SPACE, (size_t)(s.cur - e->dstr->str - 1), 1); line = token_new(0, s.cur - e->dstr->str - 1, 0);
token_append_child(line, t); t = token_new(NON_INDENT_SPACE, (size_t)(s.cur - e->dstr->str - 1), 1);
} else { token_append_child(line, t);
line = token_new(0, s.cur - e->dstr->str, 0); break;
}
case TEXT_LINEBREAK_SP:
line = token_new(0, s.cur - e->dstr->str - 1, 0);
t = token_new(NON_INDENT_SPACE, (size_t)(s.cur - e->dstr->str - 1), 1);
token_append_child(line, t);
break;
default:
line = token_new(0, s.cur - e->dstr->str, 0);
break;
}
break; break;
@ -1961,7 +1987,7 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
#ifndef NDEBUG #ifndef NDEBUG
fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type); fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type);
token_tree_describe(block, NULL); token_tree_describe(block, e->dstr->str);
#endif #endif
token * l = block->child; token * l = block->child;
@ -1997,6 +2023,11 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
// Move contents of line directly into the parent block // Move contents of line directly into the parent block
while (l != NULL) { while (l != NULL) {
// Remove leading non-indent space from line
if (block->type != BLOCK_CODE_FENCED && l->child && l->child->type == NON_INDENT_SPACE) {
token_remove_first_child(l);
}
switch (l->type) { switch (l->type) {
case LINE_SETEXT_1: case LINE_SETEXT_1:
case LINE_SETEXT_2: case LINE_SETEXT_2:
@ -2036,11 +2067,6 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
case LINE_STOP_COMMENT: case LINE_STOP_COMMENT:
handle_line: handle_line:
// Remove leading non-indent space from line
if (block->type != BLOCK_CODE_FENCED && l->child && l->child->type == NON_INDENT_SPACE) {
token_remove_first_child(l);
}
case LINE_INDENTED_TAB: case LINE_INDENTED_TAB:
case LINE_INDENTED_SPACE: case LINE_INDENTED_SPACE:

View File

@ -303,7 +303,7 @@ office:mimetype="application/vnd.oasis.opendocument.text">
<text:p text:style-name="Preformatted Text">&lt;p&gt;foo&lt;/p&gt;<text:line-break/></text:p> <text:p text:style-name="Preformatted Text">&lt;p&gt;foo&lt;/p&gt;<text:line-break/></text:p>
<text:p text:style-name="Preformatted Text">foo<text:line-break/><text:line-break/><text:tab/>bar<text:line-break/><text:line-break/>bar<text:line-break/><text:line-break/> foo<text:line-break/></text:p> <text:p text:style-name="Preformatted Text">foo<text:line-break/><text:line-break/><text:tab/>bar<text:line-break/><text:line-break/> bar<text:line-break/><text:line-break/> foo<text:line-break/></text:p>
<text:p text:style-name="Preformatted Text">foo<text:line-break/></text:p> <text:p text:style-name="Preformatted Text">foo<text:line-break/></text:p>
</office:text> </office:text>

View File

@ -42,7 +42,7 @@ bar
bar bar
bar bar
foo foo
</code></pre> </code></pre>

View File

@ -48,7 +48,7 @@ foo
bar bar
bar bar
foo foo
\end{verbatim} \end{verbatim}