FIXED: Include non-breaking space (byte 0xA0, decimal 160, octal \240 — outside 7-bit ASCII) in re2c patterns

This commit is contained in:
Fletcher T. Penney 2017-03-15 00:33:18 -04:00
parent 11f1f0e137
commit 340352cb37
4 changed files with 860 additions and 661 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -60,12 +60,11 @@
// Basic scanner struct
#define YYCTYPE char
#define YYCTYPE unsigned char
#define YYCURSOR s->cur
#define YYMARKER s->ptr
#define YYCTXMARKER s->ctx
int scan(Scanner * s, const char * stop) {
scan:
@@ -80,15 +79,16 @@ int scan(Scanner * s, const char * stop) {
re2c:yyfill:enable = 0;
NL = "\r\n" | '\n' | '\r';
SP = [ \t]+;
WS = [ \t\240]; // Whitespace from char_lookup.c
SP = WS+;
SPNL = [ \t]* NL;
SPNL = WS* NL;
INDENT_TAB = '\t';
INDENT_SPACE = ' '{4};
NON_INDENT_SPACE = ' '{2,3};
INDENT_SPACE = [ \240]{4};
NON_INDENT_SPACE = [ \240]{2,3};
TEXT_LINEBREAK = ' '{2,} NL;
TEXT_LINEBREAK = [ \240]{2,} NL;
// The order of these seems to matter
@@ -226,7 +226,7 @@ int scan(Scanner * s, const char * stop) {
' '? NL { return TEXT_NL; }
NON_INDENT_SPACE { return NON_INDENT_SPACE; }
' ' / '\t' { return NON_INDENT_SPACE; }
[ \240] / '\t' { return NON_INDENT_SPACE; }
"*" { return STAR; }
"+" { return PLUS; }

File diff suppressed because it is too large Load Diff

View File

@@ -59,21 +59,21 @@
/*!re2c
re2c:define:YYCTYPE = "char";
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = c;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
nl = ( '\n' | '\r' '\n'?);
sp = [ \t]*;
sp = [ \t\240]*;
spnl = sp (nl sp)?;
non_indent = ' '{0,3};
non_indent = [ \240]{0,3};
nl_eof = nl | '\x00';
email = 'mailto:'? [-A-Za-z0-9+_./!%~$]+ '@' [^ \t\n\r\x00>]+;
email = 'mailto:'? [-A-Za-z0-9+_./!%~$]+ '@' [^ \240\t\n\r\x00>]+;
url = [A-Za-z\-]+ '://' [^ \t\n\r\x00>]+;
url = [A-Za-z\-]+ '://' [^ \240\t\n\r\x00>]+;
name = [A-Za-z_:] [A-Za-z0-9_.:-]*;
quoted_d = '"' [^"\n\r\x00]* '"';
@@ -99,7 +99,7 @@
ref_link = non_indent '[' label ']' ':' finish_line;
destination = ('<' [^ \t\n\r\x00>]* '>') | [^ \t\n\r\x00]+;
destination = ('<' [^ \240\t\n\r\x00>]* '>') | [^ \240\t\n\r\x00]+;
ref_link_no_attributes = non_indent '[' label ']' ':' spnl destination sp (nl_eof | (nl? (title) sp) nl_eof);
@@ -136,7 +136,7 @@
fence_end = non_indent [`~]{3,} sp nl_eof;
meta_key = [A-Za-z0-9] [A-Za-z0-9_ \t\-\.]*;
meta_key = [A-Za-z0-9] [A-Za-z0-9_ \240\t\-\.]*;
meta_value = [^\n\r\x00]+;
@@ -144,7 +144,7 @@
definition = non_indent ':' sp [^\n\r\x00];
table_separator = (('|' [:\-= \t|+]*) | ([:\-= \t+]+ '|' [:\-= \t|+]*)) nl_eof;
table_separator = (('|' [:\-= \240\t|+]*) | ([:\-= \240\t+]+ '|' [:\-= \240\t|+]*)) nl_eof;
align = [\-=]+;
align_left = sp ':' align sp ('|' | nl_eof);
@@ -158,7 +158,7 @@
setext_2 = non_indent '-'{2,} nl_eof;
atx = '#'+ [ \t]+ [^ \t\n\r\x00];
atx = '#'+ [ \240\t]+ [^ \240\t\n\r\x00];
*/