diff --git a/docs/changelog.rst b/docs/changelog.rst index c43df4a92..fd2ad31ec 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -42,6 +42,9 @@ Changelog (:iss:`754`) +- Fix detection of URLs in HTML source code (URLs inside quotes) (:iss:`785`) + + 0.11.3 [2018-07-10] ------------------------------ diff --git a/kittens/hints/main.py b/kittens/hints/main.py index 319ac314a..947eca4fd 100644 --- a/kittens/hints/main.py +++ b/kittens/hints/main.py @@ -170,7 +170,7 @@ def regex_finditer(pat, minimum_match_length, text): yield s, e -closing_bracket_map = {'(': ')', '[': ']', '{': '}', '<': '>'} +closing_bracket_map = {'(': ')', '[': ']', '{': '}', '<': '>', '*': '*', '"': '"', "'": "'"} opening_brackets = ''.join(closing_bracket_map) postprocessor_map = {} @@ -189,15 +189,15 @@ def url(text, s, e): e -= len(url) - idx while text[e - 1] in '.,?!' and e > 1: # remove trailing punctuation e -= 1 + # truncate url at closing bracket/quote + if s > 0 and e <= len(text) and text[s-1] in opening_brackets: + q = closing_bracket_map[text[s-1]] + idx = text.find(q, s) + if idx > s: + e = idx # Restructured Text URLs if e > 3 and text[e-2:e] == '`_': e -= 2 - # Remove trailing bracket if matched by leading bracket - if s > 0 and e < len(text) and text[s-1] in opening_brackets and text[e-1] == closing_bracket_map[text[s-1]]: - e -= 1 - # Remove trailing quote if matched by leading quote - if s > 0 and e < len(text) and text[s-1] in '\'"' and text[e-1] == text[s-1]: - e -= 1 return s, e diff --git a/kitty/line.c b/kitty/line.c index b545be181..0c1cc2127 100644 --- a/kitty/line.c +++ b/kitty/line.c @@ -132,10 +132,11 @@ line_url_start_at(Line *self, index_type x) { } index_type -line_url_end_at(Line *self, index_type x, bool check_short) { +line_url_end_at(Line *self, index_type x, bool check_short, char_type sentinel) { index_type ans = x; if (x >= self->xnum || (check_short && self->xnum <= MIN_URL_LEN + 3)) return 0; - while (ans < self->xnum && 
is_url_char(self->cpu_cells[ans].ch)) ans++; + if (sentinel) { while (ans < self->xnum && self->cpu_cells[ans].ch != sentinel && is_url_char(self->cpu_cells[ans].ch)) ans++; } + else { while (ans < self->xnum && is_url_char(self->cpu_cells[ans].ch)) ans++; } if (ans) ans--; while (ans > x && can_strip_from_end_of_url(self->cpu_cells[ans].ch)) ans--; return ans; @@ -148,9 +149,11 @@ url_start_at(Line *self, PyObject *x) { } static PyObject* -url_end_at(Line *self, PyObject *x) { +url_end_at(Line *self, PyObject *args) { #define url_end_at_doc "url_end_at(x) -> Return the end cell number for a URL containing x or 0 if not found" - return PyLong_FromUnsignedLong((unsigned long)line_url_end_at(self, PyLong_AsUnsignedLong(x), true)); + unsigned int x, sentinel = 0; + if (!PyArg_ParseTuple(args, "I|I", &x, &sentinel)) return NULL; + return PyLong_FromUnsignedLong((unsigned long)line_url_end_at(self, x, true, sentinel)); } // }}} @@ -560,7 +563,7 @@ static PyMethodDef methods[] = { METHOD(is_continued, METH_NOARGS) METHOD(width, METH_O) METHOD(url_start_at, METH_O) - METHOD(url_end_at, METH_O) + METHOD(url_end_at, METH_VARARGS) METHOD(sprite_at, METH_O) {NULL} /* Sentinel */ diff --git a/kitty/lineops.h b/kitty/lineops.h index 3a9d7e762..b642b4faa 100644 --- a/kitty/lineops.h +++ b/kitty/lineops.h @@ -59,7 +59,7 @@ void line_set_char(Line *, unsigned int , uint32_t , unsigned int , Cursor *, bo void line_right_shift(Line *, unsigned int , unsigned int ); void line_add_combining_char(Line *, uint32_t , unsigned int ); index_type line_url_start_at(Line *self, index_type x); -index_type line_url_end_at(Line *self, index_type x, bool); +index_type line_url_end_at(Line *self, index_type x, bool, char_type); index_type line_as_ansi(Line *self, Py_UCS4 *buf, index_type buflen); unsigned int line_length(Line *self); size_t cell_as_unicode(CPUCell *cell, bool include_cc, Py_UCS4 *buf, char_type); diff --git a/kitty/mouse.c b/kitty/mouse.c index ad0126e77..94ef5a916 100644 --- 
a/kitty/mouse.c +++ b/kitty/mouse.c @@ -209,32 +209,57 @@ extend_selection(Window *w) { } static inline void -extend_url(Screen *screen, Line *line, index_type *x, index_type *y) { +extend_url(Screen *screen, Line *line, index_type *x, index_type *y, char_type sentinel) { unsigned int count = 0; while(count++ < 10) { if (*x != line->xnum - 1) break; line = screen_visual_line(screen, *y + 1); if (!line) break; // we deliberately allow non-continued lines as some programs, like mutt split URLs with newlines at line boundaries - index_type new_x = line_url_end_at(line, 0, false); + index_type new_x = line_url_end_at(line, 0, false, sentinel); if (!new_x) break; *y += 1; *x = new_x; } } +static inline char_type +get_url_sentinel(Line *line, index_type url_start) { + char_type before = 0, sentinel; + if (url_start > 0 && url_start < line->xnum) before = line->cpu_cells[url_start - 1].ch; + switch(before) { + case '"': + case '\'': + case '*': + sentinel = before; break; + case '(': + sentinel = ')'; break; + case '[': + sentinel = ']'; break; + case '{': + sentinel = '}'; break; + case '<': + sentinel = '>'; break; + default: + sentinel = 0; break; + } + return sentinel; +} + static inline void detect_url(Screen *screen, unsigned int x, unsigned int y) { bool has_url = false; index_type url_start, url_end = 0; Line *line = screen_visual_line(screen, y); + char_type sentinel; if (line) { url_start = line_url_start_at(line, x); - if (url_start < line->xnum) url_end = line_url_end_at(line, x, true); + sentinel = get_url_sentinel(line, url_start); + if (url_start < line->xnum) url_end = line_url_end_at(line, x, true, sentinel); has_url = url_end > url_start; } if (has_url) { mouse_cursor_shape = HAND; index_type y_extended = y; - extend_url(screen, line, &url_end, &y_extended); + extend_url(screen, line, &url_end, &y_extended, sentinel); screen_mark_url(screen, url_start, y, url_end, y_extended); } else { mouse_cursor_shape = BEAM; diff --git a/kitty_tests/hints.py 
b/kitty_tests/hints.py index 924e61946..629cb517a 100644 --- a/kitty_tests/hints.py +++ b/kitty_tests/hints.py @@ -29,3 +29,4 @@ class TestHints(BaseTest): t(u + '\nxxx', u + 'xxx', len(u)) t('link:{}[xxx]'.format(u), u) t('`xyz <{}>`_.'.format(u), u) + t('<a href="{}">moo'.format(u), u)