diff --git a/src/file.cc b/src/file.cc index f7d8beb47..6773e6e0f 100644 --- a/src/file.cc +++ b/src/file.cc @@ -394,8 +394,9 @@ CandidateList complete_filename(StringView prefix, const Regex& ignored_regex, auto filter = [&ignored_regex, check_ignored_regex, include_hidden, only_dir](const dirent& entry, struct stat& st) { - return (include_hidden or StringView{entry.d_name}.substr(0_byte, 1_byte) != ".") and - (not check_ignored_regex or not regex_match(entry.d_name, ignored_regex)) and + StringView name{entry.d_name}; + return (include_hidden or name.substr(0_byte, 1_byte) != ".") and + (not check_ignored_regex or not regex_match(name.begin(), name.end(), ignored_regex)) and (not only_dir or S_ISDIR(st.st_mode)); }; auto files = list_files(dirname, filter); diff --git a/src/highlighters.cc b/src/highlighters.cc index eaa19567a..52db836d5 100644 --- a/src/highlighters.cc +++ b/src/highlighters.cc @@ -308,13 +308,14 @@ private: buffer.iterator_at(range.end), m_regex, match_flags(is_bol(range.begin), is_eol(buffer, range.end), + is_bow(buffer, range.begin), is_eow(buffer, range.end))}; RegexIt re_end; for (; re_it != re_end; ++re_it) { for (size_t i = 0; i < m_faces.size(); ++i) { - auto& sub = (*re_it)[m_faces[i].first]; + const auto& sub = (*re_it)[m_faces[i].first]; matches.push_back({sub.first.coord(), sub.second.coord()}); } } diff --git a/src/regex.cc b/src/regex.cc index 686e94c42..d70d8dd65 100644 --- a/src/regex.cc +++ b/src/regex.cc @@ -5,6 +5,12 @@ namespace Kakoune { +using Utf8It = RegexUtf8It; + +Regex::Regex(StringView re, flag_type flags) try + : boost::wregex{Utf8It{re.begin(), re}, Utf8It{re.end(), re}, flags}, m_str(re.str()) +{} catch (std::runtime_error& err) { throw regex_error(err.what()); } + String option_to_string(const Regex& re) { return re.str(); diff --git a/src/regex.hh b/src/regex.hh index 1ccc1fe7c..9e966c33a 100644 --- a/src/regex.hh +++ b/src/regex.hh @@ -3,12 +3,9 @@ #include "string.hh" #include "exception.hh" +#include
"utf8_iterator.hh" -#ifdef KAK_USE_STDREGEX -#include -#else #include -#endif namespace Kakoune { @@ -20,16 +17,12 @@ struct regex_error : runtime_error {} }; -#ifdef KAK_USE_STDREGEX // Regex that keeps track of its string representation -struct Regex : std::regex +struct Regex : boost::wregex { Regex() = default; - explicit Regex(StringView re, flag_type flags = ECMAScript) try - : std::regex(re.begin(), re.end(), flags), m_str(re.str()) {} - catch (std::runtime_error& err) { throw regex_error(err.what()); } - + explicit Regex(StringView re, flag_type flags = ECMAScript); bool empty() const { return m_str.empty(); } bool operator==(const Regex& other) const { return m_str == other.m_str; } bool operator!=(const Regex& other) const { return m_str != other.m_str; } @@ -39,37 +32,97 @@ struct Regex : std::regex private: String m_str; }; -namespace regex_ns = std; -#else -struct Regex : boost::regex + +template +using RegexUtf8It = utf8::iterator; + +namespace RegexConstant = boost::regex_constants; + +template +struct MatchResults : boost::match_results> { - Regex() = default; + using ParentType = boost::match_results>; + struct SubMatch : std::pair + { + SubMatch() = default; + SubMatch(const boost::sub_match>& m) + : std::pair{m.first.base(), m.second.base()}, + matched{m.matched} + {} - explicit Regex(StringView re, flag_type flags = ECMAScript) try - : boost::regex(re.begin(), re.end(), flags) {} - catch (std::runtime_error& err) { throw regex_error(err.what()); } + bool matched = false; + }; - String str() const { auto s = boost::regex::str(); return {s.data(), (int)s.length()}; } + struct iterator : boost::match_results>::iterator + { + using ParentType = typename boost::match_results>::iterator; + iterator(const ParentType& it) : ParentType(it) {} + + SubMatch operator*() const { return {ParentType::operator*()}; } + }; + + iterator begin() const { return {ParentType::begin()}; } + iterator cbegin() const { return {ParentType::cbegin()}; } + iterator end() 
const { return {ParentType::end()}; } + iterator cend() const { return {ParentType::cend()}; } + + SubMatch operator[](size_t s) const { return {ParentType::operator[](s)}; } }; -namespace regex_ns = boost; -#endif template -using RegexIterator = regex_ns::regex_iterator; - -template -using MatchResults = regex_ns::match_results; - -namespace RegexConstant = regex_ns::regex_constants; - -inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool eow) +struct RegexIterator : boost::regex_iterator> { - return (bol ? RegexConstant::match_default : RegexConstant::match_not_bol | - RegexConstant::match_prev_avail) | + using ParentType = boost::regex_iterator>; + using Utf8It = RegexUtf8It; + using ValueType = MatchResults; + + RegexIterator() = default; + RegexIterator(Iterator begin, Iterator end, const Regex& re, + RegexConstant::match_flag_type flags = RegexConstant::match_default) + : ParentType{Utf8It{begin, begin, end}, Utf8It{end, begin, end}, re, flags} {} + + const ValueType& operator*() const { return *reinterpret_cast(&ParentType::operator*()); } + const ValueType* operator->() const { return reinterpret_cast(ParentType::operator->()); } +}; + +inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool bow, bool eow) +{ + return (bol ? RegexConstant::match_default : RegexConstant::match_not_bol) | (eol ? RegexConstant::match_default : RegexConstant::match_not_eol) | + (bow ? RegexConstant::match_default : RegexConstant::match_not_bow) | (eow ? 
RegexConstant::match_default : RegexConstant::match_not_eow); } +template +bool regex_match(It begin, It end, const Regex& re) +{ + using Utf8It = RegexUtf8It; + return boost::regex_match(Utf8It{begin, begin, end}, Utf8It{end, begin, end}, re); +} + +template +bool regex_match(It begin, It end, MatchResults& res, const Regex& re) +{ + using Utf8It = RegexUtf8It; + return boost::regex_match(Utf8It{begin, begin, end}, Utf8It{end, begin, end}, res, re); +} + +template +bool regex_search(It begin, It end, const Regex& re, + RegexConstant::match_flag_type flags = RegexConstant::match_default) +{ + using Utf8It = RegexUtf8It; + return boost::regex_search(Utf8It{begin, begin, end}, Utf8It{end, begin, end}, re, flags); +} + +template +bool regex_search(It begin, It end, MatchResults& res, const Regex& re, + RegexConstant::match_flag_type flags = RegexConstant::match_default) +{ + using Utf8It = RegexUtf8It; + return boost::regex_search(Utf8It{begin, begin, end}, Utf8It{end, begin, end}, res, re, flags); +} + String option_to_string(const Regex& re); void option_from_string(StringView str, Regex& re); diff --git a/src/selectors.cc b/src/selectors.cc index 65526b776..b240e7a75 100644 --- a/src/selectors.cc +++ b/src/selectors.cc @@ -615,20 +615,21 @@ void select_all_matches(SelectionList& selections, const Regex& regex, unsigned auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); const auto flags = match_flags(is_bol(sel_beg.coord()), is_eol(buffer, sel_end.coord()), + is_bow(buffer, sel_beg.coord()), is_eow(buffer, sel_end.coord())); RegexIt re_it(sel_beg, sel_end, regex, flags); RegexIt re_end; for (; re_it != re_end; ++re_it) { - auto begin = ensure_char_start(buffer, (*re_it)[capture].first); + auto begin = (*re_it)[capture].first; if (begin == sel_end) continue; - auto end = ensure_char_start(buffer, (*re_it)[capture].second); + auto end = (*re_it)[capture].second; CaptureList captures; captures.reserve(mark_count); - for (auto& match : *re_it) + for (const auto&
match : *re_it) captures.push_back(buffer.string(match.first.coord(), match.second.coord())); @@ -661,6 +662,7 @@ void split_selections(SelectionList& selections, const Regex& regex, unsigned ca auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); const auto flags = match_flags(is_bol(begin.coord()), is_eol(buffer, sel_end.coord()), + is_bow(buffer, begin.coord()), is_eow(buffer, sel_end.coord())); RegexIt re_it(begin, sel_end, regex, flags); @@ -674,11 +676,10 @@ void split_selections(SelectionList& selections, const Regex& regex, unsigned ca if (end != buf_begin) { - end = ensure_char_start(buffer, end); auto sel_end = (begin == end) ? end : utf8::previous(end, begin); result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel)); } - begin = ensure_char_start(buffer, (*re_it)[capture].second); + begin = (*re_it)[capture].second; } if (begin.coord() <= sel.max()) result.push_back(keep_direction({ begin.coord(), sel.max() }, sel)); diff --git a/src/selectors.hh b/src/selectors.hh index 67c231d41..6ac840b11 100644 --- a/src/selectors.hh +++ b/src/selectors.hh @@ -226,7 +226,8 @@ inline bool find_last_match(const Buffer& buffer, const BufferIterator& pos, const bool is_pos_eow = is_eow(buffer, pos.coord()); auto begin = buffer.begin(); while (begin != pos and regex_search(begin, pos, matches, regex, - match_flags(is_bol(begin.coord()), is_pos_eol, is_pos_eow))) + match_flags(is_bol(begin.coord()), is_pos_eol, + is_bow(buffer, begin.coord()), is_pos_eow))) { begin = utf8::next(matches[0].first, pos); if (res.empty() or matches[0].second > res[0].second) @@ -244,7 +245,8 @@ bool find_match_in_buffer(const Buffer& buffer, const BufferIterator pos, if (direction == Forward) { if (regex_search(pos, buffer.end(), matches, ex, - match_flags(is_bol(pos.coord()), true, true))) + match_flags(is_bol(pos.coord()), true, + is_bow(buffer, pos.coord()), true))) return true; wrapped = true; return regex_search(buffer.begin(), buffer.end(), matches, 
ex); @@ -276,9 +278,9 @@ Selection find_next_match(const Buffer& buffer, const Selection& sel, const Rege auto pos = direction == Forward ? utf8::next(begin, buffer.end()) : begin; if ((found = find_match_in_buffer(buffer, pos, matches, regex, wrapped))) { - begin = ensure_char_start(buffer, matches[0].first); - end = ensure_char_start(buffer, matches[0].second); - for (auto& match : matches) + begin = matches[0].first; + end = matches[0].second; + for (const auto& match : matches) captures.push_back(buffer.string(match.first.coord(), match.second.coord())); } diff --git a/test/regression/638-highlight-codepoint-with-bracket/cmd b/test/regression/638-highlight-codepoint-with-bracket/cmd new file mode 100644 index 000000000..977f45f13 --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/cmd @@ -0,0 +1 @@ +w:q diff --git a/test/regression/638-highlight-codepoint-with-bracket/display b/test/regression/638-highlight-codepoint-with-bracket/display new file mode 100644 index 000000000..527099958 --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/display @@ -0,0 +1,5 @@ +{ "jsonrpc": "2.0", "method": "draw", "params": [[[{ "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "“" }, { "face": { "fg": "white", "bg": "blue", "attributes": [] }, "contents": "We" }, { "face": { "fg": "black", "bg": "white", "attributes": [] }, "contents": " " }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "ought to scrape this planet clean of every living thing on it," }, { "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "”" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "\u000a" }]], { "fg": "default", "bg": "default", "attributes": [] }, { "fg": "blue", "bg": "default", "attributes": [] }] } +{ "jsonrpc": "2.0", "method": "menu_hide", "params": [] } +{ "jsonrpc": "2.0", "method": "info_hide", "params": [] } +{ "jsonrpc": "2.0", "method": 
"draw_status", "params": [[], [{ "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "out 1:4 " }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " " }, { "face": { "fg": "blue", "bg": "default", "attributes": [] }, "contents": "1 sel" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " - unnamed0@[kak-test-regression-638-highlight-codepoint-with-bracket]" }], { "fg": "cyan", "bg": "default", "attributes": [] }] } +{ "jsonrpc": "2.0", "method": "refresh", "params": [true] } diff --git a/test/regression/638-highlight-codepoint-with-bracket/in b/test/regression/638-highlight-codepoint-with-bracket/in new file mode 100644 index 000000000..6ef6a849a --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/in @@ -0,0 +1 @@ +“We ought to scrape this planet clean of every living thing on it,” diff --git a/test/regression/638-highlight-codepoint-with-bracket/out b/test/regression/638-highlight-codepoint-with-bracket/out new file mode 100644 index 000000000..6ef6a849a --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/out @@ -0,0 +1 @@ +“We ought to scrape this planet clean of every living thing on it,” diff --git a/test/regression/638-highlight-codepoint-with-bracket/patch b/test/regression/638-highlight-codepoint-with-bracket/patch new file mode 100644 index 000000000..9ebbce12f --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/patch @@ -0,0 +1,11 @@ +--- a/display 2016-05-10 09:21:59.272300947 +0100 ++++ b/display 2016-05-10 09:37:27.866341923 +0100 +@@ -1,5 +1,5 @@ +-{ "jsonrpc": "2.0", "method": "draw", "params": [[[{ "face": { "fg": "black", "bg": "white", "attributes": [] }, "contents": "“" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "We ought to scrape this planet clean of every living thing on it," }, { "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "”" }, { 
"face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "\u000a" }]], { "fg": "default", "bg": "default", "attributes": [] }, { "fg": "blue", "bg": "default", "attributes": [] }] } ++{ "jsonrpc": "2.0", "method": "draw", "params": [[[{ "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "“" }, { "face": { "fg": "white", "bg": "blue", "attributes": [] }, "contents": "We" }, { "face": { "fg": "black", "bg": "white", "attributes": [] }, "contents": " " }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "ought to scrape this planet clean of every living thing on it," }, { "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "”" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "\u000a" }]], { "fg": "default", "bg": "default", "attributes": [] }, { "fg": "blue", "bg": "default", "attributes": [] }] } + { "jsonrpc": "2.0", "method": "menu_hide", "params": [] } + { "jsonrpc": "2.0", "method": "info_hide", "params": [] } +-{ "jsonrpc": "2.0", "method": "draw_status", "params": [[], [{ "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "out 1:1 " }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " " }, { "face": { "fg": "blue", "bg": "default", "attributes": [] }, "contents": "1 sel" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " - unnamed0@[kak-test-regression-638-highlight-codepoint-with-bracket]" }], { "fg": "cyan", "bg": "default", "attributes": [] }] } ++{ "jsonrpc": "2.0", "method": "draw_status", "params": [[], [{ "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "out 1:4 " }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " " }, { "face": { "fg": "blue", "bg": "default", "attributes": [] }, "contents": "1 sel" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " - 
unnamed0@[kak-test-regression-638-highlight-codepoint-with-bracket]" }], { "fg": "cyan", "bg": "default", "attributes": [] }] } + { "jsonrpc": "2.0", "method": "refresh", "params": [true] } + diff --git a/test/regression/638-highlight-codepoint-with-bracket/rc b/test/regression/638-highlight-codepoint-with-bracket/rc new file mode 100644 index 000000000..5e2704fc9 --- /dev/null +++ b/test/regression/638-highlight-codepoint-with-bracket/rc @@ -0,0 +1 @@ +addhl regex '[“”]' 0:red