From 4f2584a091f34cdb3eb0dda116709ef95fc34732 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 18 Oct 2015 16:55:21 +0100 Subject: [PATCH 01/12] Experiment with ranked word completion depending on word boundaries --- src/insert_completer.cc | 42 ++++++++++++------------- src/word_db.cc | 69 +++++++++++++++++++++++++++++++++++++++++ src/word_db.hh | 9 ++++++ 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 5b3b12245..8a9779a0d 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -72,7 +72,7 @@ WordDB& get_word_db(const Buffer& buffer) return cache_val.as(); } -template +template InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) { auto pos = buffer.iterator_at(cursor_pos); @@ -93,23 +93,23 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) String current_word{begin, end}; - struct MatchAndBuffer { - MatchAndBuffer(StringView m, const Buffer* b = nullptr) : match(m), buffer(b) {} + struct RankedWordAndBuffer : WordDB::RankedWord + { + RankedWordAndBuffer(StringView w, int r = 0, const Buffer* b = nullptr) + : WordDB::RankedWord{w, r}, buffer{b} {} - bool operator==(const MatchAndBuffer& other) const { return match == other.match; } - bool operator<(const MatchAndBuffer& other) const { return match < other.match; } + bool operator==(const RankedWordAndBuffer& other) const { return word == other.word; } + bool operator<(const RankedWordAndBuffer& other) const { return rank > other.rank; } - StringView match; const Buffer* buffer; }; - Vector matches; + Vector matches; auto add_matches = [&](const Buffer& buf) { auto& word_db = get_word_db(buf); - auto bufmatches = word_db.find_matching( - prefix, subseq ? subsequence_match : prefix_match); + auto bufmatches = word_db.find_matching(prefix); for (auto& m : bufmatches) - matches.push_back({ m, &buf }); + matches.push_back({ m.word, m.rank, &buf }); }; add_matches(buffer); @@ -131,8 +131,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) matches.erase(std::unique(matches.begin(), matches.end()), matches.end()); const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char, - [](const CharCount& lhs, const MatchAndBuffer& rhs) - { return std::max(lhs, rhs.match.char_length()); }); + [](const CharCount& lhs, const RankedWordAndBuffer& rhs) + { return std::max(lhs, rhs.word.char_length()); }); InsertCompletion::CandidateList candidates; candidates.reserve(matches.size()); @@ -141,15 +141,17 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) DisplayLine menu_entry; if (m.buffer) { - const auto pad_len = longest + 1 - m.match.char_length(); - menu_entry.push_back(m.match.str()); + const auto pad_len = longest + 1 - m.word.char_length(); + menu_entry.push_back(m.word.str()); menu_entry.push_back(String{' ', pad_len}); menu_entry.push_back({ m.buffer->display_name(), get_face("MenuInfo") }); } else - menu_entry.push_back(m.match.str()); + menu_entry.push_back(m.word.str()); - candidates.push_back({m.match.str(), "", std::move(menu_entry)}); + menu_entry.push_back({ " " + to_string(m.rank), get_face("cyan") }); + + candidates.push_back({m.word.str(), "", std::move(menu_entry)}); } return { begin.coord(), cursor_pos, std::move(candidates), buffer.timestamp() }; @@ -419,13 +421,11 @@ bool InsertCompleter::setup_ifn() return true; if (completer.mode == InsertCompleterDesc::Word and *completer.param == "buffer" and - (try_complete(complete_word) or - try_complete(complete_word))) + try_complete(complete_word)) return true; if (completer.mode == InsertCompleterDesc::Word and *completer.param == "all" and - (try_complete(complete_word) or - try_complete(complete_word))) + try_complete(complete_word)) return true; } return false; @@ -504,7 +504,7 @@ void InsertCompleter::explicit_file_complete() void InsertCompleter::explicit_word_complete() { - try_complete(complete_word); + try_complete(complete_word); } void InsertCompleter::explicit_line_complete() diff --git a/src/word_db.cc b/src/word_db.cc index f15ea3803..addb9fabf 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -27,6 +27,13 @@ UsedLetters used_letters(StringView str) return res; } +constexpr UsedLetters upper_mask = 0xFFFFFFC000000; + +UsedLetters to_lower(UsedLetters letters) +{ + return ((letters & upper_mask) >> 26) | (letters & (~upper_mask)); +} + static WordDB::WordList get_words(const SharedString& content) { WordDB::WordList res; @@ -136,6 +143,63 @@ int WordDB::get_word_occurences(StringView word) const return 0; } +WordDB::RankedWordList WordDB::find_matching(StringView query) +{ + auto match_rank = [](StringView candidate, StringView query) + { + int rank = 0; + auto it = candidate.begin(); + char prev = 0; + for (auto c : query) + { + if (it == candidate.end()) + return 0; + + const bool islow = islower(c); + auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; }; + + if (eq_c(*it)) // improve rank on contiguous + ++rank; + + while (!eq_c(*it)) + { + prev = *it; + if (++it == candidate.end()) + return 0; + } + // Improve rank on word boundaries + if (prev == 0 or prev == '_' or + (islower(prev) and isupper(*it))) + rank += 5; + + prev = c; + ++rank; + ++it; + } + return rank; + }; + + auto matches = [](UsedLetters query, UsedLetters letters) + { + return (query & letters) == query; + }; + + update_db(); + const UsedLetters letters = used_letters(query); + RankedWordList res; + for (auto&& word : m_words) + { + UsedLetters word_letters = word.second.letters; + if (not matches(to_lower(letters), to_lower(word_letters)) or + not matches(letters & upper_mask, word_letters & upper_mask)) + continue; + if (int rank = match_rank(word.first, query)) + res.push_back({ word.first, rank }); + } + + return res; +} + UnitTest test_word_db{[]() { Buffer buffer("test", Buffer::Flags::None, @@ -160,4 +224,9 @@ UnitTest test_word_db{[]() kak_assert(res == WordDB::WordList{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" }); }}; +UnitTest test_used_letters{[]() +{ + kak_assert(used_letters("abcd") == to_lower(used_letters("abcdABCD"))); +}}; + } diff --git a/src/word_db.hh b/src/word_db.hh index ad0dbadc1..4854e8812 100644 --- a/src/word_db.hh +++ b/src/word_db.hh @@ -38,6 +38,15 @@ public: return res; } + struct RankedWord + { + StringView word; + int rank; + }; + using RankedWordList = Vector; + + RankedWordList find_matching(StringView str); + int get_word_occurences(StringView word) const; private: void update_db(); From 364914afbd3da050d021dceee02d4c89e78b9d52 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 18 Oct 2015 17:51:13 +0100 Subject: [PATCH 02/12] Do not filter completions in InsertCompleter anymore Just rerun the actual completers --- src/insert_completer.cc | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 8a9779a0d..f81c47c3b 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -350,41 +350,6 @@ void InsertCompleter::select(int offset, Vector& keystrokes) void InsertCompleter::update() { - if (m_completions.is_valid()) - { - ByteCount longest_completion = 0; - for (auto& candidate : m_completions.candidates) - longest_completion = std::max(longest_completion, candidate.completion.length()); - - ByteCoord cursor = m_context.selections().main().cursor(); - ByteCoord compl_beg = m_completions.begin; - if (cursor.line == compl_beg.line and - compl_beg.column <= cursor.column and - cursor.column < compl_beg.column + longest_completion) - { - String prefix = m_context.buffer().string(compl_beg, cursor); - - if (m_context.buffer().timestamp() == m_completions.timestamp) - m_matching_candidates = m_completions.candidates; - else - { - m_matching_candidates.clear(); - for (auto& candidate : m_completions.candidates) - { - if (candidate.completion.substr(0, prefix.length()) == prefix) - m_matching_candidates.push_back(candidate); - } - } - if (not m_matching_candidates.empty()) - { - m_current_candidate = m_matching_candidates.size(); - m_completions.end = cursor; - menu_show(); - m_matching_candidates.push_back({prefix, ""}); - return; - } - } - } reset(); setup_ifn(); } From 73e438be07d4a7623e52419674bef4d1bc8dcb21 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Mon, 19 Oct 2015 13:43:23 +0100 Subject: [PATCH 03/12] Fix handling of explicit insert completers --- src/insert_completer.cc | 17 +++++++++++++---- src/insert_completer.hh | 2 ++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index f81c47c3b..214e8f254 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -350,6 +350,9 @@ void InsertCompleter::select(int offset, Vector& keystrokes) void InsertCompleter::update() { + if (m_explicit_completer and try_complete(m_explicit_completer)) + return; + reset(); setup_ifn(); } @@ -357,6 +360,7 @@ void InsertCompleter::update() void InsertCompleter::reset() { m_completions = InsertCompletion{}; + m_explicit_completer = nullptr; if (m_context.has_ui()) { m_context.ui().menu_hide(); @@ -462,21 +466,26 @@ bool InsertCompleter::try_complete(CompleteFunc complete_func) void InsertCompleter::explicit_file_complete() { - try_complete([this](const Buffer& buffer, ByteCoord cursor_pos) { + auto func = [this](const Buffer& buffer, ByteCoord cursor_pos) { return complete_filename(buffer, cursor_pos, m_options); - }); + }; + try_complete(func); + m_explicit_completer = func; } void InsertCompleter::explicit_word_complete() { try_complete(complete_word); + m_explicit_completer = complete_word; } void InsertCompleter::explicit_line_complete() { - try_complete([this](const Buffer& buffer, ByteCoord cursor_pos) { + auto func = [this](const Buffer& buffer, ByteCoord cursor_pos) { return complete_line(buffer, m_options, cursor_pos); - }); + }; + try_complete(func); + m_explicit_completer = func; } } diff --git a/src/insert_completer.hh b/src/insert_completer.hh index 650fbf699..5aeade49f 100644 --- a/src/insert_completer.hh +++ b/src/insert_completer.hh @@ -89,6 +89,8 @@ private: InsertCompletion m_completions; CandidateList m_matching_candidates; int m_current_candidate = -1; + + std::function m_explicit_completer; }; } From 9449f763e056badbded390c7a7b96980f8580d8b Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Mon, 19 Oct 2015 19:39:05 +0100 Subject: [PATCH 04/12] Fix removal of duplicated words in insert completion --- src/insert_completer.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 214e8f254..ee9ca292a 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -127,8 +127,12 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) } } unordered_erase(matches, StringView{prefix}); - std::sort(matches.begin(), matches.end()); + std::sort(matches.begin(), matches.end(), + [](const RankedWordAndBuffer& lhs, const RankedWordAndBuffer& rhs) { + return lhs.word < rhs.word; + }); matches.erase(std::unique(matches.begin(), matches.end()), matches.end()); + std::sort(matches.begin(), matches.end()); const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char, [](const CharCount& lhs, const RankedWordAndBuffer& rhs) From 2bf391f36d104c1446ab5b72a9c6005fa6f06db9 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Tue, 20 Oct 2015 13:50:55 +0100 Subject: [PATCH 05/12] Sort insert completer words by name favoring lower case and then stable sort by rank --- src/insert_completer.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index ee9ca292a..3d15d3f39 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -127,12 +127,19 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) } } unordered_erase(matches, StringView{prefix}); + // Sort by word, favoring lowercase std::sort(matches.begin(), matches.end(), [](const RankedWordAndBuffer& lhs, const RankedWordAndBuffer& rhs) { - return lhs.word < rhs.word; + return std::lexicographical_compare( + lhs.word.begin(), lhs.word.end(), rhs.word.begin(), rhs.word.end(), + [](char a, char b) { + const bool low_a = islower(a), low_b = islower(b); + return low_a == low_b ? a < b : low_a; + }); }); matches.erase(std::unique(matches.begin(), matches.end()), matches.end()); - std::sort(matches.begin(), matches.end()); + // Stable sort by rank to preserve by word sorting + std::stable_sort(matches.begin(), matches.end()); const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char, [](const CharCount& lhs, const RankedWordAndBuffer& rhs) From 019b3235b0e7aaad086631e1ee9b3102f096d349 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Wed, 21 Oct 2015 20:16:36 +0100 Subject: [PATCH 06/12] Remove legacy WordDB::find_matching method --- src/word_db.cc | 43 ++++++++++++++++++++++++++++++++----------- src/word_db.hh | 16 ---------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/src/word_db.cc b/src/word_db.cc index addb9fabf..c1c8f9676 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -34,9 +34,12 @@ UsedLetters to_lower(UsedLetters letters) return ((letters & upper_mask) >> 26) | (letters & (~upper_mask)); } -static WordDB::WordList get_words(const SharedString& content) +using WordList = Vector; + + +static WordList get_words(const SharedString& content) { - WordDB::WordList res; + WordList res; using Utf8It = utf8::iterator; const char* word_start = content.begin(); bool in_word = false; @@ -189,6 +192,12 @@ WordDB::RankedWordList WordDB::find_matching(StringView query) RankedWordList res; for (auto&& word : m_words) { + if (query.empty()) + { + res.push_back({word.first, 1 }); + continue; + } + UsedLetters word_letters = word.second.letters; if (not matches(to_lower(letters), to_lower(word_letters)) or not matches(letters & upper_mask, word_letters & upper_mask)) @@ -202,6 +211,18 @@ WordDB::RankedWordList WordDB::find_matching(StringView query) UnitTest test_word_db{[]() { + auto cmp_words = [](const WordDB::RankedWord& lhs, const WordDB::RankedWord& rhs) { + return lhs.word < rhs.word; + }; + + auto eq = [](ArrayView lhs, const WordList& rhs) { + return lhs.size() == rhs.size() and + std::equal(lhs.begin(), lhs.end(), rhs.begin(), + [](const WordDB::RankedWord& lhs, const StringView& rhs) { + return lhs.word == rhs; + }); + }; + Buffer buffer("test", Buffer::Flags::None, "tchou mutch\n" "tchou kanaky tchou\n" @@ -209,19 +230,19 @@ UnitTest test_word_db{[]() "tchaa tchaa\n" "allo\n"); WordDB word_db(buffer); - auto res = word_db.find_matching("", prefix_match); - std::sort(res.begin(), res.end()); - kak_assert(res == WordDB::WordList{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" }); + auto res = word_db.find_matching(""); + std::sort(res.begin(), res.end(), cmp_words); + kak_assert(eq(res, WordList{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" })); kak_assert(word_db.get_word_occurences("tchou") == 3); kak_assert(word_db.get_word_occurences("allo") == 1); buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0})); - res = word_db.find_matching("", prefix_match); - std::sort(res.begin(), res.end()); - kak_assert(res == WordDB::WordList{ "allo" COMMA "mutch" COMMA "tchou" }); + res = word_db.find_matching(""); + std::sort(res.begin(), res.end(), cmp_words); + kak_assert(eq(res, WordList{ "allo" COMMA "mutch" COMMA "tchou" })); buffer.insert(buffer.iterator_at({1, 0}), "re"); - res = word_db.find_matching("", subsequence_match); - std::sort(res.begin(), res.end()); - kak_assert(res == WordDB::WordList{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" }); + res = word_db.find_matching(""); + std::sort(res.begin(), res.end(), cmp_words); + kak_assert(eq(res, WordList{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" })); }}; UnitTest test_used_letters{[]() diff --git a/src/word_db.hh b/src/word_db.hh index 4854e8812..45aa52284 100644 --- a/src/word_db.hh +++ b/src/word_db.hh @@ -22,22 +22,6 @@ public: WordDB(const WordDB&) = delete; WordDB(WordDB&&) = default; - using WordList = Vector; - template - WordList find_matching(StringView str, MatchFunc match) - { - update_db(); - const UsedLetters letters = used_letters(str); - WordList res; - for (auto&& word : m_words) - { - if ((letters & word.second.letters) == letters and - match(word.first, str)) - res.push_back(word.first); - } - return res; - } - struct RankedWord { StringView word; From c77cb7c777ffc6f23ad53003d3531c1957fcdd0b Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 22 Oct 2015 19:49:08 +0100 Subject: [PATCH 07/12] Extract WordDB::RankedWord as RankedMatch in its own file --- src/insert_completer.cc | 16 +++++++-------- src/ranked_match.cc | 40 +++++++++++++++++++++++++++++++++++++ src/ranked_match.hh | 21 ++++++++++++++++++++ src/word_db.cc | 44 +++++------------------------------------ src/word_db.hh | 10 ++-------- 5 files changed, 76 insertions(+), 55 deletions(-) create mode 100644 src/ranked_match.cc create mode 100644 src/ranked_match.hh diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 3d15d3f39..a4ce0ba67 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -93,17 +93,17 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) String current_word{begin, end}; - struct RankedWordAndBuffer : WordDB::RankedWord + struct RankedMatchAndBuffer : RankedMatch { - RankedWordAndBuffer(StringView w, int r = 0, const Buffer* b = nullptr) - : WordDB::RankedWord{w, r}, buffer{b} {} + RankedMatchAndBuffer(StringView w, int r = 0, const Buffer* b = nullptr) + : RankedMatch{w, r}, buffer{b} {} - bool operator==(const RankedWordAndBuffer& other) const { return word == other.word; } - bool operator<(const RankedWordAndBuffer& other) const { return rank > other.rank; } + bool operator==(const RankedMatchAndBuffer& other) const { return word == other.word; } + bool operator<(const RankedMatchAndBuffer& other) const { return rank > other.rank; } const Buffer* buffer; }; - Vector matches; + Vector matches; auto add_matches = [&](const Buffer& buf) { auto& word_db = get_word_db(buf); @@ -129,7 +129,7 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) unordered_erase(matches, StringView{prefix}); // Sort by word, favoring lowercase std::sort(matches.begin(), matches.end(), - [](const RankedWordAndBuffer& lhs, const RankedWordAndBuffer& rhs) { + [](const RankedMatchAndBuffer& lhs, const RankedMatchAndBuffer& rhs) { return std::lexicographical_compare( lhs.word.begin(), lhs.word.end(), rhs.word.begin(), rhs.word.end(), [](char a, char b) { @@ -142,7 +142,7 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) std::stable_sort(matches.begin(), matches.end()); const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char, - [](const CharCount& lhs, const RankedWordAndBuffer& rhs) + [](const CharCount& lhs, const RankedMatchAndBuffer& rhs) { return std::max(lhs, rhs.word.char_length()); }); InsertCompletion::CandidateList candidates; diff --git a/src/ranked_match.cc b/src/ranked_match.cc new file mode 100644 index 000000000..0063b8ae3 --- /dev/null +++ b/src/ranked_match.cc @@ -0,0 +1,40 @@ +#include "ranked_match.hh" + +namespace Kakoune +{ + +int match_rank(StringView candidate, StringView query) +{ + int rank = 0; + auto it = candidate.begin(); + char prev = 0; + for (auto c : query) + { + if (it == candidate.end()) + return 0; + + const bool islow = islower(c); + auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; }; + + if (eq_c(*it)) // improve rank on contiguous + ++rank; + + while (!eq_c(*it)) + { + prev = *it; + if (++it == candidate.end()) + return 0; + } + // Improve rank on word boundaries + if (prev == 0 or prev == '_' or + (islower(prev) and isupper(*it))) + rank += 5; + + prev = c; + ++rank; + ++it; + } + return rank; +} + +} diff --git a/src/ranked_match.hh b/src/ranked_match.hh new file mode 100644 index 000000000..2e248dc1b --- /dev/null +++ b/src/ranked_match.hh @@ -0,0 +1,21 @@ +#ifndef ranked_match_hh_INCLUDED +#define ranked_match_hh_INCLUDED + +#include "string.hh" +#include "vector.hh" + +namespace Kakoune +{ + +struct RankedMatch +{ + StringView word; + int rank; +}; +using RankedMatchList = Vector; + +int match_rank(StringView candidate, StringView query); + +} + +#endif // ranked_match_hh_INCLUDED diff --git a/src/word_db.cc b/src/word_db.cc index c1c8f9676..80b59f0cd 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -146,42 +146,8 @@ int WordDB::get_word_occurences(StringView word) const return 0; } -WordDB::RankedWordList WordDB::find_matching(StringView query) +RankedMatchList WordDB::find_matching(StringView query) { - auto match_rank = [](StringView candidate, StringView query) - { - int rank = 0; - auto it = candidate.begin(); - char prev = 0; - for (auto c : query) - { - if (it == candidate.end()) - return 0; - - const bool islow = islower(c); - auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; }; - - if (eq_c(*it)) // improve rank on contiguous - ++rank; - - while (!eq_c(*it)) - { - prev = *it; - if (++it == candidate.end()) - return 0; - } - // Improve rank on word boundaries - if (prev == 0 or prev == '_' or - (islower(prev) and isupper(*it))) - rank += 5; - - prev = c; - ++rank; - ++it; - } - return rank; - }; - auto matches = [](UsedLetters query, UsedLetters letters) { return (query & letters) == query; @@ -189,7 +155,7 @@ WordDB::RankedWordList WordDB::find_matching(StringView query) update_db(); const UsedLetters letters = used_letters(query); - RankedWordList res; + RankedMatchList res; for (auto&& word : m_words) { if (query.empty()) @@ -211,14 +177,14 @@ WordDB::RankedWordList WordDB::find_matching(StringView query) UnitTest test_word_db{[]() { - auto cmp_words = [](const WordDB::RankedWord& lhs, const WordDB::RankedWord& rhs) { + auto cmp_words = [](const RankedMatch& lhs, const RankedMatch& rhs) { return lhs.word < rhs.word; }; - auto eq = [](ArrayView lhs, const WordList& rhs) { + auto eq = [](ArrayView lhs, const WordList& rhs) { return lhs.size() == rhs.size() and std::equal(lhs.begin(), lhs.end(), rhs.begin(), - [](const WordDB::RankedWord& lhs, const StringView& rhs) { + [](const RankedMatch& lhs, const StringView& rhs) { return lhs.word == rhs; }); }; diff --git a/src/word_db.hh b/src/word_db.hh index 45aa52284..91846eb65 100644 --- a/src/word_db.hh +++ b/src/word_db.hh @@ -5,6 +5,7 @@ #include "shared_string.hh" #include "unordered_map.hh" #include "vector.hh" +#include "ranked_match.hh" #include @@ -22,14 +23,7 @@ public: WordDB(const WordDB&) = delete; WordDB(WordDB&&) = default; - struct RankedWord - { - StringView word; - int rank; - }; - using RankedWordList = Vector; - - RankedWordList find_matching(StringView str); + RankedMatchList find_matching(StringView str); int get_word_occurences(StringView word) const; private: From 89d22f3335dfa471189f4925ec58d9b658a269ae Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Tue, 27 Oct 2015 21:25:18 +0000 Subject: [PATCH 08/12] Move more logic into RankedMatch --- src/insert_completer.cc | 37 +++++++++++++------------------------ src/ranked_match.cc | 27 ++++++++++++++++++++++++++- src/ranked_match.hh | 18 +++++++++++++----- src/word_db.cc | 11 ++++++----- 4 files changed, 58 insertions(+), 35 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index a4ce0ba67..02b888562 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -95,11 +95,13 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) struct RankedMatchAndBuffer : RankedMatch { - RankedMatchAndBuffer(StringView w, int r = 0, const Buffer* b = nullptr) - : RankedMatch{w, r}, buffer{b} {} + RankedMatchAndBuffer(const RankedMatch& m, const Buffer* b = nullptr) + : RankedMatch{m}, buffer{b} {} - bool operator==(const RankedMatchAndBuffer& other) const { return word == other.word; } - bool operator<(const RankedMatchAndBuffer& other) const { return rank > other.rank; } + bool operator==(StringView other) const { return candidate() == other; } + + bool operator==(const RankedMatchAndBuffer& other) const { return RankedMatch::operator==(other); } + bool operator<(const RankedMatchAndBuffer& other) const { return RankedMatch::operator<(other);; } const Buffer* buffer; }; @@ -109,7 +111,7 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) auto& word_db = get_word_db(buf); auto bufmatches = word_db.find_matching(prefix); for (auto& m : bufmatches) - matches.push_back({ m.word, m.rank, &buf }); + matches.push_back({ m, &buf }); }; add_matches(buffer); @@ -127,23 +129,12 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) } } unordered_erase(matches, StringView{prefix}); - // Sort by word, favoring lowercase - std::sort(matches.begin(), matches.end(), - [](const RankedMatchAndBuffer& lhs, const RankedMatchAndBuffer& rhs) { - return std::lexicographical_compare( - lhs.word.begin(), lhs.word.end(), rhs.word.begin(), rhs.word.end(), - [](char a, char b) { - const bool low_a = islower(a), low_b = islower(b); - return low_a == low_b ? a < b : low_a; - }); - }); + std::sort(matches.begin(), matches.end()); matches.erase(std::unique(matches.begin(), matches.end()), matches.end()); - // Stable sort by rank to preserve by word sorting - std::stable_sort(matches.begin(), matches.end()); const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char, [](const CharCount& lhs, const RankedMatchAndBuffer& rhs) - { return std::max(lhs, rhs.word.char_length()); }); + { return std::max(lhs, rhs.candidate().char_length()); }); InsertCompletion::CandidateList candidates; candidates.reserve(matches.size()); @@ -152,17 +143,15 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) DisplayLine menu_entry; if (m.buffer) { - const auto pad_len = longest + 1 - m.word.char_length(); - menu_entry.push_back(m.word.str()); + const auto pad_len = longest + 1 - m.candidate().char_length(); + menu_entry.push_back(m.candidate().str()); menu_entry.push_back(String{' ', pad_len}); menu_entry.push_back({ m.buffer->display_name(), get_face("MenuInfo") }); } else - menu_entry.push_back(m.word.str()); + menu_entry.push_back(m.candidate().str()); - menu_entry.push_back({ " " + to_string(m.rank), get_face("cyan") }); - - candidates.push_back({m.word.str(), "", std::move(menu_entry)}); + candidates.push_back({m.candidate().str(), "", std::move(menu_entry)}); } return { begin.coord(), cursor_pos, std::move(candidates), buffer.timestamp() }; diff --git a/src/ranked_match.cc b/src/ranked_match.cc index 0063b8ae3..03cb2fcdd 100644 --- a/src/ranked_match.cc +++ b/src/ranked_match.cc @@ -3,7 +3,7 @@ namespace Kakoune { -int match_rank(StringView candidate, StringView query) +static bool match_rank(StringView candidate, StringView query) { int rank = 0; auto it = candidate.begin(); @@ -37,4 +37,29 @@ int match_rank(StringView candidate, StringView query) return rank; } +RankedMatch::RankedMatch(StringView candidate, StringView query) +{ + if (candidate.empty() or query.empty()) + { + m_candidate = candidate; + return; + } + + m_match_rank = match_rank(candidate, query); +} + +bool RankedMatch::operator<(const RankedMatch& other) const +{ + if (m_match_rank == other.m_match_rank) + return std::lexicographical_compare( + m_candidate.begin(), m_candidate.end(), + other.m_candidate.begin(), other.m_candidate.end(), + [](char a, char b) { + const bool low_a = islower(a), low_b = islower(b); + return low_a == low_b ? a < b : low_a; + }); + + return m_match_rank < other.m_match_rank; +} + } diff --git a/src/ranked_match.hh b/src/ranked_match.hh index 2e248dc1b..d6367453e 100644 --- a/src/ranked_match.hh +++ b/src/ranked_match.hh @@ -9,12 +9,20 @@ namespace Kakoune struct RankedMatch { - StringView word; - int rank; -}; -using RankedMatchList = Vector; + RankedMatch(StringView candidate, StringView query); -int match_rank(StringView candidate, StringView query); + const StringView& candidate() const { return m_candidate; } + bool operator<(const RankedMatch& other) const; + bool operator==(const RankedMatch& other) const { return m_candidate == other.m_candidate; } + + explicit operator bool() const { return not m_candidate.empty(); } + +private: + StringView m_candidate; + int m_match_rank = 0; +}; + +using RankedMatchList = Vector; } diff --git a/src/word_db.cc b/src/word_db.cc index 80b59f0cd..147287b56 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -160,7 +160,7 @@ RankedMatchList WordDB::find_matching(StringView query) { if (query.empty()) { - res.push_back({word.first, 1 }); + res.push_back(RankedMatch{word.first, query}); continue; } @@ -168,8 +168,9 @@ RankedMatchList WordDB::find_matching(StringView query) if (not matches(to_lower(letters), to_lower(word_letters)) or not matches(letters & upper_mask, word_letters & upper_mask)) continue; - if (int rank = match_rank(word.first, query)) - res.push_back({ word.first, rank }); + + if (RankedMatch match{word.first, query}) + res.push_back(match); } return res; @@ -178,14 +179,14 @@ RankedMatchList WordDB::find_matching(StringView query) UnitTest test_word_db{[]() { auto cmp_words = [](const RankedMatch& lhs, const RankedMatch& rhs) { - return lhs.word < rhs.word; + return lhs.candidate() < rhs.candidate(); }; auto eq = [](ArrayView lhs, const WordList& rhs) { return lhs.size() == rhs.size() and std::equal(lhs.begin(), lhs.end(), rhs.begin(), [](const RankedMatch& lhs, const StringView& rhs) { - return lhs.word == rhs; + return lhs.candidate() == rhs; }); }; From 24043bbffe6b277bd5f9b4a68df751c6d2090967 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 29 Oct 2015 13:36:30 +0000 Subject: [PATCH 09/12] Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does --- src/ranked_match.cc | 122 ++++++++++++++++++++++++++++++-------------- src/ranked_match.hh | 5 +- 2 files changed, 89 insertions(+), 38 deletions(-) diff --git a/src/ranked_match.cc b/src/ranked_match.cc index 03cb2fcdd..9acaa8b41 100644 --- a/src/ranked_match.cc +++ b/src/ranked_match.cc @@ -1,65 +1,113 @@ #include "ranked_match.hh" +#include "unit_tests.hh" + namespace Kakoune { -static bool match_rank(StringView candidate, StringView query) +static int count_word_boundaries_match(StringView candidate, StringView query) { - int rank = 0; - auto it = candidate.begin(); + int count = 0; + auto it = query.begin(); char prev = 0; - for (auto c : query) + for (auto c : candidate) { - if (it == candidate.end()) - return 0; - - const bool islow = islower(c); - auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; }; - - if (eq_c(*it)) // improve rank on contiguous - ++rank; - - while (!eq_c(*it)) - { - prev = *it; - if (++it == candidate.end()) - return 0; - } - // Improve rank on word boundaries - if (prev == 0 or prev == '_' or - (islower(prev) and isupper(*it))) - rank += 5; - + const bool is_word_boundary = prev == 0 or + (ispunct(prev) and is_word(c)) or + (islower(prev) and isupper(c)); prev = c; - ++rank; + + if (not is_word_boundary) + continue; + + const char lc = tolower(c); + for (; it != query.end(); ++it) + { + const char qc = *it; + if (qc == (islower(qc) ? lc : c)) + { + ++count; + ++it; + break; + } + } + if (it == query.end()) + break; + } + return count; +} + +static bool smartcase_eq(char query, char candidate) +{ + return query == (islower(query) ? tolower(candidate) : candidate); +} + +static bool subsequence_match_smart_case(StringView str, StringView subseq) +{ + auto it = str.begin(); + for (auto& c : subseq) + { + if (it == str.end()) + return false; + while (not smartcase_eq(c, *it)) + { + if (++it == str.end()) + return false; + } ++it; } - return rank; + return true; } RankedMatch::RankedMatch(StringView candidate, StringView query) { - if (candidate.empty() or query.empty()) + if (candidate.empty() or query.length() > candidate.length()) + return; + + if (query.empty()) { m_candidate = candidate; return; } - m_match_rank = match_rank(candidate, query); + if (not subsequence_match_smart_case(candidate, query)) + return; + + m_candidate = candidate; + + m_first_char_match = smartcase_eq(query[0], candidate[0]); + m_word_boundary_match_count = count_word_boundaries_match(candidate, query); + m_only_word_boundary = m_word_boundary_match_count == query.length(); + m_prefix = std::equal(query.begin(), query.end(), candidate.begin(), smartcase_eq); } bool RankedMatch::operator<(const RankedMatch& other) const { - if (m_match_rank == other.m_match_rank) - return std::lexicographical_compare( - m_candidate.begin(), m_candidate.end(), - other.m_candidate.begin(), other.m_candidate.end(), - [](char a, char b) { - const bool low_a = islower(a), low_b = islower(b); - return low_a == low_b ? a < b : low_a; - }); + if (m_only_word_boundary or other.m_only_word_boundary) + return m_only_word_boundary and other.m_only_word_boundary ? + m_word_boundary_match_count > other.m_word_boundary_match_count + : m_only_word_boundary; - return m_match_rank < other.m_match_rank; + if (m_prefix != other.m_prefix) + return m_prefix; + + if (m_word_boundary_match_count != other.m_word_boundary_match_count) + return m_word_boundary_match_count > other.m_word_boundary_match_count; + + if (m_first_char_match != other.m_first_char_match) + return m_first_char_match; + + return std::lexicographical_compare( + m_candidate.begin(), m_candidate.end(), + other.m_candidate.begin(), other.m_candidate.end(), + [](char a, char b) { + const bool low_a = islower(a), low_b = islower(b); + return low_a == low_b ? a < b : low_a; + }); } +UnitTest test_ranked_match{[] { + kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3); +}}; + } diff --git a/src/ranked_match.hh b/src/ranked_match.hh index d6367453e..2a8050026 100644 --- a/src/ranked_match.hh +++ b/src/ranked_match.hh @@ -19,7 +19,10 @@ struct RankedMatch private: StringView m_candidate; - int m_match_rank = 0; + bool m_first_char_match = false; + bool m_prefix = false; + int m_word_boundary_match_count = 0; + bool m_only_word_boundary = false; }; using RankedMatchList = Vector; From 32b51e2cc3931c32ffb575f57803b53da75a07e6 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 29 Oct 2015 13:57:50 +0000 Subject: [PATCH 10/12] Use ranked matches for option completion as well --- src/insert_completer.cc | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 02b888562..08ff50711 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -233,29 +233,46 @@ InsertCompletion complete_option(const Buffer& buffer, ByteCoord cursor_pos, if (cursor_pos.line == coord.line and cursor_pos.column >= coord.column) { - StringView prefix = buffer[coord.line].substr( + StringView query = buffer[coord.line].substr( coord.column, cursor_pos.column - coord.column); - const CharCount tabstop = options["tabstop"].get(); const CharCount column = get_column(buffer, tabstop, cursor_pos); - InsertCompletion::CandidateList candidates; + struct RankedMatchAndInfo : RankedMatch + { + using RankedMatch::RankedMatch; + using RankedMatch::operator==; + using RankedMatch::operator<; + + StringView docstring; + DisplayLine menu_entry; + }; + + Vector matches; + for (auto it = opt.begin() + 1; it != opt.end(); ++it) { auto splitted = split(*it, '@'); - if (not splitted.empty() and prefix_match(splitted[0], prefix)) + if (splitted.empty()) + continue; + if (RankedMatchAndInfo match{splitted[0], query}) { - StringView completion = splitted[0]; - StringView docstring = splitted.size() > 1 ? splitted[1] : StringView{}; - - DisplayLine menu_entry = splitted.size() > 2 ? + match.docstring = splitted.size() > 1 ? splitted[1] : StringView{}; + match.menu_entry = splitted.size() > 2 ? parse_display_line(expand_tabs(splitted[2], tabstop, column)) : DisplayLine{ expand_tabs(splitted[0], tabstop, column) }; - candidates.push_back({completion.str(), docstring.str(), std::move(menu_entry)}); + matches.push_back(std::move(match)); } } + std::sort(matches.begin(), matches.end()); + InsertCompletion::CandidateList candidates; + candidates.reserve(matches.size()); + for (auto& match : matches) + candidates.push_back({ match.candidate().str(), match.docstring.str(), + std::move(match.menu_entry) }); + return { coord, end, std::move(candidates), timestamp }; } } From 34d7bdbc011ed01fa87e3c1933556742b2bab6d5 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Fri, 30 Oct 2015 13:07:48 +0000 Subject: [PATCH 11/12] Minor code cleanup in insert_completer.cc --- src/insert_completer.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 08ff50711..808a08a23 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -98,11 +98,10 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos) RankedMatchAndBuffer(const RankedMatch& m, const Buffer* b = nullptr) : RankedMatch{m}, buffer{b} {} + using RankedMatch::operator==; + using RankedMatch::operator<; bool operator==(StringView other) const { return candidate() == other; } - bool operator==(const RankedMatchAndBuffer& other) const { return RankedMatch::operator==(other); } - bool operator<(const RankedMatchAndBuffer& other) const { return RankedMatch::operator<(other);; } - const Buffer* buffer; }; Vector matches; From f556ef12c4ecab4c348cd0e862e4bcd65bc93245 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Fri, 30 Oct 2015 13:08:07 +0000 Subject: [PATCH 12/12] Fix preservation of entered text in option insert completer --- src/insert_completer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/insert_completer.cc b/src/insert_completer.cc index 808a08a23..add9c0f06 100644 --- a/src/insert_completer.cc +++ b/src/insert_completer.cc @@ -217,7 +217,7 @@ InsertCompletion complete_option(const Buffer& buffer, ByteCoord cursor_pos, str_to_int({match[2].first, match[2].second}) - 1 }; if (not buffer.is_valid(coord)) return {}; - auto end = coord; + auto end = cursor_pos; if (match[3].matched) { ByteCount len = str_to_int({match[3].first, match[3].second});