2014-01-17 02:07:42 +04:00
|
|
|
#include "word_db.hh"
|
|
|
|
|
|
|
|
#include "utils.hh"
|
2014-05-27 00:00:26 +04:00
|
|
|
#include "line_modification.hh"
|
2014-01-17 02:07:42 +04:00
|
|
|
#include "utf8_iterator.hh"
|
|
|
|
|
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2014-10-07 12:15:32 +04:00
|
|
|
static std::vector<InternedString> get_words(const InternedString& content)
|
2014-01-17 02:07:42 +04:00
|
|
|
{
|
2014-10-01 03:20:12 +04:00
|
|
|
std::vector<InternedString> res;
|
2014-07-03 00:14:01 +04:00
|
|
|
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
2014-05-11 15:51:37 +04:00
|
|
|
const char* word_start = content.begin();
|
2014-01-17 02:07:42 +04:00
|
|
|
bool in_word = false;
|
|
|
|
for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
|
|
|
|
{
|
|
|
|
Codepoint c = *it;
|
|
|
|
const bool word = is_word(c);
|
|
|
|
if (not in_word and word)
|
|
|
|
{
|
|
|
|
word_start = it.base();
|
|
|
|
in_word = true;
|
|
|
|
}
|
|
|
|
else if (in_word and not word)
|
|
|
|
{
|
2014-10-07 12:15:32 +04:00
|
|
|
const ByteCount start = word_start - content.begin();
|
|
|
|
const ByteCount length = it.base() - word_start;
|
|
|
|
res.push_back(content.acquire_substr(start, length));
|
2014-01-17 02:07:42 +04:00
|
|
|
in_word = false;
|
|
|
|
}
|
|
|
|
}
|
2014-01-24 04:56:33 +04:00
|
|
|
return res;
|
2014-01-17 02:07:42 +04:00
|
|
|
}
|
|
|
|
|
2014-10-01 03:20:12 +04:00
|
|
|
// Increments the count stored in wl for every entry of words, creating the
// entry through operator[] when the word is not present yet. A word listed
// several times in words is counted once per occurrence.
static void add_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
{
    for (auto& word : words)
    {
        auto& count = wl[word];
        ++count;
    }
}
|
2014-01-17 02:07:42 +04:00
|
|
|
|
2014-10-01 03:20:12 +04:00
|
|
|
// Decrements the count stored in wl for every entry of words and erases
// the entries whose count reaches zero.
//
// Every word is expected to be present in wl with a strictly positive
// count; this is checked with kak_assert.
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
{
    for (auto& word : words)
    {
        auto entry = wl.find(word);
        kak_assert(entry != wl.end() and entry->second > 0);

        // Still referenced elsewhere: keep the entry.
        if (--entry->second != 0)
            continue;

        wl.erase(entry);
    }
}
|
|
|
|
|
2014-01-24 04:56:33 +04:00
|
|
|
WordDB::WordDB(const Buffer& buffer)
|
2014-05-15 00:19:19 +04:00
|
|
|
: m_buffer{&buffer}, m_timestamp{buffer.timestamp()}
|
2014-01-17 02:07:42 +04:00
|
|
|
{
|
2014-01-24 04:56:33 +04:00
|
|
|
m_line_to_words.reserve((int)buffer.line_count());
|
|
|
|
for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
|
2014-01-17 02:07:42 +04:00
|
|
|
{
|
2014-01-24 04:56:33 +04:00
|
|
|
m_line_to_words.push_back(get_words(buffer[line]));
|
|
|
|
add_words(m_words, m_line_to_words.back());
|
2014-01-17 02:07:42 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-24 04:56:33 +04:00
|
|
|
void WordDB::update_db()
|
2014-01-17 02:07:42 +04:00
|
|
|
{
|
2014-05-15 00:19:19 +04:00
|
|
|
auto& buffer = *m_buffer;
|
|
|
|
|
2014-05-27 00:00:26 +04:00
|
|
|
auto modifs = compute_line_modifications(buffer, m_timestamp);
|
2014-05-15 00:19:19 +04:00
|
|
|
m_timestamp = buffer.timestamp();
|
|
|
|
|
2014-01-24 04:56:33 +04:00
|
|
|
if (modifs.empty())
|
|
|
|
return;
|
2014-01-17 02:07:42 +04:00
|
|
|
|
|
|
|
|
2014-01-24 04:56:33 +04:00
|
|
|
LineToWords new_lines;
|
|
|
|
new_lines.reserve((int)buffer.line_count());
|
|
|
|
|
|
|
|
auto old_line = 0_line;
|
|
|
|
for (auto& modif : modifs)
|
|
|
|
{
|
2014-05-27 00:00:26 +04:00
|
|
|
kak_assert(0_line <= modif.new_line and modif.new_line < buffer.line_count());
|
|
|
|
kak_assert(old_line <= modif.old_line);
|
|
|
|
while (old_line < modif.old_line)
|
2014-05-20 23:39:38 +04:00
|
|
|
new_lines.push_back(std::move(m_line_to_words[(int)old_line++]));
|
2014-01-24 04:56:33 +04:00
|
|
|
|
2014-05-27 00:00:26 +04:00
|
|
|
kak_assert((int)new_lines.size() == (int)modif.new_line);
|
|
|
|
|
|
|
|
while (old_line <= modif.old_line + modif.num_removed)
|
|
|
|
{
|
|
|
|
kak_assert(old_line < m_line_to_words.size());
|
2014-01-24 04:56:33 +04:00
|
|
|
remove_words(m_words, m_line_to_words[(int)old_line++]);
|
2014-05-27 00:00:26 +04:00
|
|
|
}
|
2014-01-24 04:56:33 +04:00
|
|
|
|
2014-05-27 00:00:26 +04:00
|
|
|
for (auto l = 0_line; l <= modif.num_added; ++l)
|
2014-01-24 04:56:33 +04:00
|
|
|
{
|
2014-05-27 00:00:26 +04:00
|
|
|
if (modif.new_line + l >= buffer.line_count())
|
2014-05-24 17:10:27 +04:00
|
|
|
break;
|
|
|
|
|
2014-05-27 00:00:26 +04:00
|
|
|
new_lines.push_back(get_words(buffer[modif.new_line + l]));
|
2014-01-24 04:56:33 +04:00
|
|
|
add_words(m_words, new_lines.back());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
while (old_line != (int)m_line_to_words.size())
|
|
|
|
new_lines.push_back(std::move(m_line_to_words[(int)old_line++]));
|
|
|
|
|
|
|
|
m_line_to_words = std::move(new_lines);
|
2014-01-17 02:07:42 +04:00
|
|
|
}
|
|
|
|
|
2014-10-01 03:20:12 +04:00
|
|
|
// Returns the words of the database matched by prefix_match() against
// prefix, after bringing the database up to date with update_db().
//
// The walk over m_words starts at lower_bound(prefix) and ends at the first
// entry that prefix_match() rejects.
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
{
    update_db();

    std::vector<InternedString> matches;
    for (auto entry = m_words.lower_bound(prefix);
         entry != m_words.end() and prefix_match(entry->first, prefix);
         ++entry)
    {
        matches.push_back(entry->first);
    }
    return matches;
}
|
|
|
|
|
2014-10-01 03:20:12 +04:00
|
|
|
// Returns every word of the database accepted by subsequence_match()
// against subsequence, after bringing the database up to date with
// update_db(). All entries of m_words are examined, in iteration order.
std::vector<InternedString> WordDB::find_subsequence(StringView subsequence)
{
    update_db();

    std::vector<InternedString> matches;
    for (auto& entry : m_words)
    {
        if (not subsequence_match(entry.first, subsequence))
            continue;
        matches.push_back(entry.first);
    }
    return matches;
}
|
|
|
|
|
2014-10-01 03:20:12 +04:00
|
|
|
// Returns the count stored in m_words for word, or 0 when word has no
// entry. The database is not refreshed by this call: update_db() is not
// invoked here.
int WordDB::get_word_occurences(StringView word) const
{
    auto entry = m_words.find(word);
    if (entry == m_words.end())
        return 0;
    return entry->second;
}
|
|
|
|
|
2014-01-17 02:07:42 +04:00
|
|
|
}
|