ladybird/Userland/Libraries/LibUnicode/Segmenter.h
Timothy Flynn ebdb92eef6 LibUnicode+Everywhere: Merge LibLocale back into LibUnicode
LibLocale was split off from LibUnicode a couple years ago to reduce the
number of applications on SerenityOS that depend on CLDR data. Now that
we use ICU, both LibUnicode and LibLocale are actually linking in this
data. And since vcpkg gives us static libraries, both libraries are over
30MB in size.

This patch reverts the separation and merges LibLocale into LibUnicode
again. We now have just one library that includes the ICU data.

Further, this will let LibUnicode share the locale cache that previously
would only exist in LibLocale.
2024-06-23 19:52:45 +02:00

63 lines
1.8 KiB
C++

/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Function.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/Optional.h>
#include <AK/StringView.h>
namespace Unicode {
// Kind of text unit a Segmenter is created for; a value of this type is passed to
// Segmenter::create() and can be read back through Segmenter::segmenter_granularity().
// NOTE(review): the enumerator names suggest the "granularity" option of ECMA-402
// Intl.Segmenter — confirm against the implementation before relying on that.
enum class SegmenterGranularity {
Grapheme,
Sentence,
Word,
};
// Maps a string to a SegmenterGranularity value.
// NOTE(review): the accepted spellings, and what happens for an unrecognized string, are
// decided in the implementation file and are not visible from this header. The return type
// has no "invalid" state, so callers should confirm the behavior before passing unvalidated input.
SegmenterGranularity segmenter_granularity_from_string(StringView);
// Maps a SegmenterGranularity value to a string, returned as a non-owning StringView.
// Presumably the inverse of segmenter_granularity_from_string() — confirm against the implementation.
StringView segmenter_granularity_to_string(SegmenterGranularity);
// Abstract interface for locating segment boundaries in text.
//
// Instances are obtained through the static create() factories, which return an owning
// NonnullOwnPtr; the constructor is protected, so concrete implementations live in subclasses
// that are not visible from this header. Every operation other than the granularity accessor
// is pure virtual.
class Segmenter {
public:
// Creates a segmenter for the given granularity without an explicit locale.
// NOTE(review): which locale is used in this case is decided by the implementation — confirm.
static NonnullOwnPtr<Segmenter> create(SegmenterGranularity segmenter_granularity);
// Creates a segmenter for the given locale and granularity.
// NOTE(review): the expected format of `locale` is not visible here; presumably a locale
// identifier string — verify against the implementation.
static NonnullOwnPtr<Segmenter> create(StringView locale, SegmenterGranularity segmenter_granularity);
// Virtual so that a subclass object owned through a Segmenter pointer is destroyed correctly.
virtual ~Segmenter() = default;
// Returns the granularity this segmenter was constructed with.
SegmenterGranularity segmenter_granularity() const { return m_segmenter_granularity; }
// Returns a new, independently owned Segmenter.
// Presumably it carries over this segmenter's configuration and text — confirm in the subclass.
virtual NonnullOwnPtr<Segmenter> clone() const = 0;
// Supply the text that the boundary queries below operate on.
// The String overload takes its argument by value; the Utf16View overload takes a reference
// to a view. NOTE(review): whether the implementation copies the viewed data or requires the
// caller to keep it alive is not visible from this header — confirm.
virtual void set_segmented_text(String) = 0;
virtual void set_segmented_text(Utf16View const&) = 0;
// Returns the segmenter's current boundary position.
// Declared non-const, so implementations are allowed to touch internal state.
// NOTE(review): the unit of the returned index (code units vs. code points) is not
// specified in this header — confirm.
virtual size_t current_boundary() = 0;
// Option for previous_boundary() / next_boundary(); both default to Inclusive::No.
// Presumably Yes allows `index` itself to be reported when it lies on a boundary — confirm.
enum class Inclusive {
No,
Yes,
};
// Look for a boundary relative to `index`, in the backward or forward direction as named.
// An empty Optional signals that no boundary was produced.
// Note: default arguments of virtual functions are resolved from the static type at the
// call site, so overrides should not redeclare a different default.
virtual Optional<size_t> previous_boundary(size_t index, Inclusive = Inclusive::No) = 0;
virtual Optional<size_t> next_boundary(size_t index, Inclusive = Inclusive::No) = 0;
// Callback type used by for_each_boundary(): receives a size_t and returns an IterationDecision.
// Presumably the argument is a boundary index and the return value can stop the iteration
// early — confirm against the implementation.
using SegmentationCallback = Function<IterationDecision(size_t)>;
// Invoke the callback over the boundaries of the given text; one overload per supported
// text representation (String by value, Utf16View and Utf32View by const reference).
// NOTE(review): whether these calls also replace the text installed via
// set_segmented_text() is not visible from this header — confirm.
virtual void for_each_boundary(String, SegmentationCallback) = 0;
virtual void for_each_boundary(Utf16View const&, SegmentationCallback) = 0;
virtual void for_each_boundary(Utf32View const&, SegmentationCallback) = 0;
// Reports whether the current boundary is "word-like".
// NOTE(review): the precise definition comes from the implementation and is presumably
// only meaningful for SegmenterGranularity::Word — confirm.
virtual bool is_current_boundary_word_like() const = 0;
protected:
// Only subclasses may construct a Segmenter; stores the granularity for the accessor above.
// `explicit` prevents implicit conversion from a SegmenterGranularity value.
explicit Segmenter(SegmenterGranularity segmenter_granularity)
: m_segmenter_granularity(segmenter_granularity)
{
}
// Granularity chosen at construction. The in-class initializer is only a fallback: the one
// constructor declared here always overwrites it.
SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme };
};
}