2021-10-30 03:28:55 +03:00
|
|
|
//! This module defines several useful string variants, including copy-on-write and immutable
|
|
|
|
//! implementations.
|
|
|
|
use std::borrow::Cow;
|
|
|
|
|
|
|
|
use crate::clone::*;
|
2021-11-10 16:36:08 +03:00
|
|
|
use crate::impls;
|
2021-10-30 03:28:55 +03:00
|
|
|
use derive_more::*;
|
2021-11-10 16:36:08 +03:00
|
|
|
use itertools::*;
|
|
|
|
#[cfg(feature = "serde")]
|
2021-10-30 03:28:55 +03:00
|
|
|
use serde::Deserialize;
|
2021-11-10 16:36:08 +03:00
|
|
|
#[cfg(feature = "serde")]
|
2021-10-30 03:28:55 +03:00
|
|
|
use serde::Serialize;
|
2021-11-10 16:36:08 +03:00
|
|
|
use std::ops::Deref;
|
|
|
|
use std::rc::Rc;
|
2021-10-30 03:28:55 +03:00
|
|
|
|
|
|
|
// =================
|
|
|
|
// === StringOps ===
|
|
|
|
// =================
|
|
|
|
|
|
|
|
/// Extension methods available on every string-like type, i.e. anything implementing
/// `AsRef<str>` (`&str`, `String`, the string wrappers of this module, ...).
pub trait StringOps {
    /// Check if given string starts and ends with given characters.
    ///
    /// The first and the last character may be one and the same: a one-character string is
    /// enclosed when it equals both `first_char` and `last_char`. An empty string is never
    /// enclosed.
    ///
    /// Runs in O(1) regardless of whether the characters are within ASCII range.
    fn is_enclosed(&self, first_char: char, last_char: char) -> bool;
}

impl<T: AsRef<str>> StringOps for T {
    fn is_enclosed(&self, first_char: char, last_char: char) -> bool {
        let text = self.as_ref();
        // `starts_with` / `ends_with` with a `char` pattern only compare the few UTF-8 bytes at
        // either end of the string, so no scan over the whole text is needed.
        text.starts_with(first_char) && text.ends_with(last_char)
    }
}
|
|
|
|
|
|
|
|
// ===========
|
|
|
|
// === Str ===
|
|
|
|
// ===========
|
|
|
|
|
|
|
|
/// Abstraction for any kind of string as an argument. Functions defined as
|
|
|
|
/// `fn test<S:Str>(s: S) { ... }` can be called with `String`, `&String`, and `&str` without
|
|
|
|
/// requiring caller to know the implementation details. Moreover, the definition can decide if it
|
|
|
|
/// needs allocation or not. Calling `s.as_ref()` will never allocate, while `s.into()` will
|
|
|
|
/// allocate only when necessary.
|
|
|
|
///
/// Note: this is a trait alias, a syntax that needs the unstable `trait_alias` language feature.
pub trait Str = Into<String> + AsRef<str>;
|
|
|
|
|
|
|
|
// =================
|
|
|
|
// === CowString ===
|
|
|
|
// =================
|
|
|
|
|
|
|
|
// === Definition ===
|
|
|
|
|
|
|
|
/// A copy-on-write String implementation. It is a newtype wrapper for `Cow<'static,str>` and
|
|
|
|
/// provides many useful impls for efficient workflow. Use it whenever you want to store a string
|
|
|
|
/// but you are not sure if the string will be allocated or not. This way you can store a static
|
|
|
|
/// slice as long as you can and switch to allocated String on demand.
|
2021-11-10 16:36:08 +03:00
|
|
|
#[derive(Clone, Debug, Default, Display)]
|
|
|
|
pub struct CowString(Cow<'static, str>);
|
2021-10-30 03:28:55 +03:00
|
|
|
|
|
|
|
// === Conversions From CowString ===
|
|
|
|
|
2021-11-10 16:36:08 +03:00
|
|
|
// `&CowString` -> `String`: clones the wrapper and converts the clone by value. (`impls!` is a
// crate macro defined elsewhere; judging by its use here it generates the `From` impl with the
// closure as its body -- TODO confirm.)
impls! { From <&CowString> for String { |t| t.clone().into() } }
|
|
|
|
// `CowString` -> `String`: unwraps the inner `Cow` and converts it; an owned string is moved out,
// a borrowed slice is copied into a new `String`.
impls! { From <CowString> for String { |t| t.0.into() } }
|
2021-10-30 03:28:55 +03:00
|
|
|
|
|
|
|
// === Conversions To CowString ===
|
|
|
|
|
2021-11-10 16:36:08 +03:00
|
|
|
// `Cow<'static, str>` -> `CowString`: wraps the value as-is.
impls! { From <Cow<'static,str>> for CowString { |t| Self(t) } }
|
|
|
|
// `&Cow<'static, str>` -> `CowString`: clones the `Cow` (a borrowed slice stays borrowed, an owned
// string is copied).
impls! { From <&Cow<'static,str>> for CowString { |t| Self(t.clone()) } }
|
|
|
|
// `&'static str` -> `CowString`: stores the slice in borrowed form, without allocating.
impls! { From <&'static str> for CowString { |t| Self(t.into()) } }
|
|
|
|
// `String` -> `CowString`: takes ownership of the string and stores it in owned form.
impls! { From <String> for CowString { |t| Self(t.into()) } }
|
|
|
|
// `&String` -> `CowString`: copies the text into a new `String` and converts that by value.
impls! { From <&String> for CowString { |t| t.to_string().into() } }
|
|
|
|
// `&CowString` -> `CowString`: plain clone, so `&CowString` can be passed where
// `Into<CowString>` is expected.
impls! { From <&CowString> for CowString { |t| t.clone() } }
|
2021-10-30 03:28:55 +03:00
|
|
|
|
|
|
|
// === Instances ===
|
|
|
|
|
|
|
|
impl Deref for CowString {
|
|
|
|
type Target = str;
|
|
|
|
fn deref(&self) -> &str {
|
|
|
|
self.0.deref()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsRef<str> for CowString {
|
|
|
|
fn as_ref(&self) -> &str {
|
|
|
|
self.deref()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ================
|
|
|
|
// === ImString ===
|
|
|
|
// ================
|
|
|
|
|
|
|
|
/// Immutable string implementation with a fast clone implementation.
|
2021-11-10 16:36:08 +03:00
|
|
|
#[derive(Clone, CloneRef, Default, Eq, Hash, PartialEq)]
|
|
|
|
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
|
2021-10-30 03:28:55 +03:00
|
|
|
pub struct ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
content: Rc<String>,
|
2021-10-30 03:28:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
impl ImString {
|
|
|
|
/// Constructor.
|
2021-11-10 16:36:08 +03:00
|
|
|
pub fn new(content: impl Into<String>) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
let content = Rc::new(content.into());
|
2021-11-10 16:36:08 +03:00
|
|
|
Self { content }
|
2021-10-30 03:28:55 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Extract a string slice containing the entire string.
|
|
|
|
pub fn as_str(&self) -> &str {
|
|
|
|
&self.content
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
std::fmt::Display::fmt(&self.content, f)
|
2021-10-30 03:28:55 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Debug for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
std::fmt::Debug::fmt(&self.content, f)
|
2021-10-30 03:28:55 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Deref for ImString {
|
|
|
|
type Target = str;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
&self.content
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Identity conversion, so that `&ImString` satisfies `AsRef<ImString>` bounds.
impl AsRef<ImString> for ImString {
    fn as_ref(&self) -> &Self {
        self
    }
}
|
|
|
|
|
|
|
|
impl AsRef<String> for ImString {
|
|
|
|
fn as_ref(&self) -> &String {
|
|
|
|
self.content.as_ref()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsRef<str> for ImString {
|
|
|
|
fn as_ref(&self) -> &str {
|
|
|
|
self.content.as_ref()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<String> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(t: String) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
Self::new(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<&String> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(t: &String) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
Self::new(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<&&String> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(t: &&String) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
Self::new(*t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<&str> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(t: &str) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
Self::new(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<&&str> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(t: &&str) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
Self::new(*t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<ImString> for String {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn from(value: ImString) -> Self {
|
2021-10-30 03:28:55 +03:00
|
|
|
match Rc::try_unwrap(value.content) {
|
|
|
|
Ok(str) => str,
|
|
|
|
Err(rc) => rc.deref().clone(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl PartialEq<&str> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn eq(&self, other: &&str) -> bool {
|
2021-10-30 03:28:55 +03:00
|
|
|
self.content.as_ref().eq(other)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl PartialEq<String> for ImString {
|
2021-11-10 16:36:08 +03:00
|
|
|
fn eq(&self, other: &String) -> bool {
|
2021-10-30 03:28:55 +03:00
|
|
|
self.content.as_ref().eq(other)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Allows `string == im_string` comparisons by content (the mirror of the impl on `ImString`).
impl PartialEq<ImString> for String {
    fn eq(&self, other: &ImString) -> bool {
        self.as_str() == other.content.as_str()
    }
}
|
|
|
|
|
|
|
|
// === Macros ===
|
|
|
|
|
|
|
|
/// Defines one or more newtypes wrapping `ImString`.
///
/// Takes a comma-separated list of type names, each optionally preceded by attributes. Every name
/// expands to a struct with a private `content: ImString` field plus:
/// - a `new` constructor accepting anything convertible into `ImString`,
/// - `Deref<Target = str>`,
/// - `AsRef` impls for the type itself, `ImString`, `String` and `str`,
/// - `From` impls for `String`, `&String`, `&&String`, `&str` and `&&str`.
///
/// The expansion refers to `ImString`, `Deref` and `CloneRef` by their bare names, so these have
/// to be in scope at the place where the macro is invoked.
#[macro_export]
macro_rules! im_string_newtype {
    ($($(#$meta:tt)* $name:ident),* $(,)?) => {$(
        $(#$meta)*
        #[derive(Clone, CloneRef, Debug, Default, Eq, Hash, PartialEq)]
        #[derive($crate::serde_reexports::Serialize, $crate::serde_reexports::Deserialize)]
        pub struct $name {
            content: ImString,
        }

        impl $name {
            /// Constructor.
            pub fn new(content: impl Into<ImString>) -> Self {
                Self { content: content.into() }
            }
        }

        impl Deref for $name {
            type Target = str;

            fn deref(&self) -> &Self::Target {
                self.content.as_str()
            }
        }

        impl AsRef<$name> for $name {
            fn as_ref(&self) -> &$name {
                self
            }
        }

        impl AsRef<ImString> for $name {
            fn as_ref(&self) -> &ImString {
                &self.content
            }
        }

        impl AsRef<String> for $name {
            fn as_ref(&self) -> &String {
                AsRef::<String>::as_ref(&self.content)
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.content.as_str()
            }
        }

        impl From<String> for $name {
            fn from(value: String) -> Self {
                Self::new(value)
            }
        }

        impl From<&String> for $name {
            fn from(value: &String) -> Self {
                Self::new(value)
            }
        }

        impl From<&&String> for $name {
            fn from(value: &&String) -> Self {
                Self::new(value)
            }
        }

        impl From<&str> for $name {
            fn from(value: &str) -> Self {
                Self::new(value)
            }
        }

        impl From<&&str> for $name {
            fn from(value: &&str) -> Self {
                Self::new(value)
            }
        }
    )*};
}
|
|
|
|
|
2021-11-10 16:36:08 +03:00
|
|
|
// ===============================
|
|
|
|
// === Common Pre- and Postfix ===
|
|
|
|
// ===============================
|
|
|
|
|
|
|
|
/// Return the length of the longest common prefix of the two strings. If they are completely
/// different this will be zero.
///
/// The length is counted in `char`s (Unicode scalar values), not in bytes, so for non-ASCII text
/// it must not be used directly as a byte index into either string.
///
/// Example:
/// ```
/// # use enso_prelude::*;
/// let a = "🐁hospital";
/// let b = "🐁host";
/// let c = "🐇bunny🐇";
///
/// assert_eq!(common_prefix_length(a, b), 4);
/// assert_eq!(common_prefix_length(a, c), 0);
/// assert_eq!(common_prefix_length(a, a), 9);
/// ```
pub fn common_prefix_length(source_a: &str, source_b: &str) -> usize {
    // Walk both strings in lockstep and count the matching pairs up to the first mismatch. `zip`
    // stops at the end of the shorter string, which covers the case of one string being a prefix
    // of the other without measuring either string up front.
    source_a.chars().zip(source_b.chars()).take_while(|(a, b)| a == b).count()
}
|
2021-10-30 03:28:55 +03:00
|
|
|
|
2021-11-10 16:36:08 +03:00
|
|
|
/// Return the length of the longest common postfix of the two strings. If they are completely
/// different this will be zero.
///
/// The length is counted in `char`s (Unicode scalar values), not in bytes, so for non-ASCII text
/// it must not be used directly as a byte offset from the end of either string.
///
/// Example:
/// ```
/// # use enso_prelude::*;
/// let a = "sunny🐇yard";
/// let b = "🐇yard";
/// let c = "🐇";
///
/// assert_eq!(common_postfix_length(a, b), 5);
/// assert_eq!(common_postfix_length(a, c), 0);
/// assert_eq!(common_postfix_length(a, a), 10);
/// ```
pub fn common_postfix_length(source_a: &str, source_b: &str) -> usize {
    // Walk both strings backwards in lockstep and count the matching pairs up to the first
    // mismatch. `zip` stops at the start of the shorter string, which covers the case of one
    // string being a postfix of the other without measuring either string up front.
    source_a.chars().rev().zip(source_b.chars().rev()).take_while(|(a, b)| a == b).count()
}
|
2021-10-30 03:28:55 +03:00
|
|
|
|
|
|
|
// =============
|
|
|
|
// === Tests ===
|
|
|
|
// =============
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every string in `enclosed` is reported as enclosed by `first`..`last` and that
    /// no string in `not_enclosed` is.
    fn check_enclosed(first: char, last: char, enclosed: &[&str], not_enclosed: &[&str]) {
        for text in enclosed {
            let result = text.is_enclosed(first, last);
            assert!(result, "{:?} should be enclosed by {:?} and {:?}", text, first, last);
        }
        for text in not_enclosed {
            let result = text.is_enclosed(first, last);
            assert!(!result, "{:?} should not be enclosed by {:?} and {:?}", text, first, last);
        }
    }

    #[test]
    fn test_string_ops() {
        // === Matching against ascii ===
        check_enclosed('{', '}', &["{}", "{ }", "{a}", "{字}"], &[
            "{", "{a", "a}", "}", "", "{字", "字}",
        ]);

        // === Matching against non-ascii ===
        check_enclosed('【', '】', &["【】", "【 】", "【 a】"], &["【", "【a", "a】", "】", ""]);

        // === Edge case of matching single char string ===
        check_enclosed('{', '{', &["{"], &[]);
        check_enclosed('【', '【', &["【"], &[]);
    }
}
|