// sq/libsq/core/stringz/stringz.go

// Package stringz contains string functions similar in spirit
// to the stdlib strings package.
package stringz

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"strconv"
	"strings"
	"time"
	"unicode"

	"github.com/google/uuid"

	"github.com/neilotoole/sq/libsq/core/errz"
)

// Redacted is the placeholder string ("xxxxx") that is shown in place
// of a redacted value, such as a password.
const Redacted = "xxxxx"

// init seeds the global math/rand source from the current wall-clock
// time (nanosecond resolution) when the package is loaded. The random
// string helpers in this package draw from that source.
func init() { //nolint:gochecknoinits
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}

// Reverse returns input with its runes in reverse order. The reversal
// works on runes rather than bytes, so multi-byte UTF-8 characters are
// kept intact.
func Reverse(input string) string {
	rs := []rune(input)

	// Swap from both ends, moving towards the middle.
	for lo, hi := 0, len(rs)-1; lo < hi; lo, hi = lo+1, hi-1 {
		rs[lo], rs[hi] = rs[hi], rs[lo]
	}

	return string(rs)
}

// GenerateAlphaColName returns an Excel-style column name for the
// zero-based index n: 0 yields A, 1 yields B, and so on through Z,
// after which names continue with AA, AB, AC, etc. If lower is true,
// lower-case letters are used instead. A negative n yields the empty
// string.
func GenerateAlphaColName(n int, lower bool) string {
	const alphabetLen = 26

	if lower {
		return genAlphaCol(n, 'a', alphabetLen)
	}

	return genAlphaCol(n, 'A', alphabetLen)
}

// genAlphaCol builds the column name for zero-based index n using an
// alphabet of lenAlpha consecutive runes beginning at start. Letters are
// produced least-significant first, so the accumulated text is reversed
// before it is returned. A negative n yields the empty string.
func genAlphaCol(n int, start rune, lenAlpha int) string {
	var buf bytes.Buffer

	for n >= 0 {
		buf.WriteRune(start + rune(n%lenAlpha))
		// The "- 1" makes the next position one-based, which is what
		// lets "Z" be followed by "AA" rather than "BA".
		n = n/lenAlpha - 1
	}

	return Reverse(buf.String())
}

// ParseBool extends strconv.ParseBool so that it also accepts the
// "yes"/"no" style values that some data sources return. The extra
// accepted values are exactly: yes, Yes, YES, y, Y (true) and
// no, No, NO, n, N (false). Anything else is handed to
// strconv.ParseBool, whose error (if any) is returned wrapped by errz.
func ParseBool(s string) (bool, error) {
	switch s {
	case "1", "yes", "Yes", "YES", "y", "Y":
		return true, nil
	case "0", "no", "No", "NO", "n", "N":
		return false, nil
	}

	b, err := strconv.ParseBool(s)
	if err == nil {
		return b, nil
	}

	return b, errz.Err(err)
}

// InSlice reports whether needle is an element of haystack.
func InSlice(haystack []string, needle string) bool {
	idx := SliceIndex(haystack, needle)
	return idx >= 0
}

// SliceIndex returns the index of the first element of haystack that
// equals needle, or -1 if there is no such element.
func SliceIndex(haystack []string, needle string) int {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return i
		}
	}

	return -1
}

// FormatFloat returns the decimal text form of f. It exists so that
// floats are formatted the same way across the codebase.
func FormatFloat(f float64) string {
	const (
		format    = 'f' // plain decimal notation, no exponent
		precision = -1  // smallest number of digits that represents f exactly
		bitSize   = 64  // f is a float64
	)

	return strconv.FormatFloat(f, format, precision, bitSize)
}

// ByteSized returns a human-readable byte size, e.g. "2.1 MB", "3.0 TB", etc.
//
// The value is scaled by powers of 1024 and labelled B, KB, MB, GB, TB,
// PB or EB. Arg precision is the number of digits after the decimal
// point (a negative precision is treated as 0), and sep is placed
// verbatim between the number and the unit. A negative size is scaled
// by its magnitude and keeps its sign.
//
// TODO: replace this usage with "github.com/c2h5oh/datasize"
func ByteSized(size int64, precision int, sep string) string {
	if precision < 0 {
		precision = 0
	}

	f := float64(size)

	// Pick the unit by magnitude, so that negative sizes scale too.
	mag := f
	if mag < 0 {
		mag = -mag
	}

	// An int64 cannot reach zb or yb, so the largest unit tested is eb.
	unit, div := "B", 1.0
	switch {
	case mag >= eb:
		unit, div = "EB", eb
	case mag >= pb:
		unit, div = "PB", pb
	case mag >= tb:
		unit, div = "TB", tb
	case mag >= gb:
		unit, div = "GB", gb
	case mag >= mb:
		unit, div = "MB", mb
	case mag >= kb:
		unit, div = "KB", kb
	}

	// sep and unit are passed as arguments rather than spliced into the
	// format string, so a '%' in sep cannot corrupt the output.
	return fmt.Sprintf("%.*f%s%s", precision, f/div, sep, unit)
}

// Byte-size units, each 1024 times the previous one.
const (
	_          = iota // ignore first value by assigning to blank identifier
	kb float64 = 1 << (10 * iota)
	mb
	gb
	tb
	pb
	eb
	zb
	yb
)

// SprintJSON returns value encoded as JSON, indented by two spaces per
// level. It panics if value cannot be marshaled.
func SprintJSON(value any) string {
	const prefix, indent = "", "  "

	data, err := json.MarshalIndent(value, prefix, indent)
	if err == nil {
		return string(data)
	}

	panic(err)
}

// UUID returns a newly generated UUID in its string form.
func UUID() string {
	id := uuid.New()
	return id.String()
}

// Uniq32 returns a random, UUID-like string of length 32 made up only
// of lower-case letters and digits. The first character is always a
// letter.
func Uniq32() string {
	const length = 32
	return UniqN(length)
}

// Uniq8 returns a random, UUID-like string of length 8 made up only
// of lower-case letters and digits. The first character is always a
// letter.
func Uniq8() string {
	// There may well be a more efficient way of doing this, but
	// delegating to UniqN is fine for now.
	const length = 8
	return UniqN(length)
}

// UniqSuffix returns s followed by an underscore and a fresh Uniq8
// value, e.g. "name_ab2c3d4e".
func UniqSuffix(s string) string {
	suffix := Uniq8()
	return s + "_" + suffix
}

// UniqPrefix returns s preceded by a fresh Uniq8 value and an
// underscore, e.g. "ab2c3d4e_name".
func UniqPrefix(s string) string {
	prefix := Uniq8()
	return prefix + "_" + s
}

const (
	// charsetAlphanumericLower is the set of lower-case letters and
	// digits that random strings are generated from. Some characters are
	// deliberately left out, the stated intent being to avoid ambiguous
	// characters: the letters i, j, l, m, n, o, p, q and the digits
	// 0, 1, 7 do not appear in the set.
	charsetAlphanumericLower = "abcdefghkrstuvwxyz2345689"

	// charsetAlphaLower is the same set as charsetAlphanumericLower, but
	// without the digits. It is used where a letter is required.
	charsetAlphaLower = "abcdefghkrstuvwxyz"
)

// stringWithCharset returns a string of the given length in which each
// byte is picked at random from charset. It panics if charset is empty
// (even when length is not positive), and returns the empty string if
// length is zero or negative.
func stringWithCharset(length int, charset string) string {
	n := len(charset)
	if n == 0 {
		panic("charset has zero length")
	}

	if length < 1 {
		return ""
	}

	out := make([]byte, 0, length)
	for len(out) < length {
		out = append(out, charset[rand.Intn(n)]) //#nosec G404 // Doesn't need to be strongly random
	}

	return string(out)
}

// UniqN returns a random string of the given length, made up of
// lower-case letters and digits. The first character is always a
// letter. If length is zero or negative, the empty string is returned.
func UniqN(length int) string {
	if length <= 0 {
		return ""
	}

	// The leading character comes from the letters-only charset.
	head := stringWithCharset(1, charsetAlphaLower)
	if length == 1 {
		return head
	}

	return head + stringWithCharset(length-1, charsetAlphanumericLower)
}

// Plu handles the most common (English language) case of
// pluralization: every "(s)" marker in s is removed when i is 1, and
// replaced with "s" for any other value of i. For example, with s being
// "row(s) col(s)", Plu returns "row col" when i is 1, and "rows cols"
// otherwise.
func Plu(s string, i int) string {
	ending := "s"
	if i == 1 {
		ending = ""
	}

	return strings.ReplaceAll(s, "(s)", ending)
}

// RepeatJoin returns a string consisting of count copies
// of s separated by sep. For example:
//
//	stringz.RepeatJoin("?", 3, ", ") == "?, ?, ?"
//
// The empty string is returned if s is empty, or if count is
// zero or negative.
func RepeatJoin(s string, count int, sep string) string {
	// A non-positive count must be rejected here: passing a negative
	// size to strings.Builder.Grow below would panic.
	if s == "" || count <= 0 {
		return ""
	}
	if count == 1 {
		return s
	}

	var b strings.Builder
	b.Grow(len(s)*count + len(sep)*(count-1))

	// Write the first copy, then "sep + s" for each remaining copy, so
	// that no trailing separator is produced.
	b.WriteString(s)
	for i := 1; i < count; i++ {
		b.WriteString(sep)
		b.WriteString(s)
	}

	return b.String()
}

// Surround returns s with w placed both before and after it.
func Surround(s, w string) string {
	parts := [...]string{w, s, w}
	return strings.Join(parts[:], "")
}

// SurroundSlice returns a new slice holding each element of a with w
// placed before and after it. The input slice is not modified. If a is
// nil, nil is returned; if a is empty but non-nil, a non-nil empty
// slice is returned.
func SurroundSlice(a []string, w string) []string {
	if a == nil {
		return nil
	}

	// For an empty (non-nil) a, this yields a non-nil empty slice.
	out := make([]string, 0, len(a))
	for _, elem := range a {
		out = append(out, w+elem+w)
	}

	return out
}

// PrefixSlice returns a new slice holding each element of a with w
// placed before it. The input slice is not modified. If a is nil, nil
// is returned; if a is empty but non-nil, a non-nil empty slice is
// returned.
func PrefixSlice(a []string, w string) []string {
	if a == nil {
		return nil
	}

	// For an empty (non-nil) a, this yields a non-nil empty slice.
	out := make([]string, 0, len(a))
	for _, elem := range a {
		out = append(out, w+elem)
	}

	return out
}

const (
	// DateFormat is the time layout for a date without a time
	// component, e.g. "2006-01-02".
	DateFormat = "2006-01-02"

	// TimeFormat is the time layout for a 24-hour time without a date
	// component, e.g. "15:04:05".
	TimeFormat = "15:04:05"

	// DatetimeFormat is the time layout for a date/time timestamp. It is
	// the stdlib's time.RFC3339Nano layout.
	DatetimeFormat = time.RFC3339Nano
)

// UniqTableName returns a new lower-case table name based on
// tbl, with a unique suffix, and a maximum length of 63 bytes. This
// value of 63 is chosen to fit within the table name length limits
// of Postgres, SQL Server, SQLite and MySQL.
//
// Arg tbl is trimmed of surrounding whitespace and lower-cased; if the
// result is empty, "tbl" is used as the base name. When the base name
// has to be shortened to make room for the suffix, it is cut on a
// UTF-8 character boundary, so that a multi-byte character is never
// split in half.
func UniqTableName(tbl string) string {
	const maxLength = 63
	tbl = strings.ToLower(strings.TrimSpace(tbl))
	if tbl == "" {
		tbl = "tbl"
	}

	suffix := "__" + Uniq8()
	if limit := maxLength - len(suffix); len(tbl) > limit {
		// If the first dropped byte is a UTF-8 continuation byte
		// (bit pattern 10xxxxxx), the cut falls inside a multi-byte
		// character: back up to the start of that character. A
		// character has at most three continuation bytes.
		for back := 0; back < 3 && limit > 0 && tbl[limit]&0xC0 == 0x80; back++ {
			limit--
		}
		tbl = tbl[:limit]
	}
	tbl += suffix

	// paranoid sanitization
	tbl = strings.ReplaceAll(tbl, "@", "_")
	tbl = strings.ReplaceAll(tbl, "/", "_")

	return tbl
}

// SanitizeAlphaNumeric returns s with every rune that is neither a
// letter nor a number replaced by r (which is typically underscore).
// Runes already equal to r are left as they are.
//
//	a#2%3.4_ --> a_2_3_4_
func SanitizeAlphaNumeric(s string, r rune) string {
	out := []rune(s)

	for i := range out {
		c := out[i]
		if c == r || unicode.IsLetter(c) || unicode.IsNumber(c) {
			continue
		}
		out[i] = r
	}

	return string(out)
}

// LineCount returns the number of lines in r. If skipEmpty is
// true, empty lines are skipped (a whitespace-only line is not
// considered empty). If r is nil or any error occurs, -1 is returned.
//
// Lines are read using a bufio.Scanner with its default buffer, so a
// line that exceeds the scanner's maximum token size counts as an
// error.
func LineCount(r io.Reader, skipEmpty bool) int {
	if r == nil {
		return -1
	}

	var count int
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		if skipEmpty && len(sc.Bytes()) == 0 {
			continue
		}
		count++
	}

	// Scan returns false both at EOF and on failure, so the scanner's
	// error must be checked regardless of the skipEmpty mode.
	if sc.Err() != nil {
		return -1
	}

	return count
}

// TrimLen returns s but with a maximum length of maxLen bytes.
// If s is longer than maxLen, it is truncated; the cut is moved back
// to a UTF-8 character boundary where necessary, so that a multi-byte
// character is never split (the result may thus be slightly shorter
// than maxLen). If maxLen is zero or negative, the empty string
// is returned.
func TrimLen(s string, maxLen int) string {
	if maxLen <= 0 {
		// Guard against a negative maxLen, which would otherwise
		// cause an out-of-range slice expression below.
		return ""
	}

	if len(s) <= maxLen {
		return s
	}

	// If the first dropped byte is a UTF-8 continuation byte (bit
	// pattern 10xxxxxx), the cut falls inside a multi-byte character:
	// back up to the start of that character. A character has at most
	// three continuation bytes.
	cut := maxLen
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}

	return s[:cut]
}

const (
	// RFC3339Milli is an RFC3339 layout with millisecond precision
	// (always three fractional digits).
	RFC3339Milli = "2006-01-02T15:04:05.000Z07:00"

	// RFC3339MilliZulu is the same as RFC3339Milli, but in zulu time:
	// the layout ends with a literal "Z" instead of a zone offset.
	RFC3339MilliZulu = "2006-01-02T15:04:05.000Z"

	// rfc3339variant is a variant layout with second precision that
	// uses a "-0700" style zone offset suffix (no colon).
	rfc3339variant = "2006-01-02T15:04:05-0700"

	// RFC3339Zulu is an RFC3339 layout with second precision, in zulu
	// time: the layout ends with a literal "Z" instead of a zone offset.
	RFC3339Zulu = "2006-01-02T15:04:05Z"

	// ISO8601 is similar to RFC3339Milli, but doesn't have the colon
	// in the timezone offset.
	ISO8601 = "2006-01-02T15:04:05.000Z0700"

	// DateOnly is a date-only layout, e.g. "2006-01-02". It has the
	// same value as DateFormat.
	DateOnly = "2006-01-02"
)

// TimestampUTC converts t to UTC and returns it formatted using the
// RFC3339Milli layout.
func TimestampUTC(t time.Time) string {
	utc := t.UTC()
	return utc.Format(RFC3339Milli)
}

// DateUTC converts t to UTC and returns its date formatted using
// the DateOnly layout, e.g. "2020-10-31".
func DateUTC(t time.Time) string {
	utc := t.UTC()
	return utc.Format(DateOnly)
}

// TimestampToRFC3339 parses the timestamp s (in any format accepted
// by ParseTimestampUTC, such as RFC3339Milli, ISO8601 or RFC3339), and
// returns it in UTC using the RFC3339Zulu layout. That layout has
// second precision, so any fractional seconds are dropped.
// If s cannot be parsed, the empty string is returned.
func TimestampToRFC3339(s string) string {
	if t, err := ParseTimestampUTC(s); err == nil {
		return t.UTC().Format(RFC3339Zulu)
	}

	return ""
}

// TimestampToDate parses the timestamp s (in any format accepted by
// ParseTimestampUTC, such as RFC3339Milli, ISO8601 or RFC3339), and
// returns just its UTC date component, using the DateOnly layout.
// If s cannot be parsed, the empty string is returned.
func TimestampToDate(s string) string {
	if t, err := ParseTimestampUTC(s); err == nil {
		return t.UTC().Format(DateOnly)
	}

	return ""
}

// ParseTimestampUTC is the counterpart of TimestampUTC. It attempts
// to parse s using each of these layouts in turn: RFC3339Milli,
// time.RFC3339, the subtly different ISO8601, and finally the
// rfc3339variant layout. The first successful parse wins, and the
// resulting time is returned in UTC. If no layout matches, an error
// is returned.
func ParseTimestampUTC(s string) (time.Time, error) {
	// Layouts are listed in the order they are tried.
	layouts := [...]string{
		RFC3339Milli,
		time.RFC3339,
		ISO8601,
		rfc3339variant,
	}

	for _, layout := range layouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t.UTC(), nil
		}
	}

	return time.Time{}, errz.Errorf("failed to parse timestamp {%s}", s)
}

// ParseLocalDate accepts a date string s, returning the midnight time
// of that date in the local time zone. Arg s must be in the format
// "2006-01-02". An error is returned if s contains a 'T' (and is thus
// probably a timestamp rather than a date), or if s cannot be parsed.
func ParseLocalDate(s string) (time.Time, error) {
	if strings.ContainsRune(s, 'T') {
		// There's a 'T' in s, which means it's probably a timestamp.
		return time.Time{}, errz.Errorf("invalid date format: %s", s)
	}

	// No 'T', so treat s as a date.
	return time.ParseInLocation("2006-01-02", s, time.Local)
}

// ParseUTCDate accepts a date string s, returning the midnight time
// of that date in UTC. Arg s must be in the format "2006-01-02". An
// error is returned if s contains a 'T' (and is thus probably a
// timestamp rather than a date), or if s cannot be parsed.
func ParseUTCDate(s string) (time.Time, error) {
	if strings.ContainsRune(s, 'T') {
		// There's a 'T' in s, which means it's probably a timestamp.
		return time.Time{}, errz.Errorf("invalid date format: %s", s)
	}

	// No 'T', so treat s as a date.
	return time.ParseInLocation("2006-01-02", s, time.UTC)
}

// ParseDateOrTimestampUTC attempts to parse s as either
// a date (see ParseUTCDate), or timestamp (see ParseTimestampUTC).
// The returned time is in UTC.
func ParseDateOrTimestampUTC(s string) (time.Time, error) {
	if strings.ContainsRune(s, 'T') {
		return ParseTimestampUTC(s)
	}

	t, err := ParseUTCDate(s)
	return t.UTC(), err
}