mosesdecoder/jam-files/boost-build/util/string.jam

# Copyright 2002 Dave Abrahams
# Copyright 2002, 2003 Rene Rivera
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)

import regex ;


# Characters considered whitespace, as a list: a space, a horizontal tab and a
# newline. The tab and the newline are written literally inside the quotes,
# which is why the last element spans two source lines.
.whitespace-chars = " " "	" "
" ;

# Characters considered whitespace, as a single string: the elements of
# .whitespace-chars joined with an empty separator. In this form the set can be
# dropped straight into a regular expression bracket expression.
.whitespace = $(.whitespace-chars:J="") ;


# Accessor for the module's whitespace set in list form: the result has one
# list element per whitespace character.
#
rule whitespace-chars ( )
{
    local as-list = $(.whitespace-chars) ;
    return $(as-list) ;
}


# Accessor for the module's whitespace set in string form: all whitespace
# characters concatenated into one string.
#
rule whitespace ( )
{
    local as-string = $(.whitespace) ;
    return $(as-string) ;
}


# Breaks a string apart into its individual characters and returns them, in
# order, as a list with one element per character. An empty string yields an
# empty list.
#
rule chars (
    string  # The string to split.
    )
{
    # Peel characters off the front eight at a time; the ninth group receives
    # whatever is left over for the next round.
    local captured ;
    local rest = $(string) ;
    while $(rest)
    {
        local groups = [ MATCH (.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.*) : $(rest) ] ;
        captured += $(groups[1-8]) ;
        rest = $(groups[9]) ;
    }

    # When the length is not a multiple of eight, the unused optional groups of
    # the final round come back as empty strings. They can only appear at the
    # tail, so keeping just the non-empty captures discards exactly those.
    local result ;
    for local c in $(captured)
    {
        if $(c)
        {
            result += $(c) ;
        }
    }

    return $(result) ;
}


# Applies a set of standard transformations to string to produce an
# abbreviation no more than 5 characters long.
#
# Strings shorter than 4 characters are returned unchanged. Otherwise the first
# character is kept as is and the remainder is reduced by, in order: dropping a
# trailing "ing" (only when something other than "ing" remains), collapsing
# runs of a repeated character to a single one, removing all vowels, and
# truncating to 4 characters.
#
# Results are cached per input in module variables named .abbreviated-<string>,
# so repeated calls with the same string skip the computation.
#
rule abbreviate ( string )
{
    local r = $(.abbreviated-$(string)) ;
    if $(r)
    {
        return $(r) ;
    }
    # Anything less than 4 characters gets no abbreviation.
    else if ! [ MATCH (....) : $(string) ]
    {
        .abbreviated-$(string) = $(string) ;
        return $(string) ;
    }
    else
    {
        # Separate the initial letter in case it's a vowel.
        local s1 = [ MATCH ^(.)(.*) : $(string) ] ;

        # Drop trailing "ing". At least one character must precede it: for a
        # word such as "ring" the remainder is exactly "ing", and stripping it
        # would leave nothing, which in turn would make the join below expand
        # to an empty list and call regex.replace with a missing argument.
        local s2 = [ MATCH ^(.+)ing$ : $(s1[2]) ] ;
        s2 ?= $(s1[2]) ;

        # Reduce all doubled characters to one.
        local deduped ;
        local last = "" ;
        for local c in [ chars $(s2) ]
        {
            if $(c) != $(last)
            {
                deduped += $(c) ;
                last = $(c) ;
            }
        }
        s2 = $(deduped:J="") ;

        # Chop all vowels out of the remainder.
        s2 = [ regex.replace $(s2) [AEIOUaeiou] "" ] ;

        # Shorten remaining consonants to 4 characters.
        s2 = [ MATCH ^(.?.?.?.?) : $(s2) ] ;

        # Glue the initial character back on to the front.
        s2 = $(s1[1])$(s2) ;

        .abbreviated-$(string) = $(s2) ;
        return $(s2) ;
    }
}


# Glues the given strings together into a single string, placing the separator
# between each pair of neighbours. Without a separator the strings are simply
# concatenated.
#
rule join (
    strings *  # The strings to join.
    : separator ?  # The optional separator.
    )
{
    # The join modifier needs a value to expand with; an absent separator would
    # otherwise make the whole expansion come out empty.
    if ! $(separator)
    {
        separator = "" ;
    }
    local joined = $(strings:J=$(separator)) ;
    return $(joined) ;
}


# Breaks a string into the list of words it contains, where words are maximal
# runs of non-whitespace characters. Leading, trailing and repeated whitespace
# produce no empty words.
#
rule words (
    string  # The string to split.
    : whitespace *  # Optional, characters to consider as whitespace.
    )
{
    # Collapse the separator characters into one string for use inside a
    # bracket expression, falling back to the module's default whitespace set.
    whitespace = $(whitespace:J="") ;
    whitespace ?= $(.whitespace) ;

    # Skip leading separators, capture one word, capture everything after it.
    local pattern = "^[$(whitespace)]*([^$(whitespace)]*)(.*)" ;

    local found ;
    local rest = $(string) ;
    while $(rest)
    {
        local parts = [ MATCH $(pattern) : $(rest) ] ;
        # The word capture is empty when only separators were left.
        if $(parts[1])
        {
            found += $(parts[1]) ;
        }
        rest = $(parts[2]) ;
    }
    return $(found) ;
}


# Tests whether the given string contains nothing but whitespace characters.
# Returns "true" if so, and an empty result otherwise. A missing or empty
# string counts as whitespace.
#
rule is-whitespace (
    string ?  # The string to test.
    )
{
    local verdict ;
    if ! $(string)
    {
        # Covers both an absent argument and the empty string.
        verdict = true ;
    }
    else if [ MATCH "^([$(.whitespace)]+)$" : $(string) ]
    {
        verdict = true ;
    }
    return $(verdict) ;
}

# Self-test for this module: exercises chars, abbreviate, join, words and
# is-whitespace through the assert module.
#
rule __test__ ( )
{
    import assert ;

    # chars: empty input, short inputs, and lengths on either side of the
    # eight-characters-per-round boundary.
    assert.result : chars "" ;
    assert.result a : chars a ;
    assert.result a b c : chars abc ;
    assert.result a b c d e f g h : chars abcdefgh ;
    assert.result a b c d e f g h i : chars abcdefghi ;
    assert.result a b c d e f g h i j : chars abcdefghij ;
    assert.result a b c d e f g h i j k : chars abcdefghijk ;

    # abbreviate: strings under four characters pass through unchanged.
    assert.result pop : abbreviate pop ;
    assert.result aaa : abbreviate aaa ;

    # abbreviate: longer strings are reduced.
    assert.result rntm : abbreviate runtime ;
    assert.result ovrld : abbreviate overload ;
    assert.result dbg : abbreviate debugging ;
    assert.result async : abbreviate asynchronous ;
    assert.result qck : abbreviate quack ;
    assert.result sttc : abbreviate static ;

    # join: with and without a separator; empty elements are kept.
    assert.result abcd : join a "" b c d ;
    assert.result a//b/c/d : join a "" b c d : / ;

    # words: default whitespace set, mixing a space and a tab.
    assert.result a b c : words "a b	c" ;

    # is-whitespace: absent, empty, all-whitespace and mixed inputs.
    assert.true is-whitespace ;
    assert.true is-whitespace "" ;
    assert.true is-whitespace "     	" ;
    assert.false is-whitespace "  a b c	" ;
}