mosesdecoder/jam-files/boost-build/util/string.jam
2011-11-18 13:14:09 -05:00

190 lines
4.5 KiB
Plaintext

# Copyright 2002 Dave Abrahams
# Copyright 2002, 2003 Rene Rivera
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
import regex ;
# Characters considered whitespace, as a list.
# The last element is a quoted string that spans the line break, so its value
# is a literal newline character.
# NOTE(review): as rendered here the second element is identical to the first
# (a single space). It may have been a literal tab that was converted to a
# space somewhere along the way -- confirm against the upstream Boost.Build
# file before relying on tab being treated as whitespace.
.whitespace-chars = " " " " "
" ;
# Characters considered whitespace, as a single string (the list above joined
# with an empty separator).
.whitespace = $(.whitespace-chars:J="") ;
# Accessor for the module-level list of whitespace characters. Takes no
# arguments and returns the list unchanged.
#
rule whitespace-chars ( )
{
    local characters = $(.whitespace-chars) ;
    return $(characters) ;
}
# Accessor for the module-level whitespace characters in their joined,
# single-string form. Takes no arguments.
#
rule whitespace ( )
{
    local joined = $(.whitespace) ;
    return $(joined) ;
}
# Breaks a string apart into its individual characters and returns them, in
# order, as a list with one character per element.
#
rule chars (
    string    # The string to split.
    )
{
    local letters ;
    # Peel off up to eight characters per pass; the ninth group holds whatever
    # is left over for the next pass.
    while $(string)
    {
        local pieces = [ MATCH (.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.*) : $(string) ] ;
        letters += $(pieces[1-8]) ;
        string = $(pieces[9]) ;
    }
    # The final pass may have matched fewer than eight characters, leaving
    # empty strings at the tail of the list; drop them.
    while $(letters[1]) && ! $(letters[-1])
    {
        letters = $(letters[1--2]) ;
    }
    return $(letters) ;
}
# Applies a set of standard transformations to a string to produce an
# abbreviation no more than 5 characters long:
#   - strings shorter than 4 characters are returned unchanged;
#   - otherwise the first character is kept as is, and the remainder has a
#     trailing "ing" dropped, runs of repeated characters collapsed, vowels
#     removed, and is then cut to at most 4 characters.
# Results are cached in module variables named .abbreviated-<string>, so each
# distinct input is computed only once.
#
rule abbreviate ( string )
{
    local r = $(.abbreviated-$(string)) ;
    if $(r)
    {
        return $(r) ;
    }
    # Anything less than 4 characters gets no abbreviation.
    else if ! [ MATCH (....) : $(string) ]
    {
        .abbreviated-$(string) = $(string) ;
        return $(string) ;
    }
    else
    {
        # Separate the initial letter in case it's a vowel.
        local s1 = [ MATCH ^(.)(.*) : $(string) ] ;

        # Drop trailing "ing".
        local s2 = [ MATCH ^(.*)ing$ : $(s1[2]) ] ;
        s2 ?= $(s1[2]) ;

        # Reduce all doubled characters to one. 'r' is known to be empty here
        # (the cache lookup above missed), so it is reused as the accumulator.
        local last = "" ;
        for local c in [ chars $(s2) ]
        {
            if $(c) != $(last)
            {
                r += $(c) ;
                last = $(c) ;
            }
        }
        s2 = $(r:J="") ;
        # When the remainder was exactly "ing" (e.g. "ring"), nothing is left
        # at this point and the join above expands to an empty list rather
        # than an empty string. regex.replace needs a string argument, so fall
        # back to "" instead of passing it too few arguments.
        s2 ?= "" ;

        # Chop all vowels out of the remainder.
        s2 = [ regex.replace $(s2) [AEIOUaeiou] "" ] ;

        # Shorten remaining consonants to 4 characters.
        s2 = [ MATCH ^(.?.?.?.?) : $(s2) ] ;

        # Glue the initial character back on to the front.
        s2 = $(s1[1])$(s2) ;

        .abbreviated-$(string) = $(s2) ;
        return $(s2) ;
    }
}
# Glues the given strings together into one string, placing the separator
# between neighbouring elements. With no separator the strings are simply
# concatenated.
#
rule join (
    strings *    # The strings to join.
    : separator ?    # The optional separator.
    )
{
    # An absent separator must become an explicit empty string; expanding an
    # empty list inside the :J modifier would make the whole result empty.
    if ! $(separator)
    {
        separator = "" ;
    }
    local joined = $(strings:J=$(separator)) ;
    return $(joined) ;
}
# Breaks a string into the words it contains, where words are maximal runs of
# non-whitespace characters. Returns the words in order as a list.
#
rule words (
    string    # The string to split.
    : whitespace *    # Optional, characters to consider as whitespace.
    )
{
    # Collapse the caller's characters into one string usable inside a regex
    # character class; fall back to the module default when none were given.
    whitespace = $(whitespace:J="") ;
    whitespace ?= $(.whitespace) ;
    local found ;
    while $(string)
    {
        # parts[1] is the next word (possibly empty), parts[2] the unparsed
        # rest of the input.
        local parts = [ MATCH "^[$(whitespace)]*([^$(whitespace)]*)(.*)" : $(string) ] ;
        if $(parts[1])
        {
            found += $(parts[1]) ;
        }
        string = $(parts[2]) ;
    }
    return $(found) ;
}
# Tests whether a string consists of nothing but whitespace characters. A
# missing or empty string counts as whitespace. Returns "true" on success and
# an empty list otherwise.
#
rule is-whitespace (
    string ?    # The string to test.
    )
{
    local verdict ;
    if ! $(string)
    {
        # Absent argument or empty string.
        verdict = true ;
    }
    else if [ MATCH "^([$(.whitespace)]+)$" : $(string) ]
    {
        verdict = true ;
    }
    return $(verdict) ;
}
# Self-test for this module; each assertion compares the result of one of the
# rules above against its expected value.
#
rule __test__ ( )
{
    import assert ;

    # chars, including inputs around the eight-characters-per-pass boundary.
    assert.result : chars "" ;
    assert.result a : chars a ;
    assert.result a b c : chars abc ;
    assert.result a b c d e f g h : chars abcdefgh ;
    assert.result a b c d e f g h i : chars abcdefghi ;
    assert.result a b c d e f g h i j : chars abcdefghij ;
    assert.result a b c d e f g h i j k : chars abcdefghijk ;

    # abbreviate: short inputs pass through untouched.
    assert.result pop : abbreviate pop ;
    assert.result aaa : abbreviate aaa ;

    # abbreviate: longer inputs are shortened.
    assert.result rntm : abbreviate runtime ;
    assert.result ovrld : abbreviate overload ;
    assert.result dbg : abbreviate debugging ;
    assert.result async : abbreviate asynchronous ;
    assert.result qck : abbreviate quack ;
    assert.result sttc : abbreviate static ;

    # join, with and without a separator.
    assert.result abcd : join a "" b c d ;
    assert.result a//b/c/d : join a "" b c d : / ;

    # words.
    assert.result a b c : words "a b c" ;

    # is-whitespace.
    assert.true is-whitespace ;
    assert.true is-whitespace "" ;
    assert.true is-whitespace " " ;
    assert.false is-whitespace " a b c " ;
}