mirror of
https://github.com/tweag/asterius.git
synced 2024-10-26 09:21:04 +03:00
Remove text shims
This commit is contained in:
parent
4dddcb3c98
commit
ad42769975
@ -73,12 +73,6 @@ ahc-cabal act-as-setup --build-type=Simple -- build -j --builddir=$ASTERIUS_TMP_
|
||||
ahc-cabal act-as-setup --build-type=Simple -- install --builddir=$ASTERIUS_TMP_DIR/dist/ghci
|
||||
popd
|
||||
|
||||
pushd text
|
||||
ahc-cabal act-as-setup --build-type=Simple -- configure --builddir=$ASTERIUS_TMP_DIR/dist/text $ASTERIUS_CONFIGURE_OPTIONS
|
||||
ahc-cabal act-as-setup --build-type=Simple -- build -j --builddir=$ASTERIUS_TMP_DIR/dist/text
|
||||
ahc-cabal act-as-setup --build-type=Simple -- install --builddir=$ASTERIUS_TMP_DIR/dist/text
|
||||
popd
|
||||
|
||||
ahc-cabal v1-install $ASTERIUS_CONFIGURE_OPTIONS \
|
||||
aeson \
|
||||
parsec
|
||||
|
@ -11,7 +11,6 @@ import { StableNameManager } from "./rts.stablename.mjs";
|
||||
import { StaticPtrManager } from "./rts.staticptr.mjs";
|
||||
import { Scheduler } from "./rts.scheduler.mjs";
|
||||
import { IntegerManager } from "./rts.integer.mjs";
|
||||
import { TextCBits } from "./rts.text.mjs";
|
||||
import { TimeCBits } from "./rts.time.mjs";
|
||||
import { GC } from "./rts.gc.mjs";
|
||||
import { ExceptionHelper } from "./rts.exception.mjs";
|
||||
@ -103,7 +102,6 @@ export async function newAsteriusInstance(req) {
|
||||
__asterius_stableptr_manager
|
||||
),
|
||||
__asterius_integer_manager = new IntegerManager(),
|
||||
__asterius_text_cbits = new TextCBits(__asterius_memory),
|
||||
__asterius_time_cbits = new TimeCBits(__asterius_memory, req.targetSpecificModule),
|
||||
__asterius_gc = new GC(
|
||||
__asterius_memory,
|
||||
@ -181,7 +179,6 @@ export async function newAsteriusInstance(req) {
|
||||
write: (fd, buf, count) => __asterius_fs.write(fd, buf, count)
|
||||
},
|
||||
posix: modulify(new (req.targetSpecificModule.posix)(__asterius_memory, rtsConstants)),
|
||||
text: modulify(__asterius_text_cbits),
|
||||
time: modulify(__asterius_time_cbits),
|
||||
// cannot name this float since float is a keyword.
|
||||
floatCBits: modulify(__asterius_float_cbits),
|
||||
|
@ -1,64 +0,0 @@
|
||||
import { Memory } from "./rts.memory.mjs";
|
||||
|
||||
export class TextCBits {
|
||||
constructor(memory) {
|
||||
this.memory = memory;
|
||||
Object.seal(this);
|
||||
}
|
||||
|
||||
_hs_text_memcpy(_dst, dst_off, _src, src_off, n) {
|
||||
const dst = Memory.unTag(_dst) + dst_off * 2,
|
||||
src = Memory.unTag(_src) + src_off * 2;
|
||||
this.memory.i8View.copyWithin(dst, src, src + n * 2);
|
||||
}
|
||||
|
||||
_hs_text_memcmp(_dst, dst_off, _src, src_off, n) {
|
||||
const dst = Memory.unTag(_dst) + dst_off * 2,
|
||||
src = Memory.unTag(_src) + src_off * 2;
|
||||
for (let i = 0; i < n * 2; ++i) {
|
||||
const sgn = Math.sign(
|
||||
this.memory.i8View[dst + i] - this.memory.i8View[src + i]
|
||||
);
|
||||
if (sgn) return sgn;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
_hs_text_decode_utf8(dest, destoffp, src, srcend) {
|
||||
const dec = new TextDecoder("utf-8", { fatal: true }),
|
||||
s = dec.decode(
|
||||
this.memory.i8View.subarray(Memory.unTag(src), Memory.unTag(srcend))
|
||||
);
|
||||
for (let i = 0; i < s.length; ++i)
|
||||
this.memory.i16Store(dest + i * 2, s.charCodeAt(i));
|
||||
this.memory.i64Store(destoffp, s.length);
|
||||
return srcend;
|
||||
}
|
||||
|
||||
_hs_text_encode_utf8(destp, src, srcoff, srclen) {
|
||||
const dec = new TextDecoder("utf-16le", { fatal: true }),
|
||||
s = dec.decode(
|
||||
this.memory.i8View.subarray(
|
||||
Memory.unTag(src + srcoff * 2),
|
||||
Memory.unTag(src + srcoff * 2 + srclen * 2)
|
||||
)
|
||||
),
|
||||
dest = Number(this.memory.i64Load(destp)),
|
||||
enc = new TextEncoder(),
|
||||
// `Data.Text.Encoding.encodeUtf8` allocates a `ByteArray#` of size
|
||||
// `srclen * 3` to ensure enough space for a single-pass encoding from
|
||||
// UTF-16 to UTF-8. See the comment near
|
||||
// `https://github.com/haskell/text/blob/2176eb38b5238e763e8076b0d0db8c2f2014ab8b/Data/Text/Encoding.hs#L432`
|
||||
// and the "Buffer Sizing" section of
|
||||
// `https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder/encodeInto`
|
||||
// for details.
|
||||
l = enc.encodeInto(
|
||||
s,
|
||||
this.memory.i8View.subarray(
|
||||
Memory.unTag(dest),
|
||||
Memory.unTag(dest + srclen * 3)
|
||||
)
|
||||
).written;
|
||||
this.memory.i64Store(destp, dest + l);
|
||||
}
|
||||
}
|
@ -136,9 +136,7 @@ rtsAsteriusModule opts =
|
||||
SM.fromList $
|
||||
map
|
||||
(\(func_sym, (_, func)) -> (func_sym, func))
|
||||
( floatCBits
|
||||
<> textCBits
|
||||
)
|
||||
floatCBits
|
||||
}
|
||||
<> hsInitFunction opts
|
||||
<> wasmApplyRelocsFunction opts
|
||||
@ -615,8 +613,7 @@ rtsFunctionImports debug =
|
||||
)
|
||||
<> map
|
||||
(fst . snd)
|
||||
( floatCBits <> textCBits
|
||||
)
|
||||
floatCBits
|
||||
<> schedulerImports
|
||||
<> exportsImports
|
||||
<> envImports
|
||||
@ -711,20 +708,6 @@ rtsGlobalExports = mempty
|
||||
emitErrorMessage :: [ValueType] -> BS.ByteString -> Expression
|
||||
emitErrorMessage vts ev = Barf {barfMessage = ev, barfReturnTypes = vts}
|
||||
|
||||
textCBits :: [(EntitySymbol, (FunctionImport, Function))]
|
||||
textCBits =
|
||||
map
|
||||
( \(func_sym, param_vts, ret_vts) ->
|
||||
( mkEntitySymbol func_sym,
|
||||
generateRTSWrapper "text" func_sym param_vts ret_vts
|
||||
)
|
||||
)
|
||||
[ ("_hs_text_memcpy", [I64, I64, I64, I64, I64], []),
|
||||
("_hs_text_memcmp", [I64, I64, I64, I64, I64], [I64]),
|
||||
("_hs_text_decode_utf8", [I64, I64, I64, I64], [I64]),
|
||||
("_hs_text_encode_utf8", [I64, I64, I64, I64], [])
|
||||
]
|
||||
|
||||
floatCBits :: [(EntitySymbol, (FunctionImport, Function))]
|
||||
floatCBits =
|
||||
map
|
||||
|
10
ghc-toolkit/boot-libs/text/.gitignore
vendored
10
ghc-toolkit/boot-libs/text/.gitignore
vendored
@ -1,10 +0,0 @@
|
||||
/dist/
|
||||
/dist-boot/
|
||||
/dist-install/
|
||||
/dist-newstyle/
|
||||
/cabal-dev/
|
||||
/cabal.sandbox.config
|
||||
/ghc.mk
|
||||
/GNUmakefile
|
||||
/.ghc.environment.*
|
||||
/cabal.project.local
|
@ -1,14 +0,0 @@
|
||||
^(?:dist|benchmarks/dist|tests/coverage|tests/dist)$
|
||||
^benchmarks/.*\.txt$
|
||||
^tests/text-testdata.tar.bz2$
|
||||
^tests/(?:\.hpc|bm|qc|qc-hpc|stdio-hpc|text/test)$
|
||||
\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp|tix)$
|
||||
~$
|
||||
|
||||
syntax: glob
|
||||
.\#*
|
||||
cabal-dev
|
||||
cabal.sandbox.config
|
||||
\.cabal-sandbox
|
||||
scripts/CaseFolding.txt
|
||||
scripts/SpecialCasing.txt
|
@ -1,67 +0,0 @@
|
||||
0edd5dc96024e6c0f56f175ffa673f9178cece8c 0.9.0.0
|
||||
1de0da99d31ba212e179d88957cf5f3b9c4facd7 0.7.0.1
|
||||
30575507578cf04b5b1501ee23b1faa83315eb88 0.1
|
||||
321e61f17630dbd346c1c9991831572f964b7d5a 0.4
|
||||
34cef27f87287cd3c17d78ac99e9de2d6c8a1200 0.6
|
||||
3cbff6d9bca135a85e428a7f6c4e15739a562b8b 0.7.1.0
|
||||
587c8661393f83cb2b9452f6cc1d6885e52fe544 0.8.1.0
|
||||
61c322bd5b6446255870422f854266396169a7e5 0.5
|
||||
9adb5edd9b6407335e8772ea86036b938471fc89 0.2
|
||||
b4fdbef1c1027a70b95c1a81747d968890acc3f6 0.3
|
||||
de7cb0cb2a72606126c357b860bb0dbf7f145ad2 0.9.0.1
|
||||
f2c9b1c019e3277926378d7b5a604f1c995f0202 0.7
|
||||
f3d625f2d0730cb6adf8506a1f37762178c9aea6 0.10.0.0
|
||||
99ea36957b1d47145d70f2ee8b13f1ddfd09245c 0.10.0.1
|
||||
1b7889216e78b00d69b32c48782111bcd935978d 0.10.0.2
|
||||
75157eaec6eb38574ad5c347e75c48dcb7262dee 0.11.0.0
|
||||
0d7bc7bd77ec5a8f6d064de59d459e5eaf906b0f 0.11.0.1
|
||||
a9d582a75dea9a4417883019d5110a35e4c07265 0.11.0.2
|
||||
36b2a3aa661892ea25101199220383ba6d579abb 0.11.0.3
|
||||
06e407bcdc9a3905abf9551062ecc6f92abcdde5 0.11.0.4
|
||||
94bcf16e812f6fb49b87598673e17270f48c2bf1 0.11.0.5
|
||||
784a32ffbec0b77adb43e9a64328db0482007309 0.11.0.6
|
||||
c808123a3d37f25004d2aad8f3a395acfcf9122f 0.11.0.7
|
||||
679826e742b239c5dd0c1fa4ebe98aa4bfe02a00 0.11.0.8
|
||||
b49eebfa5673da89a23939d523c9de9edbd410b0 0.11.1.0
|
||||
9c820a2ecaea01bc614d8e6bccf90e57431bdcbb 0.11.1.1
|
||||
ed3a60ec627af6ba8af321009aa00224b3296e47 0.11.1.2
|
||||
b75d3041d275e8e76b26605b0d426a572ddf1249 0.11.1.3
|
||||
53906ad0c7e64f6c9a5df569e7cc642579aab8fc 0.11.1.5
|
||||
9d6d3a9690ade506897c072060eb92868e229d5c 0.11.1.6
|
||||
5ac062eace36b333566d1676979a57039a188840 0.11.1.7
|
||||
9f01361a73071936b6ab0bdb879f1a45cca6577a 0.11.1.8
|
||||
5dce2a934be53e74310fedffb51d7bd3e61fa993 0.11.1.9
|
||||
9f47a2cfc9e51fd622d7553f08ad2ac1faad0438 0.11.1.10
|
||||
9f47a2cfc9e51fd622d7553f08ad2ac1faad0438 0.11.1.10
|
||||
407937739e9e764f1ae0f1f9ca454c42dca38772 0.11.1.10
|
||||
8b981edd27befa4c2dd334fcb7db22ac67e22b67 0.11.1.11
|
||||
204da16b5098531bdf858c388e2620238ef2aa5e 0.11.1.12
|
||||
6a3d847a56a69d0514a79cb212cb218271ad0917 0.11.1.13
|
||||
1d2c6fa9092c6a4000b2abdd9d01f3efcd477be5 0.11.2.0
|
||||
78219784cf3652cc662805bf2971bd62d80210a9 0.11.2.1
|
||||
4297307ebc11ad677cfba6b40319e7e5e2c0cfee 0.11.2.3
|
||||
7fa79662b66aade97fe49394977213fe6432942e 0.11.3.0
|
||||
d99cd091cdf71ce807a4255f6cc509c3154f51ea 0.11.3.1
|
||||
31f57612fd425d1bb56352ce6b79e41ce6699a45 1.0.0.0
|
||||
7cba97c86467c1881dfced68613891f3c65561b0 1.0.0.1
|
||||
62674a9bbc83145d1d7be28a05d456ab6bb5ff89 1.1.0.0
|
||||
68a32d80c0829da164d1e6b3acedcc3efeda0957 1.1.0.1
|
||||
6d863daafbf0e1289ec0a0eb9b4be2a0a9cb8d3d 1.1.1.0
|
||||
fb7299f22449750c653ac64dee4bd63c13ea6e4c 1.1.1.1
|
||||
eb170cb533b63534d166b5fd5344b36676adf140 1.1.1.2
|
||||
e749e05f715d02445803f653cd3d97d5395d7c6f 1.1.1.3
|
||||
b0dff8ca1142595bab489a768ca62e6a6805aac4 1.1.1.4
|
||||
f8a60279237c1c28e094868740d9188dd94fcc9e 1.2.0.0
|
||||
6231451979b2d33e28038b75f2dab19c9731f5c3 1.2.0.1
|
||||
99a3503b4ad001fc1894e90286e96d7dea1e9195 1.2.0.2
|
||||
47f138edf15576b9adaeed64784e30811099f202 1.2.0.3
|
||||
0b064b0c69106bbcd13cb0099e72ae5fa2347bf0 1.2.0.4
|
||||
8bbb3e222a0037035d893f731b0a23be449f7346 1.2.0.5
|
||||
18c7f8e19872762b7e28ee1871e72f70ea54e3cd 1.2.0.6
|
||||
42f751b87b4a187db93f595a8166d51746ff57d4 1.2.1.0
|
||||
78227a84740b70dcd4cf7a65f3bcf603d87e48e2 1.2.1.1
|
||||
40f268e6735749ef36f487e1d928eaa957bb431d 1.2.1.2
|
||||
631fd055e27fa10983e17e9ad36d1b285b85c661 1.2.1.3
|
||||
8221a68ed5ec37028357df9c6c25f52320e54ad9 1.2.2.0
|
||||
ce1c5ac65eb9f5580114b4e380092b05a485e95d 1.2.2.1
|
||||
d72c634a91ffe27ec5cd03921dd53efc8129d8f6 1.2.2.2
|
@ -1,158 +0,0 @@
|
||||
# This Travis job script has been generated by a script via
|
||||
#
|
||||
# haskell-ci 'text.cabal'
|
||||
#
|
||||
# For more information, see https://github.com/haskell-CI/haskell-ci
|
||||
#
|
||||
# version: 0.3.20190521
|
||||
#
|
||||
language: c
|
||||
dist: xenial
|
||||
git:
|
||||
# whether to recursively clone submodules
|
||||
submodules: false
|
||||
cache:
|
||||
directories:
|
||||
- $HOME/.cabal/packages
|
||||
- $HOME/.cabal/store
|
||||
before_cache:
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log
|
||||
# remove files that are regenerated by 'cabal update'
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/00-index.*
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/*.json
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.cache
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar
|
||||
- rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx
|
||||
- rm -rfv $CABALHOME/packages/head.hackage
|
||||
matrix:
|
||||
include:
|
||||
- compiler: ghc-8.6.5
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.6.5","cabal-install-2.4"]}}
|
||||
- compiler: ghc-8.4.4
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.4.4","cabal-install-2.4"]}}
|
||||
- compiler: ghc-8.2.2
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.2.2","cabal-install-2.4"]}}
|
||||
- compiler: ghc-8.0.2
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.0.2","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.10.3
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.10.3","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.8.4
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.8.4","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.6.3
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.6.3","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.4.2
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.4.2","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.2.2
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.2.2","cabal-install-2.4"]}}
|
||||
- compiler: ghc-7.0.4
|
||||
addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.0.4","cabal-install-2.4"]}}
|
||||
before_install:
|
||||
- HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//')
|
||||
- HCPKG="$HC-pkg"
|
||||
- unset CC
|
||||
- CABAL=/opt/ghc/bin/cabal
|
||||
- CABALHOME=$HOME/.cabal
|
||||
- export PATH="$CABALHOME/bin:$PATH"
|
||||
- TOP=$(pwd)
|
||||
- HCNUMVER=$(( $(${HC} --numeric-version|sed -E 's/([0-9]+)\.([0-9]+)\.([0-9]+).*/\1 * 10000 + \2 * 100 + \3/') ))
|
||||
- echo $HCNUMVER
|
||||
- CABAL="$CABAL -vnormal+nowrap+markoutput"
|
||||
- set -o pipefail
|
||||
- |
|
||||
echo 'function blue(s) { printf "\033[0;34m" s "\033[0m " }' >> .colorful.awk
|
||||
echo 'BEGIN { state = "output"; }' >> .colorful.awk
|
||||
echo '/^-----BEGIN CABAL OUTPUT-----$/ { state = "cabal" }' >> .colorful.awk
|
||||
echo '/^-----END CABAL OUTPUT-----$/ { state = "output" }' >> .colorful.awk
|
||||
echo '!/^(-----BEGIN CABAL OUTPUT-----|-----END CABAL OUTPUT-----)/ {' >> .colorful.awk
|
||||
echo ' if (state == "cabal") {' >> .colorful.awk
|
||||
echo ' print blue($0)' >> .colorful.awk
|
||||
echo ' } else {' >> .colorful.awk
|
||||
echo ' print $0' >> .colorful.awk
|
||||
echo ' }' >> .colorful.awk
|
||||
echo '}' >> .colorful.awk
|
||||
- cat .colorful.awk
|
||||
- |
|
||||
color_cabal_output () {
|
||||
awk -f $TOP/.colorful.awk
|
||||
}
|
||||
- echo text | color_cabal_output
|
||||
install:
|
||||
- ${CABAL} --version
|
||||
- echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]"
|
||||
- TEST=--enable-tests
|
||||
- BENCH=--enable-benchmarks
|
||||
- GHCHEAD=${GHCHEAD-false}
|
||||
- rm -f $CABALHOME/config
|
||||
- |
|
||||
echo "verbose: normal +nowrap +markoutput" >> $CABALHOME/config
|
||||
echo "remote-build-reporting: anonymous" >> $CABALHOME/config
|
||||
echo "remote-repo-cache: $CABALHOME/packages" >> $CABALHOME/config
|
||||
echo "logs-dir: $CABALHOME/logs" >> $CABALHOME/config
|
||||
echo "world-file: $CABALHOME/world" >> $CABALHOME/config
|
||||
echo "extra-prog-path: $CABALHOME/bin" >> $CABALHOME/config
|
||||
echo "symlink-bindir: $CABALHOME/bin" >> $CABALHOME/config
|
||||
echo "installdir: $CABALHOME/bin" >> $CABALHOME/config
|
||||
echo "build-summary: $CABALHOME/logs/build.log" >> $CABALHOME/config
|
||||
echo "store-dir: $CABALHOME/store" >> $CABALHOME/config
|
||||
echo "install-dirs user" >> $CABALHOME/config
|
||||
echo " prefix: $CABALHOME" >> $CABALHOME/config
|
||||
echo "repository hackage.haskell.org" >> $CABALHOME/config
|
||||
echo " url: http://hackage.haskell.org/" >> $CABALHOME/config
|
||||
- cat $CABALHOME/config
|
||||
- rm -fv cabal.project cabal.project.local cabal.project.freeze
|
||||
- travis_retry ${CABAL} v2-update -v
|
||||
# Generate cabal.project
|
||||
- rm -rf cabal.project cabal.project.local cabal.project.freeze
|
||||
- touch cabal.project
|
||||
- |
|
||||
echo 'packages: "."' >> cabal.project
|
||||
- |
|
||||
echo "write-ghc-environment-files: always" >> cabal.project
|
||||
- "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
|
||||
- cat cabal.project || true
|
||||
- cat cabal.project.local || true
|
||||
- if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi
|
||||
- ${CABAL} v2-freeze -w ${HC} ${TEST} ${BENCH} | color_cabal_output
|
||||
- "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'"
|
||||
- rm cabal.project.freeze
|
||||
- ${CABAL} v2-build -w ${HC} ${TEST} ${BENCH} --dep -j2 all | color_cabal_output
|
||||
- ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks --dep -j2 all | color_cabal_output
|
||||
script:
|
||||
- DISTDIR=$(mktemp -d /tmp/dist-test.XXXX)
|
||||
# Packaging...
|
||||
- ${CABAL} v2-sdist all | color_cabal_output
|
||||
# Unpacking...
|
||||
- mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/
|
||||
- cp -av th-tests ${DISTDIR}/th-tests.src
|
||||
- cd ${DISTDIR} || false
|
||||
- find . -maxdepth 1 -name '*.tar.gz' -exec tar -xvf '{}' \;
|
||||
# Generate cabal.project
|
||||
- rm -rf cabal.project cabal.project.local cabal.project.freeze
|
||||
- touch cabal.project
|
||||
- |
|
||||
echo 'packages: "text-*/*.cabal"' >> cabal.project
|
||||
- |
|
||||
echo 'packages: ./th-tests.src/' >> cabal.project
|
||||
- |
|
||||
echo "write-ghc-environment-files: always" >> cabal.project
|
||||
- "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done"
|
||||
- cat cabal.project || true
|
||||
- cat cabal.project.local || true
|
||||
# Building...
|
||||
# this builds all libraries and executables (without tests/benchmarks)
|
||||
- ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks all | color_cabal_output
|
||||
# Building with tests and benchmarks...
|
||||
# build & run tests, build benchmarks
|
||||
- ${CABAL} v2-build -w ${HC} ${TEST} ${BENCH} all | color_cabal_output
|
||||
# Testing...
|
||||
- ${CABAL} v2-test -w ${HC} ${TEST} ${BENCH} all | color_cabal_output
|
||||
# cabal check...
|
||||
- (cd text-* && ${CABAL} -vnormal check)
|
||||
# haddock...
|
||||
- ${CABAL} v2-haddock -w ${HC} ${TEST} ${BENCH} all | color_cabal_output
|
||||
# Building without installed constraints for packages in global-db...
|
||||
- rm -f cabal.project.local
|
||||
- ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks all | color_cabal_output
|
||||
|
||||
# REGENDATA ["text.cabal"]
|
||||
# EOF
|
File diff suppressed because it is too large
Load Diff
@ -1,249 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types,
|
||||
RecordWildCards, UnboxedTuples, UnliftedFFITypes #-}
|
||||
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Array
|
||||
-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Packed, unboxed, heap-resident arrays. Suitable for performance
|
||||
-- critical use, both in terms of large data quantities and high
|
||||
-- speed.
|
||||
--
|
||||
-- This module is intended to be imported @qualified@, to avoid name
|
||||
-- clashes with "Prelude" functions, e.g.
|
||||
--
|
||||
-- > import qualified Data.Text.Array as A
|
||||
--
|
||||
-- The names in this module resemble those in the 'Data.Array' family
|
||||
-- of modules, but are shorter due to the assumption of qualified
|
||||
-- naming.
|
||||
module Data.Text.Array
|
||||
(
|
||||
-- * Types
|
||||
Array(Array, aBA)
|
||||
, MArray(MArray, maBA)
|
||||
|
||||
-- * Functions
|
||||
, copyM
|
||||
, copyI
|
||||
, empty
|
||||
, equal
|
||||
#if defined(ASSERTS)
|
||||
, length
|
||||
#endif
|
||||
, run
|
||||
, run2
|
||||
, toList
|
||||
, unsafeFreeze
|
||||
, unsafeIndex
|
||||
, new
|
||||
, unsafeWrite
|
||||
) where
|
||||
|
||||
#if defined(ASSERTS)
|
||||
-- This fugly hack is brought by GHC's apparent reluctance to deal
|
||||
-- with MagicHash and UnboxedTuples when inferring types. Eek!
|
||||
# define CHECK_BOUNDS(_func_,_len_,_k_) \
|
||||
if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.Text.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else
|
||||
#else
|
||||
# define CHECK_BOUNDS(_func_,_len_,_k_)
|
||||
#endif
|
||||
|
||||
#include "MachDeps.h"
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
#if MIN_VERSION_base(4,4,0)
|
||||
import Control.Monad.ST.Unsafe (unsafeIOToST)
|
||||
#else
|
||||
import Control.Monad.ST (unsafeIOToST)
|
||||
#endif
|
||||
import Data.Bits ((.&.), xor)
|
||||
import Data.Text.Internal.Unsafe (inlinePerformIO)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
|
||||
#if MIN_VERSION_base(4,5,0)
|
||||
import Foreign.C.Types (CInt(CInt), CSize(CSize))
|
||||
#else
|
||||
import Foreign.C.Types (CInt, CSize)
|
||||
#endif
|
||||
import GHC.Base (ByteArray#, MutableByteArray#, Int(..),
|
||||
indexWord16Array#, newByteArray#,
|
||||
unsafeFreezeByteArray#, writeWord16Array#)
|
||||
import GHC.ST (ST(..), runST)
|
||||
import GHC.Word (Word16(..))
|
||||
import Prelude hiding (length, read)
|
||||
|
||||
-- | Immutable array type.
|
||||
--
|
||||
-- The 'Array' constructor is exposed since @text-1.1.1.3@
|
||||
data Array = Array {
|
||||
aBA :: ByteArray#
|
||||
#if defined(ASSERTS)
|
||||
, aLen :: {-# UNPACK #-} !Int -- length (in units of Word16, not bytes)
|
||||
#endif
|
||||
}
|
||||
|
||||
-- | Mutable array type, for use in the ST monad.
|
||||
--
|
||||
-- The 'MArray' constructor is exposed since @text-1.1.1.3@
|
||||
data MArray s = MArray {
|
||||
maBA :: MutableByteArray# s
|
||||
#if defined(ASSERTS)
|
||||
, maLen :: {-# UNPACK #-} !Int -- length (in units of Word16, not bytes)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(ASSERTS)
|
||||
-- | Operations supported by all arrays.
|
||||
class IArray a where
|
||||
-- | Return the length of an array.
|
||||
length :: a -> Int
|
||||
|
||||
instance IArray Array where
|
||||
length = aLen
|
||||
{-# INLINE length #-}
|
||||
|
||||
instance IArray (MArray s) where
|
||||
length = maLen
|
||||
{-# INLINE length #-}
|
||||
#endif
|
||||
|
||||
-- | Create an uninitialized mutable array.
|
||||
new :: forall s. Int -> ST s (MArray s)
|
||||
new n
|
||||
| n < 0 || n .&. highBit /= 0 = array_size_error
|
||||
| otherwise = ST $ \s1# ->
|
||||
case newByteArray# len# s1# of
|
||||
(# s2#, marr# #) -> (# s2#, MArray marr#
|
||||
#if defined(ASSERTS)
|
||||
n
|
||||
#endif
|
||||
#)
|
||||
where !(I# len#) = bytesInArray n
|
||||
highBit = maxBound `xor` (maxBound `shiftR` 1)
|
||||
{-# INLINE new #-}
|
||||
|
||||
array_size_error :: a
|
||||
array_size_error = error "Data.Text.Array.new: size overflow"
|
||||
|
||||
-- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
|
||||
unsafeFreeze :: MArray s -> ST s Array
|
||||
unsafeFreeze MArray{..} = ST $ \s1# ->
|
||||
case unsafeFreezeByteArray# maBA s1# of
|
||||
(# s2#, ba# #) -> (# s2#, Array ba#
|
||||
#if defined(ASSERTS)
|
||||
maLen
|
||||
#endif
|
||||
#)
|
||||
{-# INLINE unsafeFreeze #-}
|
||||
|
||||
-- | Indicate how many bytes would be used for an array of the given
|
||||
-- size.
|
||||
bytesInArray :: Int -> Int
|
||||
bytesInArray n = n `shiftL` 1
|
||||
{-# INLINE bytesInArray #-}
|
||||
|
||||
-- | Unchecked read of an immutable array. May return garbage or
|
||||
-- crash on an out-of-bounds access.
|
||||
unsafeIndex :: Array -> Int -> Word16
|
||||
unsafeIndex Array{..} i@(I# i#) =
|
||||
CHECK_BOUNDS("unsafeIndex",aLen,i)
|
||||
case indexWord16Array# aBA i# of r# -> (W16# r#)
|
||||
{-# INLINE unsafeIndex #-}
|
||||
|
||||
-- | Unchecked write of a mutable array. May return garbage or crash
|
||||
-- on an out-of-bounds access.
|
||||
unsafeWrite :: MArray s -> Int -> Word16 -> ST s ()
|
||||
unsafeWrite MArray{..} i@(I# i#) (W16# e#) = ST $ \s1# ->
|
||||
CHECK_BOUNDS("unsafeWrite",maLen,i)
|
||||
case writeWord16Array# maBA i# e# s1# of
|
||||
s2# -> (# s2#, () #)
|
||||
{-# INLINE unsafeWrite #-}
|
||||
|
||||
-- | Convert an immutable array to a list.
|
||||
toList :: Array -> Int -> Int -> [Word16]
|
||||
toList ary off len = loop 0
|
||||
where loop i | i < len = unsafeIndex ary (off+i) : loop (i+1)
|
||||
| otherwise = []
|
||||
|
||||
-- | An empty immutable array.
|
||||
empty :: Array
|
||||
empty = runST (new 0 >>= unsafeFreeze)
|
||||
|
||||
-- | Run an action in the ST monad and return an immutable array of
|
||||
-- its result.
|
||||
run :: (forall s. ST s (MArray s)) -> Array
|
||||
run k = runST (k >>= unsafeFreeze)
|
||||
|
||||
-- | Run an action in the ST monad and return an immutable array of
|
||||
-- its result paired with whatever else the action returns.
|
||||
run2 :: (forall s. ST s (MArray s, a)) -> (Array, a)
|
||||
run2 k = runST (do
|
||||
(marr,b) <- k
|
||||
arr <- unsafeFreeze marr
|
||||
return (arr,b))
|
||||
{-# INLINE run2 #-}
|
||||
|
||||
-- | Copy some elements of a mutable array.
|
||||
copyM :: MArray s -- ^ Destination
|
||||
-> Int -- ^ Destination offset
|
||||
-> MArray s -- ^ Source
|
||||
-> Int -- ^ Source offset
|
||||
-> Int -- ^ Count
|
||||
-> ST s ()
|
||||
copyM dest didx src sidx count
|
||||
| count <= 0 = return ()
|
||||
| otherwise =
|
||||
#if defined(ASSERTS)
|
||||
assert (sidx + count <= length src) .
|
||||
assert (didx + count <= length dest) .
|
||||
#endif
|
||||
unsafeIOToST $ memcpyM (maBA dest) (fromIntegral didx)
|
||||
(maBA src) (fromIntegral sidx)
|
||||
(fromIntegral count)
|
||||
{-# INLINE copyM #-}
|
||||
|
||||
-- | Copy some elements of an immutable array.
|
||||
copyI :: MArray s -- ^ Destination
|
||||
-> Int -- ^ Destination offset
|
||||
-> Array -- ^ Source
|
||||
-> Int -- ^ Source offset
|
||||
-> Int -- ^ First offset in destination /not/ to
|
||||
-- copy (i.e. /not/ length)
|
||||
-> ST s ()
|
||||
copyI dest i0 src j0 top
|
||||
| i0 >= top = return ()
|
||||
| otherwise = unsafeIOToST $
|
||||
memcpyI (maBA dest) (fromIntegral i0)
|
||||
(aBA src) (fromIntegral j0)
|
||||
(fromIntegral (top-i0))
|
||||
{-# INLINE copyI #-}
|
||||
|
||||
-- | Compare portions of two arrays for equality. No bounds checking
|
||||
-- is performed.
|
||||
equal :: Array -- ^ First
|
||||
-> Int -- ^ Offset into first
|
||||
-> Array -- ^ Second
|
||||
-> Int -- ^ Offset into second
|
||||
-> Int -- ^ Count
|
||||
-> Bool
|
||||
equal arrA offA arrB offB count = inlinePerformIO $ do
|
||||
i <- memcmp (aBA arrA) (fromIntegral offA)
|
||||
(aBA arrB) (fromIntegral offB) (fromIntegral count)
|
||||
return $! i == 0
|
||||
{-# INLINE equal #-}
|
||||
|
||||
foreign import ccall unsafe "_hs_text_memcpy" memcpyI
|
||||
:: MutableByteArray# s -> CSize -> ByteArray# -> CSize -> CSize -> IO ()
|
||||
|
||||
foreign import ccall unsafe "_hs_text_memcmp" memcmp
|
||||
:: ByteArray# -> CSize -> ByteArray# -> CSize -> CSize -> IO CInt
|
||||
|
||||
foreign import ccall unsafe "_hs_text_memcpy" memcpyM
|
||||
:: MutableByteArray# s -> CSize -> MutableByteArray# s -> CSize -> CSize
|
||||
-> IO ()
|
@ -1,535 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, MagicHash,
|
||||
UnliftedFFITypes #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
-- |
|
||||
-- Module : Data.Text.Encoding
|
||||
-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts,
|
||||
-- (c) 2008, 2009 Tom Harper
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Functions for converting 'Text' values to and from 'ByteString',
|
||||
-- using several standard encodings.
|
||||
--
|
||||
-- To gain access to a much larger family of encodings, use the
|
||||
-- <http://hackage.haskell.org/package/text-icu text-icu package>.
|
||||
|
||||
module Data.Text.Encoding
|
||||
(
|
||||
-- * Decoding ByteStrings to Text
|
||||
-- $strict
|
||||
decodeASCII
|
||||
, decodeLatin1
|
||||
, decodeUtf8
|
||||
, decodeUtf16LE
|
||||
, decodeUtf16BE
|
||||
, decodeUtf32LE
|
||||
, decodeUtf32BE
|
||||
|
||||
-- ** Catchable failure
|
||||
, decodeUtf8'
|
||||
|
||||
-- ** Controllable error handling
|
||||
, decodeUtf8With
|
||||
, decodeUtf16LEWith
|
||||
, decodeUtf16BEWith
|
||||
, decodeUtf32LEWith
|
||||
, decodeUtf32BEWith
|
||||
|
||||
-- ** Stream oriented decoding
|
||||
-- $stream
|
||||
, streamDecodeUtf8
|
||||
, streamDecodeUtf8With
|
||||
, Decoding(..)
|
||||
|
||||
-- * Encoding Text to ByteStrings
|
||||
, encodeUtf8
|
||||
, encodeUtf16LE
|
||||
, encodeUtf16BE
|
||||
, encodeUtf32LE
|
||||
, encodeUtf32BE
|
||||
|
||||
-- * Encoding Text using ByteString Builders
|
||||
, encodeUtf8Builder
|
||||
, encodeUtf8BuilderEscaped
|
||||
) where
|
||||
|
||||
#if MIN_VERSION_base(4,4,0)
|
||||
import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO)
|
||||
#else
|
||||
import Control.Monad.ST (unsafeIOToST, unsafeSTToIO)
|
||||
#endif
|
||||
|
||||
import Control.Exception (evaluate, try, throwIO, ErrorCall(ErrorCall))
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Bits ((.&.))
|
||||
import Data.ByteString as B
|
||||
import Data.ByteString.Internal as B hiding (c2w)
|
||||
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
|
||||
import Data.Text.Internal (Text(..), safe, text)
|
||||
import Data.Text.Internal.Private (runText)
|
||||
import Data.Text.Internal.Unsafe.Char (ord, unsafeWrite)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftR)
|
||||
import Data.Text.Show ()
|
||||
import Data.Text.Unsafe (unsafeDupablePerformIO)
|
||||
import Data.Word (Word8, Word32)
|
||||
#if MIN_VERSION_base(4,5,0)
|
||||
import Foreign.C.Types (CSize(CSize))
|
||||
#else
|
||||
import Foreign.C.Types (CSize)
|
||||
#endif
|
||||
import Foreign.ForeignPtr (withForeignPtr)
|
||||
import Foreign.Marshal.Utils (with)
|
||||
import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
|
||||
import Foreign.Storable (Storable, peek, poke)
|
||||
import GHC.Base (ByteArray#, MutableByteArray#)
|
||||
import qualified Data.ByteString.Builder as B
|
||||
import qualified Data.ByteString.Builder.Internal as B hiding (empty, append)
|
||||
import qualified Data.ByteString.Builder.Prim as BP
|
||||
import qualified Data.ByteString.Builder.Prim.Internal as BP
|
||||
import qualified Data.Text.Array as A
|
||||
import qualified Data.Text.Internal.Encoding.Fusion as E
|
||||
import qualified Data.Text.Internal.Encoding.Utf16 as U16
|
||||
import qualified Data.Text.Internal.Fusion as F
|
||||
|
||||
#include "text_cbits.h"
|
||||
|
||||
-- $strict
|
||||
--
|
||||
-- All of the single-parameter functions for decoding bytestrings
|
||||
-- encoded in one of the Unicode Transformation Formats (UTF) operate
|
||||
-- in a /strict/ mode: each will throw an exception if given invalid
|
||||
-- input.
|
||||
--
|
||||
-- Each function has a variant, whose name is suffixed with -'With',
|
||||
-- that gives greater control over the handling of decoding errors.
|
||||
-- For instance, 'decodeUtf8' will throw an exception, but
|
||||
-- 'decodeUtf8With' allows the programmer to determine what to do on a
|
||||
-- decoding error.
|
||||
|
||||
-- | /Deprecated/. Decode a 'ByteString' containing 7-bit ASCII
|
||||
-- encoded text.
|
||||
decodeASCII :: ByteString -> Text
|
||||
decodeASCII = decodeUtf8
|
||||
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
|
||||
|
||||
-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
|
||||
--
|
||||
-- 'decodeLatin1' is semantically equivalent to
|
||||
-- @Data.Text.pack . Data.ByteString.Char8.unpack@
|
||||
decodeLatin1 :: ByteString -> Text
|
||||
decodeLatin1 (PS fp off len) = text a 0 len
|
||||
where
|
||||
a = A.run (A.new len >>= unsafeIOToST . go)
|
||||
go dest = withForeignPtr fp $ \ptr -> do
|
||||
c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len))
|
||||
return dest
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text.
|
||||
--
|
||||
-- __NOTE__: The replacement character returned by 'OnDecodeError'
|
||||
-- MUST be within the BMP plane; surrogate code points will
|
||||
-- automatically be remapped to the replacement char @U+FFFD@
|
||||
-- (/since 0.11.3.0/), whereas code points beyond the BMP will throw an
|
||||
-- 'error' (/since 1.2.3.1/); For earlier versions of @text@ using
|
||||
-- those unsupported code points would result in undefined behavior.
|
||||
decodeUtf8With :: OnDecodeError -> ByteString -> Text
|
||||
decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
|
||||
let go dest = withForeignPtr fp $ \ptr ->
|
||||
with (0::CSize) $ \destOffPtr -> do
|
||||
let end = ptr `plusPtr` (off + len)
|
||||
loop curPtr = do
|
||||
curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
|
||||
if curPtr' == end
|
||||
then do
|
||||
n <- peek destOffPtr
|
||||
unsafeSTToIO (done dest (fromIntegral n))
|
||||
else do
|
||||
x <- peek curPtr'
|
||||
case onErr desc (Just x) of
|
||||
Nothing -> loop $ curPtr' `plusPtr` 1
|
||||
Just c
|
||||
| c > '\xFFFF' -> throwUnsupportedReplChar
|
||||
| otherwise -> do
|
||||
destOff <- peek destOffPtr
|
||||
w <- unsafeSTToIO $
|
||||
unsafeWrite dest (fromIntegral destOff)
|
||||
(safe c)
|
||||
poke destOffPtr (destOff + fromIntegral w)
|
||||
loop $ curPtr' `plusPtr` 1
|
||||
loop (ptr `plusPtr` off)
|
||||
(unsafeIOToST . go) =<< A.new len
|
||||
where
|
||||
desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream"
|
||||
|
||||
throwUnsupportedReplChar = throwIO $
|
||||
ErrorCall "decodeUtf8With: non-BMP replacement characters not supported"
|
||||
-- TODO: The code currently assumes that the transcoded UTF-16
|
||||
-- stream is at most twice as long (in bytes) as the input UTF-8
|
||||
-- stream. To justify this assumption one has to assume that the
|
||||
-- error handler replacement character also satisfies this
|
||||
-- invariant, by emitting at most one UTF16 code unit.
|
||||
--
|
||||
-- One easy way to support the full range of code-points for
|
||||
-- replacement characters in the error handler is to simply change
|
||||
-- the (over-)allocation to `A.new (2*len)` and then shrink back the
|
||||
-- `ByteArray#` to the real size (recent GHCs have a cheap
|
||||
-- `ByteArray#` resize-primop for that which allow the GC to reclaim
|
||||
-- the overallocation). However, this would require 4 times as much
|
||||
-- (temporary) storage as the original UTF-8 required.
|
||||
--
|
||||
-- Another strategy would be to optimistically assume that
|
||||
-- replacement characters are within the BMP, and if the case of a
|
||||
-- non-BMP replacement occurs reallocate the target buffer (or throw
|
||||
-- an exception, and fallback to a pessimistic codepath, like e.g.
|
||||
-- `decodeUtf8With onErr bs = F.unstream (E.streamUtf8 onErr bs)`)
|
||||
--
|
||||
-- Alternatively, `OnDecodeError` could become a datastructure which
|
||||
-- statically encodes the replacement-character range,
|
||||
-- e.g. something isomorphic to
|
||||
--
|
||||
-- Either (... -> Maybe Word16) (... -> Maybe Char)
|
||||
--
|
||||
-- And allow to statically switch between the BMP/non-BMP
|
||||
-- replacement-character codepaths. There's multiple ways to address
|
||||
-- this with different tradeoffs; but ideally we should optimise for
|
||||
-- the optimistic/error-free case.
|
||||
{- INLINE[0] decodeUtf8With #-}
|
||||
|
||||
-- $stream
|
||||
--
|
||||
-- The 'streamDecodeUtf8' and 'streamDecodeUtf8With' functions accept
|
||||
-- a 'ByteString' that represents a possibly incomplete input (e.g. a
|
||||
-- packet from a network stream) that may not end on a UTF-8 boundary.
|
||||
--
|
||||
-- 1. The maximal prefix of 'Text' that could be decoded from the
|
||||
-- given input.
|
||||
--
|
||||
-- 2. The suffix of the 'ByteString' that could not be decoded due to
|
||||
-- insufficient input.
|
||||
--
|
||||
-- 3. A function that accepts another 'ByteString'. That string will
|
||||
-- be assumed to directly follow the string that was passed as
|
||||
-- input to the original function, and it will in turn be decoded.
|
||||
--
|
||||
-- To help understand the use of these functions, consider the Unicode
|
||||
-- string @\"hi ☃\"@. If encoded as UTF-8, this becomes @\"hi
|
||||
-- \\xe2\\x98\\x83\"@; the final @\'☃\'@ is encoded as 3 bytes.
|
||||
--
|
||||
-- Now suppose that we receive this encoded string as 3 packets that
|
||||
-- are split up on untidy boundaries: @[\"hi \\xe2\", \"\\x98\",
|
||||
-- \"\\x83\"]@. We cannot decode the entire Unicode string until we
|
||||
-- have received all three packets, but we would like to make progress
|
||||
-- as we receive each one.
|
||||
--
|
||||
-- @
|
||||
-- ghci> let s0\@('Some' _ _ f0) = 'streamDecodeUtf8' \"hi \\xe2\"
|
||||
-- ghci> s0
|
||||
-- 'Some' \"hi \" \"\\xe2\" _
|
||||
-- @
|
||||
--
|
||||
-- We use the continuation @f0@ to decode our second packet.
|
||||
--
|
||||
-- @
|
||||
-- ghci> let s1\@('Some' _ _ f1) = f0 \"\\x98\"
|
||||
-- ghci> s1
|
||||
-- 'Some' \"\" \"\\xe2\\x98\"
|
||||
-- @
|
||||
--
|
||||
-- We could not give @f0@ enough input to decode anything, so it
|
||||
-- returned an empty string. Once we feed our second continuation @f1@
|
||||
-- the last byte of input, it will make progress.
|
||||
--
|
||||
-- @
|
||||
-- ghci> let s2\@('Some' _ _ f2) = f1 \"\\x83\"
|
||||
-- ghci> s2
|
||||
-- 'Some' \"\\x2603\" \"\" _
|
||||
-- @
|
||||
--
|
||||
-- If given invalid input, an exception will be thrown by the function
|
||||
-- or continuation where it is encountered.
|
||||
|
||||
-- | A stream oriented decoding result.
|
||||
--
|
||||
-- @since 1.0.0.0
|
||||
data Decoding = Some Text ByteString (ByteString -> Decoding)
|
||||
|
||||
instance Show Decoding where
|
||||
showsPrec d (Some t bs _) = showParen (d > prec) $
|
||||
showString "Some " . showsPrec prec' t .
|
||||
showChar ' ' . showsPrec prec' bs .
|
||||
showString " _"
|
||||
where prec = 10; prec' = prec + 1
|
||||
|
||||
newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
|
||||
newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
|
||||
|
||||
-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
|
||||
-- encoded text that is known to be valid.
|
||||
--
|
||||
-- If the input contains any invalid UTF-8 data, an exception will be
|
||||
-- thrown (either by this function or a continuation) that cannot be
|
||||
-- caught in pure code. For more control over the handling of invalid
|
||||
-- data, use 'streamDecodeUtf8With'.
|
||||
--
|
||||
-- @since 1.0.0.0
|
||||
streamDecodeUtf8 :: ByteString -> Decoding
|
||||
streamDecodeUtf8 = streamDecodeUtf8With strictDecode
|
||||
|
||||
-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
|
||||
-- encoded text.
|
||||
--
|
||||
-- @since 1.0.0.0
|
||||
streamDecodeUtf8With :: OnDecodeError -> ByteString -> Decoding
|
||||
streamDecodeUtf8With onErr = decodeChunk B.empty 0 0
|
||||
where
|
||||
-- We create a slightly larger than necessary buffer to accommodate a
|
||||
-- potential surrogate pair started in the last buffer
|
||||
decodeChunk :: ByteString -> CodePoint -> DecoderState -> ByteString
|
||||
-> Decoding
|
||||
decodeChunk undecoded0 codepoint0 state0 bs@(PS fp off len) =
|
||||
runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1)
|
||||
where
|
||||
decodeChunkToBuffer :: A.MArray s -> IO Decoding
|
||||
decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
|
||||
with (0::CSize) $ \destOffPtr ->
|
||||
with codepoint0 $ \codepointPtr ->
|
||||
with state0 $ \statePtr ->
|
||||
with nullPtr $ \curPtrPtr ->
|
||||
let end = ptr `plusPtr` (off + len)
|
||||
loop curPtr = do
|
||||
poke curPtrPtr curPtr
|
||||
curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr
|
||||
curPtrPtr end codepointPtr statePtr
|
||||
state <- peek statePtr
|
||||
case state of
|
||||
UTF8_REJECT -> do
|
||||
-- We encountered an encoding error
|
||||
x <- peek curPtr'
|
||||
poke statePtr 0
|
||||
case onErr desc (Just x) of
|
||||
Nothing -> loop $ curPtr' `plusPtr` 1
|
||||
Just c -> do
|
||||
destOff <- peek destOffPtr
|
||||
w <- unsafeSTToIO $
|
||||
unsafeWrite dest (fromIntegral destOff) (safe c)
|
||||
poke destOffPtr (destOff + fromIntegral w)
|
||||
loop $ curPtr' `plusPtr` 1
|
||||
|
||||
_ -> do
|
||||
-- We encountered the end of the buffer while decoding
|
||||
n <- peek destOffPtr
|
||||
codepoint <- peek codepointPtr
|
||||
chunkText <- unsafeSTToIO $ do
|
||||
arr <- A.unsafeFreeze dest
|
||||
return $! text arr 0 (fromIntegral n)
|
||||
lastPtr <- peek curPtrPtr
|
||||
let left = lastPtr `minusPtr` curPtr
|
||||
!undecoded = case state of
|
||||
UTF8_ACCEPT -> B.empty
|
||||
_ -> B.append undecoded0 (B.drop left bs)
|
||||
return $ Some chunkText undecoded
|
||||
(decodeChunk undecoded codepoint state)
|
||||
in loop (ptr `plusPtr` off)
|
||||
desc = "Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream"
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text that is known
|
||||
-- to be valid.
|
||||
--
|
||||
-- If the input contains any invalid UTF-8 data, an exception will be
|
||||
-- thrown that cannot be caught in pure code. For more control over
|
||||
-- the handling of invalid data, use 'decodeUtf8'' or
|
||||
-- 'decodeUtf8With'.
|
||||
decodeUtf8 :: ByteString -> Text
|
||||
decodeUtf8 = decodeUtf8With strictDecode
|
||||
{-# INLINE[0] decodeUtf8 #-}
|
||||
{-# RULES "STREAM stream/decodeUtf8 fusion" [1]
|
||||
forall bs. F.stream (decodeUtf8 bs) = E.streamUtf8 strictDecode bs #-}
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text.
|
||||
--
|
||||
-- If the input contains any invalid UTF-8 data, the relevant
|
||||
-- exception will be returned, otherwise the decoded text.
|
||||
decodeUtf8' :: ByteString -> Either UnicodeException Text
|
||||
decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
|
||||
{-# INLINE decodeUtf8' #-}
|
||||
|
||||
-- | Encode text to a ByteString 'B.Builder' using UTF-8 encoding.
|
||||
--
|
||||
-- @since 1.1.0.0
|
||||
encodeUtf8Builder :: Text -> B.Builder
|
||||
encodeUtf8Builder = encodeUtf8BuilderEscaped (BP.liftFixedToBounded BP.word8)
|
||||
|
||||
-- | Encode text using UTF-8 encoding and escape the ASCII characters using
|
||||
-- a 'BP.BoundedPrim'.
|
||||
--
|
||||
-- Use this function is to implement efficient encoders for text-based formats
|
||||
-- like JSON or HTML.
|
||||
--
|
||||
-- @since 1.1.0.0
|
||||
{-# INLINE encodeUtf8BuilderEscaped #-}
|
||||
-- TODO: Extend documentation with references to source code in @blaze-html@
|
||||
-- or @aeson@ that uses this function.
|
||||
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
|
||||
encodeUtf8BuilderEscaped be =
|
||||
-- manual eta-expansion to ensure inlining works as expected
|
||||
\txt -> B.builder (mkBuildstep txt)
|
||||
where
|
||||
bound = max 4 $ BP.sizeBound be
|
||||
|
||||
mkBuildstep (Text arr off len) !k =
|
||||
outerLoop off
|
||||
where
|
||||
iend = off + len
|
||||
|
||||
outerLoop !i0 !br@(B.BufferRange op0 ope)
|
||||
| i0 >= iend = k br
|
||||
| outRemaining > 0 = goPartial (i0 + min outRemaining inpRemaining)
|
||||
-- TODO: Use a loop with an integrated bound's check if outRemaining
|
||||
-- is smaller than 8, as this will save on divisions.
|
||||
| otherwise = return $ B.bufferFull bound op0 (outerLoop i0)
|
||||
where
|
||||
outRemaining = (ope `minusPtr` op0) `div` bound
|
||||
inpRemaining = iend - i0
|
||||
|
||||
goPartial !iendTmp = go i0 op0
|
||||
where
|
||||
go !i !op
|
||||
| i < iendTmp = case A.unsafeIndex arr i of
|
||||
w | w <= 0x7F -> do
|
||||
BP.runB be (fromIntegral w) op >>= go (i + 1)
|
||||
| w <= 0x7FF -> do
|
||||
poke8 0 $ (w `shiftR` 6) + 0xC0
|
||||
poke8 1 $ (w .&. 0x3f) + 0x80
|
||||
go (i + 1) (op `plusPtr` 2)
|
||||
| 0xD800 <= w && w <= 0xDBFF -> do
|
||||
let c = ord $ U16.chr2 w (A.unsafeIndex arr (i+1))
|
||||
poke8 0 $ (c `shiftR` 18) + 0xF0
|
||||
poke8 1 $ ((c `shiftR` 12) .&. 0x3F) + 0x80
|
||||
poke8 2 $ ((c `shiftR` 6) .&. 0x3F) + 0x80
|
||||
poke8 3 $ (c .&. 0x3F) + 0x80
|
||||
go (i + 2) (op `plusPtr` 4)
|
||||
| otherwise -> do
|
||||
poke8 0 $ (w `shiftR` 12) + 0xE0
|
||||
poke8 1 $ ((w `shiftR` 6) .&. 0x3F) + 0x80
|
||||
poke8 2 $ (w .&. 0x3F) + 0x80
|
||||
go (i + 1) (op `plusPtr` 3)
|
||||
| otherwise =
|
||||
outerLoop i (B.BufferRange op ope)
|
||||
where
|
||||
poke8 j v = poke (op `plusPtr` j) (fromIntegral v :: Word8)
|
||||
|
||||
-- | Encode text using UTF-8 encoding.
|
||||
encodeUtf8 :: Text -> ByteString
|
||||
encodeUtf8 (Text arr off len)
|
||||
| len == 0 = B.empty
|
||||
| otherwise = unsafeDupablePerformIO $ do
|
||||
fp <- mallocByteString (len*3) -- see https://github.com/haskell/text/issues/194 for why len*3 is enough
|
||||
withForeignPtr fp $ \ptr ->
|
||||
with ptr $ \destPtr -> do
|
||||
c_encode_utf8 destPtr (A.aBA arr) (fromIntegral off) (fromIntegral len)
|
||||
newDest <- peek destPtr
|
||||
let utf8len = newDest `minusPtr` ptr
|
||||
if utf8len >= len `shiftR` 1
|
||||
then return (PS fp 0 utf8len)
|
||||
else do
|
||||
fp' <- mallocByteString utf8len
|
||||
withForeignPtr fp' $ \ptr' -> do
|
||||
memcpy ptr' ptr (fromIntegral utf8len)
|
||||
return (PS fp' 0 utf8len)
|
||||
|
||||
-- | Decode text from little endian UTF-16 encoding.
|
||||
decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text
|
||||
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
|
||||
{-# INLINE decodeUtf16LEWith #-}
|
||||
|
||||
-- | Decode text from little endian UTF-16 encoding.
|
||||
--
|
||||
-- If the input contains any invalid little endian UTF-16 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf16LEWith'.
|
||||
decodeUtf16LE :: ByteString -> Text
|
||||
decodeUtf16LE = decodeUtf16LEWith strictDecode
|
||||
{-# INLINE decodeUtf16LE #-}
|
||||
|
||||
-- | Decode text from big endian UTF-16 encoding.
|
||||
decodeUtf16BEWith :: OnDecodeError -> ByteString -> Text
|
||||
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
|
||||
{-# INLINE decodeUtf16BEWith #-}
|
||||
|
||||
-- | Decode text from big endian UTF-16 encoding.
|
||||
--
|
||||
-- If the input contains any invalid big endian UTF-16 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf16BEWith'.
|
||||
decodeUtf16BE :: ByteString -> Text
|
||||
decodeUtf16BE = decodeUtf16BEWith strictDecode
|
||||
{-# INLINE decodeUtf16BE #-}
|
||||
|
||||
-- | Encode text using little endian UTF-16 encoding.
|
||||
encodeUtf16LE :: Text -> ByteString
|
||||
encodeUtf16LE txt = E.unstream (E.restreamUtf16LE (F.stream txt))
|
||||
{-# INLINE encodeUtf16LE #-}
|
||||
|
||||
-- | Encode text using big endian UTF-16 encoding.
|
||||
encodeUtf16BE :: Text -> ByteString
|
||||
encodeUtf16BE txt = E.unstream (E.restreamUtf16BE (F.stream txt))
|
||||
{-# INLINE encodeUtf16BE #-}
|
||||
|
||||
-- | Decode text from little endian UTF-32 encoding.
|
||||
decodeUtf32LEWith :: OnDecodeError -> ByteString -> Text
|
||||
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
|
||||
{-# INLINE decodeUtf32LEWith #-}
|
||||
|
||||
-- | Decode text from little endian UTF-32 encoding.
|
||||
--
|
||||
-- If the input contains any invalid little endian UTF-32 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf32LEWith'.
|
||||
decodeUtf32LE :: ByteString -> Text
|
||||
decodeUtf32LE = decodeUtf32LEWith strictDecode
|
||||
{-# INLINE decodeUtf32LE #-}
|
||||
|
||||
-- | Decode text from big endian UTF-32 encoding.
|
||||
decodeUtf32BEWith :: OnDecodeError -> ByteString -> Text
|
||||
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
|
||||
{-# INLINE decodeUtf32BEWith #-}
|
||||
|
||||
-- | Decode text from big endian UTF-32 encoding.
|
||||
--
|
||||
-- If the input contains any invalid big endian UTF-32 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf32BEWith'.
|
||||
decodeUtf32BE :: ByteString -> Text
|
||||
decodeUtf32BE = decodeUtf32BEWith strictDecode
|
||||
{-# INLINE decodeUtf32BE #-}
|
||||
|
||||
-- | Encode text using little endian UTF-32 encoding.
|
||||
encodeUtf32LE :: Text -> ByteString
|
||||
encodeUtf32LE txt = E.unstream (E.restreamUtf32LE (F.stream txt))
|
||||
{-# INLINE encodeUtf32LE #-}
|
||||
|
||||
-- | Encode text using big endian UTF-32 encoding.
|
||||
encodeUtf32BE :: Text -> ByteString
|
||||
encodeUtf32BE txt = E.unstream (E.restreamUtf32BE (F.stream txt))
|
||||
{-# INLINE encodeUtf32BE #-}
|
||||
|
||||
foreign import ccall unsafe "_hs_text_decode_utf8" c_decode_utf8
|
||||
:: MutableByteArray# s -> Ptr CSize
|
||||
-> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
|
||||
|
||||
foreign import ccall unsafe "_hs_text_decode_utf8_state" c_decode_utf8_with_state
|
||||
:: MutableByteArray# s -> Ptr CSize
|
||||
-> Ptr (Ptr Word8) -> Ptr Word8
|
||||
-> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
|
||||
|
||||
foreign import ccall unsafe "_hs_text_decode_latin1" c_decode_latin1
|
||||
:: MutableByteArray# s -> Ptr Word8 -> Ptr Word8 -> IO ()
|
||||
|
||||
foreign import ccall unsafe "_hs_text_encode_utf8" c_encode_utf8
|
||||
:: Ptr (Ptr Word8) -> ByteArray# -> CSize -> CSize -> IO ()
|
@ -1,124 +0,0 @@
|
||||
{-# LANGUAGE CPP, DeriveDataTypeable #-}
|
||||
#if __GLASGOW_HASKELL__ >= 704
|
||||
{-# LANGUAGE Safe #-}
|
||||
#elif __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
-- |
|
||||
-- Module : Data.Text.Encoding.Error
|
||||
-- Copyright : (c) Bryan O'Sullivan 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Types and functions for dealing with encoding and decoding errors
|
||||
-- in Unicode text.
|
||||
--
|
||||
-- The standard functions for encoding and decoding text are strict,
|
||||
-- which is to say that they throw exceptions on invalid input. This
|
||||
-- is often unhelpful on real world input, so alternative functions
|
||||
-- exist that accept custom handlers for dealing with invalid inputs.
|
||||
-- These 'OnError' handlers are normal Haskell functions. You can use
|
||||
-- one of the presupplied functions in this module, or you can write a
|
||||
-- custom handler of your own.
|
||||
|
||||
module Data.Text.Encoding.Error
|
||||
(
|
||||
-- * Error handling types
|
||||
UnicodeException(..)
|
||||
, OnError
|
||||
, OnDecodeError
|
||||
, OnEncodeError
|
||||
-- * Useful error handling functions
|
||||
, lenientDecode
|
||||
, strictDecode
|
||||
, strictEncode
|
||||
, ignore
|
||||
, replace
|
||||
) where
|
||||
|
||||
import Control.DeepSeq (NFData (..))
|
||||
import Control.Exception (Exception, throw)
|
||||
import Data.Typeable (Typeable)
|
||||
import Data.Word (Word8)
|
||||
import Numeric (showHex)
|
||||
|
||||
-- | Function type for handling a coding error. It is supplied with
|
||||
-- two inputs:
|
||||
--
|
||||
-- * A 'String' that describes the error.
|
||||
--
|
||||
-- * The input value that caused the error. If the error arose
|
||||
-- because the end of input was reached or could not be identified
|
||||
-- precisely, this value will be 'Nothing'.
|
||||
--
|
||||
-- If the handler returns a value wrapped with 'Just', that value will
|
||||
-- be used in the output as the replacement for the invalid input. If
|
||||
-- it returns 'Nothing', no value will be used in the output.
|
||||
--
|
||||
-- Should the handler need to abort processing, it should use 'error'
|
||||
-- or 'throw' an exception (preferably a 'UnicodeException'). It may
|
||||
-- use the description provided to construct a more helpful error
|
||||
-- report.
|
||||
type OnError a b = String -> Maybe a -> Maybe b
|
||||
|
||||
-- | A handler for a decoding error.
|
||||
type OnDecodeError = OnError Word8 Char
|
||||
|
||||
-- | A handler for an encoding error.
|
||||
{-# DEPRECATED OnEncodeError "This exception is never used in practice, and will be removed." #-}
|
||||
type OnEncodeError = OnError Char Word8
|
||||
|
||||
-- | An exception type for representing Unicode encoding errors.
|
||||
data UnicodeException =
|
||||
DecodeError String (Maybe Word8)
|
||||
-- ^ Could not decode a byte sequence because it was invalid under
|
||||
-- the given encoding, or ran out of input in mid-decode.
|
||||
| EncodeError String (Maybe Char)
|
||||
-- ^ Tried to encode a character that could not be represented
|
||||
-- under the given encoding, or ran out of input in mid-encode.
|
||||
deriving (Eq, Typeable)
|
||||
|
||||
{-# DEPRECATED EncodeError "This constructor is never used, and will be removed." #-}
|
||||
|
||||
showUnicodeException :: UnicodeException -> String
|
||||
showUnicodeException (DecodeError desc (Just w))
|
||||
= "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc)
|
||||
showUnicodeException (DecodeError desc Nothing)
|
||||
= "Cannot decode input: " ++ desc
|
||||
showUnicodeException (EncodeError desc (Just c))
|
||||
= "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
|
||||
showUnicodeException (EncodeError desc Nothing)
|
||||
= "Cannot encode input: " ++ desc
|
||||
|
||||
instance Show UnicodeException where
|
||||
show = showUnicodeException
|
||||
|
||||
instance Exception UnicodeException
|
||||
|
||||
instance NFData UnicodeException where
|
||||
rnf (DecodeError desc w) = rnf desc `seq` rnf w `seq` ()
|
||||
rnf (EncodeError desc c) = rnf desc `seq` rnf c `seq` ()
|
||||
|
||||
-- | Throw a 'UnicodeException' if decoding fails.
|
||||
strictDecode :: OnDecodeError
|
||||
strictDecode desc c = throw (DecodeError desc c)
|
||||
|
||||
-- | Replace an invalid input byte with the Unicode replacement
|
||||
-- character U+FFFD.
|
||||
lenientDecode :: OnDecodeError
|
||||
lenientDecode _ _ = Just '\xfffd'
|
||||
|
||||
-- | Throw a 'UnicodeException' if encoding fails.
|
||||
{-# DEPRECATED strictEncode "This function always throws an exception, and will be removed." #-}
|
||||
strictEncode :: OnEncodeError
|
||||
strictEncode desc c = throw (EncodeError desc c)
|
||||
|
||||
-- | Ignore an invalid input, substituting nothing in the output.
|
||||
ignore :: OnError a b
|
||||
ignore _ _ = Nothing
|
||||
|
||||
-- | Replace an invalid input with a valid output.
|
||||
replace :: b -> OnError a b
|
||||
replace c _ _ = Just c
|
@ -1,176 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Foreign
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Support for using 'Text' data with native code via the Haskell
|
||||
-- foreign function interface.
|
||||
|
||||
module Data.Text.Foreign
|
||||
(
|
||||
-- * Interoperability with native code
|
||||
-- $interop
|
||||
I16
|
||||
-- * Safe conversion functions
|
||||
, fromPtr
|
||||
, useAsPtr
|
||||
, asForeignPtr
|
||||
-- ** Encoding as UTF-8
|
||||
, peekCStringLen
|
||||
, withCStringLen
|
||||
-- * Unsafe conversion code
|
||||
, lengthWord16
|
||||
, unsafeCopyToPtr
|
||||
-- * Low-level manipulation
|
||||
-- $lowlevel
|
||||
, dropWord16
|
||||
, takeWord16
|
||||
) where
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
#if MIN_VERSION_base(4,4,0)
|
||||
import Control.Monad.ST.Unsafe (unsafeIOToST)
|
||||
#else
|
||||
import Control.Monad.ST (unsafeIOToST)
|
||||
#endif
|
||||
import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
|
||||
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
|
||||
import Data.Text.Internal (Text(..), empty)
|
||||
import Data.Text.Unsafe (lengthWord16)
|
||||
import Data.Word (Word16)
|
||||
import Foreign.C.String (CStringLen)
|
||||
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
|
||||
import Foreign.Marshal.Alloc (allocaBytes)
|
||||
import Foreign.Ptr (Ptr, castPtr, plusPtr)
|
||||
import Foreign.Storable (peek, poke)
|
||||
import qualified Data.Text.Array as A
|
||||
|
||||
-- $interop
|
||||
--
|
||||
-- The 'Text' type is implemented using arrays that are not guaranteed
|
||||
-- to have a fixed address in the Haskell heap. All communication with
|
||||
-- native code must thus occur by copying data back and forth.
|
||||
--
|
||||
-- The 'Text' type's internal representation is UTF-16, using the
|
||||
-- platform's native endianness. This makes copied data suitable for
|
||||
-- use with native libraries that use a similar representation, such
|
||||
-- as ICU. To interoperate with native libraries that use different
|
||||
-- internal representations, such as UTF-8 or UTF-32, consider using
|
||||
-- the functions in the 'Data.Text.Encoding' module.
|
||||
|
||||
-- | A type representing a number of UTF-16 code units.
|
||||
newtype I16 = I16 Int
|
||||
deriving (Bounded, Enum, Eq, Integral, Num, Ord, Read, Real, Show)
|
||||
|
||||
-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word16' by copying the
|
||||
-- contents of the array.
|
||||
fromPtr :: Ptr Word16 -- ^ source array
|
||||
-> I16 -- ^ length of source array (in 'Word16' units)
|
||||
-> IO Text
|
||||
fromPtr _ (I16 0) = return empty
|
||||
fromPtr ptr (I16 len) =
|
||||
#if defined(ASSERTS)
|
||||
assert (len > 0) $
|
||||
#endif
|
||||
return $! Text arr 0 len
|
||||
where
|
||||
arr = A.run (A.new len >>= copy)
|
||||
copy marr = loop ptr 0
|
||||
where
|
||||
loop !p !i | i == len = return marr
|
||||
| otherwise = do
|
||||
A.unsafeWrite marr i =<< unsafeIOToST (peek p)
|
||||
loop (p `plusPtr` 2) (i + 1)
|
||||
|
||||
-- $lowlevel
|
||||
--
|
||||
-- Foreign functions that use UTF-16 internally may return indices in
|
||||
-- units of 'Word16' instead of characters. These functions may
|
||||
-- safely be used with such indices, as they will adjust offsets if
|
||||
-- necessary to preserve the validity of a Unicode string.
|
||||
|
||||
-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in
|
||||
-- length.
|
||||
--
|
||||
-- If @n@ would cause the 'Text' to end inside a surrogate pair, the
|
||||
-- end of the prefix will be advanced by one additional 'Word16' unit
|
||||
-- to maintain its validity.
|
||||
takeWord16 :: I16 -> Text -> Text
|
||||
takeWord16 (I16 n) t@(Text arr off len)
|
||||
| n <= 0 = empty
|
||||
| n >= len || m >= len = t
|
||||
| otherwise = Text arr off m
|
||||
where
|
||||
m | w < 0xD800 || w > 0xDBFF = n
|
||||
| otherwise = n+1
|
||||
w = A.unsafeIndex arr (off+n-1)
|
||||
|
||||
-- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word16' units
|
||||
-- dropped from its beginning.
|
||||
--
|
||||
-- If @n@ would cause the 'Text' to begin inside a surrogate pair, the
|
||||
-- beginning of the suffix will be advanced by one additional 'Word16'
|
||||
-- unit to maintain its validity.
|
||||
dropWord16 :: I16 -> Text -> Text
|
||||
dropWord16 (I16 n) t@(Text arr off len)
|
||||
| n <= 0 = t
|
||||
| n >= len || m >= len = empty
|
||||
| otherwise = Text arr (off+m) (len-m)
|
||||
where
|
||||
m | w < 0xD800 || w > 0xDBFF = n
|
||||
| otherwise = n+1
|
||||
w = A.unsafeIndex arr (off+n-1)
|
||||
|
||||
-- | /O(n)/ Copy a 'Text' to an array. The array is assumed to be big
|
||||
-- enough to hold the contents of the entire 'Text'.
|
||||
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
|
||||
unsafeCopyToPtr (Text arr off len) ptr = loop ptr off
|
||||
where
|
||||
end = off + len
|
||||
loop !p !i | i == end = return ()
|
||||
| otherwise = do
|
||||
poke p (A.unsafeIndex arr i)
|
||||
loop (p `plusPtr` 2) (i + 1)
|
||||
|
||||
-- | /O(n)/ Perform an action on a temporary, mutable copy of a
|
||||
-- 'Text'. The copy is freed as soon as the action returns.
|
||||
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
|
||||
useAsPtr t@(Text _arr _off len) action =
|
||||
allocaBytes (len * 2) $ \buf -> do
|
||||
unsafeCopyToPtr t buf
|
||||
action (castPtr buf) (fromIntegral len)
|
||||
|
||||
-- | /O(n)/ Make a mutable copy of a 'Text'.
|
||||
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
|
||||
asForeignPtr t@(Text _arr _off len) = do
|
||||
fp <- mallocForeignPtrArray len
|
||||
withForeignPtr fp $ unsafeCopyToPtr t
|
||||
return (fp, I16 len)
|
||||
|
||||
-- | /O(n)/ Decode a C string with explicit length, which is assumed
|
||||
-- to have been encoded as UTF-8. If decoding fails, a
|
||||
-- 'UnicodeException' is thrown.
|
||||
--
|
||||
-- @since 1.0.0.0
|
||||
peekCStringLen :: CStringLen -> IO Text
|
||||
peekCStringLen cs = do
|
||||
bs <- unsafePackCStringLen cs
|
||||
return $! decodeUtf8 bs
|
||||
|
||||
-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
|
||||
-- storage, with explicit length information. The encoded string may
|
||||
-- contain NUL bytes, and is not followed by a trailing NUL byte.
|
||||
--
|
||||
-- The temporary storage is freed when the subcomputation terminates
|
||||
-- (either normally or via an exception), so the pointer to the
|
||||
-- temporary storage must /not/ be used after this function returns.
|
||||
--
|
||||
-- @since 1.0.0.0
|
||||
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
|
||||
withCStringLen t act = unsafeUseAsCStringLen (encodeUtf8 t) act
|
@ -1,350 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
-- |
|
||||
-- Module : Data.Text.IO
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Simon Marlow
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Efficient locale-sensitive support for text I\/O.
|
||||
--
|
||||
-- Skip past the synopsis for some important notes on performance and
|
||||
-- portability across different versions of GHC.
|
||||
|
||||
module Data.Text.IO
|
||||
(
|
||||
-- * Performance
|
||||
-- $performance
|
||||
|
||||
-- * Locale support
|
||||
-- $locale
|
||||
-- * File-at-a-time operations
|
||||
readFile
|
||||
, writeFile
|
||||
, appendFile
|
||||
-- * Operations on handles
|
||||
, hGetContents
|
||||
, hGetChunk
|
||||
, hGetLine
|
||||
, hPutStr
|
||||
, hPutStrLn
|
||||
-- * Special cases for standard input and output
|
||||
, interact
|
||||
, getContents
|
||||
, getLine
|
||||
, putStr
|
||||
, putStrLn
|
||||
) where
|
||||
|
||||
import Data.Text (Text)
|
||||
import Prelude hiding (appendFile, getContents, getLine, interact,
|
||||
putStr, putStrLn, readFile, writeFile)
|
||||
import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
|
||||
withFile)
|
||||
import qualified Control.Exception as E
|
||||
import Control.Monad (liftM2, when)
|
||||
import Data.IORef (readIORef, writeIORef)
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Internal.Fusion (stream)
|
||||
import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
|
||||
import Data.Text.Internal.IO (hGetLineWith, readChunk)
|
||||
import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer,
|
||||
RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer,
|
||||
writeCharBuf)
|
||||
import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType))
|
||||
import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle,
|
||||
wantWritableHandle)
|
||||
import GHC.IO.Handle.Text (commitBuffer')
|
||||
import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
|
||||
HandleType(..), Newline(..))
|
||||
import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
|
||||
import System.IO.Error (isEOFError)
|
||||
|
||||
-- $performance
|
||||
-- #performance#
|
||||
--
|
||||
-- The functions in this module obey the runtime system's locale,
|
||||
-- character set encoding, and line ending conversion settings.
|
||||
--
|
||||
-- If you know in advance that you will be working with data that has
|
||||
-- a specific encoding (e.g. UTF-8), and your application is highly
|
||||
-- performance sensitive, you may find that it is faster to perform
|
||||
-- I\/O with bytestrings and to encode and decode yourself than to use
|
||||
-- the functions in this module.
|
||||
--
|
||||
-- Whether this will hold depends on the version of GHC you are using,
|
||||
-- the platform you are working on, the data you are working with, and
|
||||
-- the encodings you are using, so be sure to test for yourself.
|
||||
|
||||
-- | The 'readFile' function reads a file and returns the contents of
|
||||
-- the file as a string. The entire file is read strictly, as with
|
||||
-- 'getContents'.
|
||||
readFile :: FilePath -> IO Text
|
||||
readFile name = openFile name ReadMode >>= hGetContents
|
||||
|
||||
-- | Write a string to a file. The file is truncated to zero length
|
||||
-- before writing begins.
|
||||
writeFile :: FilePath -> Text -> IO ()
|
||||
writeFile p = withFile p WriteMode . flip hPutStr
|
||||
|
||||
-- | Write a string the end of a file.
|
||||
appendFile :: FilePath -> Text -> IO ()
|
||||
appendFile p = withFile p AppendMode . flip hPutStr
|
||||
|
||||
catchError :: String -> Handle -> Handle__ -> IOError -> IO (Text, Bool)
|
||||
catchError caller h Handle__{..} err
|
||||
| isEOFError err = do
|
||||
buf <- readIORef haCharBuffer
|
||||
return $ if isEmptyBuffer buf
|
||||
then (T.empty, True)
|
||||
else (T.singleton '\r', True)
|
||||
| otherwise = E.throwIO (augmentIOError err caller h)
|
||||
|
||||
-- | Wrap readChunk and return a value indicating if we're reached the EOF.
|
||||
-- This is needed because unpack_nl is unable to discern the difference
|
||||
-- between a buffer with just \r due to EOF or because not enough data was left
|
||||
-- for decoding. e.g. the final character decoded from the byte buffer was \r.
|
||||
readChunkEof :: Handle__ -> CharBuffer -> IO (Text, Bool)
|
||||
readChunkEof hh buf = do t <- readChunk hh buf
|
||||
return (t, False)
|
||||
|
||||
-- | /Experimental./ Read a single chunk of strict text from a
|
||||
-- 'Handle'. The size of the chunk depends on the amount of input
|
||||
-- currently buffered.
|
||||
--
|
||||
-- This function blocks only if there is no data available, and EOF
|
||||
-- has not yet been reached. Once EOF is reached, this function
|
||||
-- returns an empty string instead of throwing an exception.
|
||||
hGetChunk :: Handle -> IO Text
|
||||
hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
|
||||
where
|
||||
readSingleChunk hh@Handle__{..} = do
|
||||
buf <- readIORef haCharBuffer
|
||||
(t, _) <- readChunkEof hh buf `E.catch` catchError "hGetChunk" h hh
|
||||
return (hh, t)
|
||||
|
||||
-- | Read the remaining contents of a 'Handle' as a string. The
|
||||
-- 'Handle' is closed once the contents have been read, or if an
|
||||
-- exception is thrown.
|
||||
--
|
||||
-- Internally, this function reads a chunk at a time from the
|
||||
-- lower-level buffering abstraction, and concatenates the chunks into
|
||||
-- a single string once the entire file has been read.
|
||||
--
|
||||
-- As a result, it requires approximately twice as much memory as its
|
||||
-- result to construct its result. For files more than a half of
|
||||
-- available RAM in size, this may result in memory exhaustion.
|
||||
hGetContents :: Handle -> IO Text
|
||||
hGetContents h = do
|
||||
chooseGoodBuffering h
|
||||
wantReadableHandle "hGetContents" h readAll
|
||||
where
|
||||
readAll hh@Handle__{..} = do
|
||||
let readChunks = do
|
||||
buf <- readIORef haCharBuffer
|
||||
(t, eof) <- readChunkEof hh buf
|
||||
`E.catch` catchError "hGetContents" h hh
|
||||
if eof
|
||||
then return [t]
|
||||
else (t:) `fmap` readChunks
|
||||
ts <- readChunks
|
||||
(hh', _) <- hClose_help hh
|
||||
return (hh'{haType=ClosedHandle}, T.concat ts)
|
||||
|
||||
-- | Use a more efficient buffer size if we're reading in
|
||||
-- block-buffered mode with the default buffer size. When we can
|
||||
-- determine the size of the handle we're reading, set the buffer size
|
||||
-- to that, so that we can read the entire file in one chunk.
|
||||
-- Otherwise, use a buffer size of at least 16KB.
|
||||
chooseGoodBuffering :: Handle -> IO ()
|
||||
chooseGoodBuffering h = do
|
||||
bufMode <- hGetBuffering h
|
||||
case bufMode of
|
||||
BlockBuffering Nothing -> do
|
||||
d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
|
||||
if ioe_type e == InappropriateType
|
||||
then return 16384 -- faster than the 2KB default
|
||||
else E.throwIO e
|
||||
when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d
|
||||
_ -> return ()
|
||||
|
||||
-- | Read a single line from a handle.
|
||||
hGetLine :: Handle -> IO Text
|
||||
hGetLine = hGetLineWith T.concat
|
||||
|
||||
-- | Write a string to a handle.
|
||||
hPutStr :: Handle -> Text -> IO ()
|
||||
-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
|
||||
hPutStr h t = do
|
||||
(buffer_mode, nl) <-
|
||||
wantWritableHandle "hPutStr" h $ \h_ -> do
|
||||
bmode <- getSpareBuffer h_
|
||||
return (bmode, haOutputNL h_)
|
||||
let str = stream t
|
||||
case buffer_mode of
|
||||
(NoBuffering, _) -> hPutChars h str
|
||||
(LineBuffering, buf) -> writeLines h nl buf str
|
||||
(BlockBuffering _, buf)
|
||||
| nl == CRLF -> writeBlocksCRLF h buf str
|
||||
| otherwise -> writeBlocksRaw h buf str
|
||||
|
||||
hPutChars :: Handle -> Stream Char -> IO ()
|
||||
hPutChars h (Stream next0 s0 _len) = loop s0
|
||||
where
|
||||
loop !s = case next0 s of
|
||||
Done -> return ()
|
||||
Skip s' -> loop s'
|
||||
Yield x s' -> hPutChar h x >> loop s'
|
||||
|
||||
-- The following functions are largely lifted from GHC.IO.Handle.Text,
|
||||
-- but adapted to a coinductive stream of data instead of an inductive
|
||||
-- list.
|
||||
--
|
||||
-- We have several variations of more or less the same code for
|
||||
-- performance reasons. Splitting the original buffered write
|
||||
-- function into line- and block-oriented versions gave us a 2.1x
|
||||
-- performance improvement. Lifting out the raw/cooked newline
|
||||
-- handling gave a few more percent on top.
|
||||
|
||||
writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO ()
|
||||
writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0
|
||||
where
|
||||
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
|
||||
where
|
||||
inner !s !n =
|
||||
case next0 s of
|
||||
Done -> commit n False{-no flush-} True{-release-} >> return ()
|
||||
Skip s' -> inner s' n
|
||||
Yield x s'
|
||||
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
|
||||
| x == '\n' -> do
|
||||
n' <- if nl == CRLF
|
||||
then do n1 <- writeCharBuf raw n '\r'
|
||||
writeCharBuf raw n1 '\n'
|
||||
else writeCharBuf raw n x
|
||||
commit n' True{-needs flush-} False >>= outer s'
|
||||
| otherwise -> writeCharBuf raw n x >>= inner s'
|
||||
commit = commitBuffer h raw len
|
||||
|
||||
writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
|
||||
writeBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0
|
||||
where
|
||||
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
|
||||
where
|
||||
inner !s !n =
|
||||
case next0 s of
|
||||
Done -> commit n False{-no flush-} True{-release-} >> return ()
|
||||
Skip s' -> inner s' n
|
||||
Yield x s'
|
||||
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
|
||||
| x == '\n' -> do n1 <- writeCharBuf raw n '\r'
|
||||
writeCharBuf raw n1 '\n' >>= inner s'
|
||||
| otherwise -> writeCharBuf raw n x >>= inner s'
|
||||
commit = commitBuffer h raw len
|
||||
|
||||
writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
|
||||
writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0
|
||||
where
|
||||
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
|
||||
where
|
||||
inner !s !n =
|
||||
case next0 s of
|
||||
Done -> commit n False{-no flush-} True{-release-} >> return ()
|
||||
Skip s' -> inner s' n
|
||||
Yield x s'
|
||||
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
|
||||
| otherwise -> writeCharBuf raw n x >>= inner s'
|
||||
commit = commitBuffer h raw len
|
||||
|
||||
-- This function is completely lifted from GHC.IO.Handle.Text.
|
||||
getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
|
||||
getSpareBuffer Handle__{haCharBuffer=ref,
|
||||
haBuffers=spare_ref,
|
||||
haBufferMode=mode}
|
||||
= do
|
||||
case mode of
|
||||
NoBuffering -> return (mode, error "no buffer!")
|
||||
_ -> do
|
||||
bufs <- readIORef spare_ref
|
||||
buf <- readIORef ref
|
||||
case bufs of
|
||||
BufferListCons b rest -> do
|
||||
writeIORef spare_ref rest
|
||||
return ( mode, emptyBuffer b (bufSize buf) WriteBuffer)
|
||||
BufferListNil -> do
|
||||
new_buf <- newCharBuffer (bufSize buf) WriteBuffer
|
||||
return (mode, new_buf)
|
||||
|
||||
|
||||
-- This function is completely lifted from GHC.IO.Handle.Text.
|
||||
commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
|
||||
-> IO CharBuffer
|
||||
commitBuffer hdl !raw !sz !count flush release =
|
||||
wantWritableHandle "commitAndReleaseBuffer" hdl $
|
||||
commitBuffer' raw sz count flush release
|
||||
{-# INLINE commitBuffer #-}
|
||||
|
||||
-- | Write a string to a handle, followed by a newline.
|
||||
hPutStrLn :: Handle -> Text -> IO ()
|
||||
hPutStrLn h t = hPutStr h t >> hPutChar h '\n'
|
||||
|
||||
-- | The 'interact' function takes a function of type @Text -> Text@
|
||||
-- as its argument. The entire input from the standard input device is
|
||||
-- passed to this function as its argument, and the resulting string
|
||||
-- is output on the standard output device.
|
||||
interact :: (Text -> Text) -> IO ()
|
||||
interact f = putStr . f =<< getContents
|
||||
|
||||
-- | Read all user input on 'stdin' as a single string.
|
||||
getContents :: IO Text
|
||||
getContents = hGetContents stdin
|
||||
|
||||
-- | Read a single line of user input from 'stdin'.
|
||||
getLine :: IO Text
|
||||
getLine = hGetLine stdin
|
||||
|
||||
-- | Write a string to 'stdout'.
|
||||
putStr :: Text -> IO ()
|
||||
putStr = hPutStr stdout
|
||||
|
||||
-- | Write a string to 'stdout', followed by a newline.
|
||||
putStrLn :: Text -> IO ()
|
||||
putStrLn = hPutStrLn stdout
|
||||
|
||||
-- $locale
|
||||
--
|
||||
-- /Note/: The behaviour of functions in this module depends on the
|
||||
-- version of GHC you are using.
|
||||
--
|
||||
-- Beginning with GHC 6.12, text I\/O is performed using the system or
|
||||
-- handle's current locale and line ending conventions.
|
||||
--
|
||||
-- Under GHC 6.10 and earlier, the system I\/O libraries do not
|
||||
-- support locale-sensitive I\/O or line ending conversion. On these
|
||||
-- versions of GHC, functions in this library all use UTF-8. What
|
||||
-- does this mean in practice?
|
||||
--
|
||||
-- * All data that is read will be decoded as UTF-8.
|
||||
--
|
||||
-- * Before data is written, it is first encoded as UTF-8.
|
||||
--
|
||||
-- * On both reading and writing, the platform's native newline
|
||||
-- conversion is performed.
|
||||
--
|
||||
-- If you must use a non-UTF-8 locale on an older version of GHC, you
|
||||
-- will have to perform the transcoding yourself, e.g. as follows:
|
||||
--
|
||||
-- > import qualified Data.ByteString as B
|
||||
-- > import Data.Text (Text)
|
||||
-- > import Data.Text.Encoding (encodeUtf16)
|
||||
-- >
|
||||
-- > putStr_Utf16LE :: Text -> IO ()
|
||||
-- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)
|
||||
--
|
||||
-- On transcoding errors, an 'IOError' exception is thrown. You can
|
||||
-- use the API in "Data.Text.Encoding" if you need more control over
|
||||
-- error handling or transcoding.
|
@ -1,188 +0,0 @@
|
||||
{-# LANGUAGE CPP, DeriveDataTypeable, UnboxedTuples #-}
|
||||
{-# OPTIONS_HADDOCK not-home #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal
|
||||
-- Copyright : (c) 2008, 2009 Tom Harper,
|
||||
-- (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- A module containing private 'Text' internals. This exposes the
|
||||
-- 'Text' representation and low level construction functions.
|
||||
-- Modules which extend the 'Text' system may need to use this module.
|
||||
--
|
||||
-- You should not use this module unless you are determined to monkey
|
||||
-- with the internals, as the functions here do just about nothing to
|
||||
-- preserve data invariants. You have been warned!
|
||||
|
||||
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||
#include "MachDeps.h"
|
||||
#endif
|
||||
|
||||
module Data.Text.Internal
|
||||
(
|
||||
-- * Types
|
||||
-- $internals
|
||||
Text(..)
|
||||
-- * Construction
|
||||
, text
|
||||
, textP
|
||||
-- * Safety
|
||||
, safe
|
||||
-- * Code that must be here for accessibility
|
||||
, empty
|
||||
, empty_
|
||||
-- * Utilities
|
||||
, firstf
|
||||
-- * Checked multiplication
|
||||
, mul
|
||||
, mul32
|
||||
, mul64
|
||||
-- * Debugging
|
||||
, showText
|
||||
) where
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import Data.Bits
|
||||
import Data.Int (Int32, Int64)
|
||||
import Data.Text.Internal.Unsafe.Char (ord)
|
||||
import Data.Typeable (Typeable)
|
||||
import qualified Data.Text.Array as A
|
||||
|
||||
-- | A space efficient, packed, unboxed Unicode text type.
|
||||
data Text = Text
|
||||
{-# UNPACK #-} !A.Array -- payload (Word16 elements)
|
||||
{-# UNPACK #-} !Int -- offset (units of Word16, not Char)
|
||||
{-# UNPACK #-} !Int -- length (units of Word16, not Char)
|
||||
deriving (Typeable)
|
||||
|
||||
-- | Smart constructor.
|
||||
text_ :: A.Array -> Int -> Int -> Text
|
||||
text_ arr off len =
|
||||
#if defined(ASSERTS)
|
||||
let c = A.unsafeIndex arr off
|
||||
alen = A.length arr
|
||||
in assert (len >= 0) .
|
||||
assert (off >= 0) .
|
||||
assert (alen == 0 || len == 0 || off < alen) .
|
||||
assert (len == 0 || c < 0xDC00 || c > 0xDFFF) $
|
||||
#endif
|
||||
Text arr off len
|
||||
{-# INLINE text_ #-}
|
||||
|
||||
-- | /O(1)/ The empty 'Text'.
|
||||
empty :: Text
|
||||
empty = Text A.empty 0 0
|
||||
{-# INLINE [1] empty #-}
|
||||
|
||||
-- | A non-inlined version of 'empty'.
|
||||
empty_ :: Text
|
||||
empty_ = Text A.empty 0 0
|
||||
{-# NOINLINE empty_ #-}
|
||||
|
||||
-- | Construct a 'Text' without invisibly pinning its byte array in
|
||||
-- memory if its length has dwindled to zero.
|
||||
text :: A.Array -> Int -> Int -> Text
|
||||
text arr off len | len == 0 = empty
|
||||
| otherwise = text_ arr off len
|
||||
{-# INLINE text #-}
|
||||
|
||||
textP :: A.Array -> Int -> Int -> Text
|
||||
{-# DEPRECATED textP "Use text instead" #-}
|
||||
textP = text
|
||||
|
||||
-- | A useful 'show'-like function for debugging purposes.
|
||||
showText :: Text -> String
|
||||
showText (Text arr off len) =
|
||||
"Text " ++ show (A.toList arr off len) ++ ' ' :
|
||||
show off ++ ' ' : show len
|
||||
|
||||
-- | Map a 'Char' to a 'Text'-safe value.
|
||||
--
|
||||
-- UTF-16 surrogate code points are not included in the set of Unicode
|
||||
-- scalar values, but are unfortunately admitted as valid 'Char'
|
||||
-- values by Haskell. They cannot be represented in a 'Text'. This
|
||||
-- function remaps those code points to the Unicode replacement
|
||||
-- character (U+FFFD, \'�\'), and leaves other code points
|
||||
-- unchanged.
|
||||
safe :: Char -> Char
|
||||
safe c
|
||||
| ord c .&. 0x1ff800 /= 0xd800 = c
|
||||
| otherwise = '\xfffd'
|
||||
{-# INLINE [0] safe #-}
|
||||
|
||||
-- | Apply a function to the first element of an optional pair.
|
||||
firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
|
||||
firstf f (Just (a, b)) = Just (f a, b)
|
||||
firstf _ Nothing = Nothing
|
||||
|
||||
-- | Checked multiplication. Calls 'error' if the result would
|
||||
-- overflow.
|
||||
mul :: Int -> Int -> Int
|
||||
#if WORD_SIZE_IN_BITS == 64
|
||||
mul a b = fromIntegral $ fromIntegral a `mul64` fromIntegral b
|
||||
#else
|
||||
mul a b = fromIntegral $ fromIntegral a `mul32` fromIntegral b
|
||||
#endif
|
||||
{-# INLINE mul #-}
|
||||
infixl 7 `mul`
|
||||
|
||||
-- | Checked multiplication. Calls 'error' if the result would
|
||||
-- overflow.
|
||||
mul64 :: Int64 -> Int64 -> Int64
|
||||
mul64 a b
|
||||
| a >= 0 && b >= 0 = mul64_ a b
|
||||
| a >= 0 = -mul64_ a (-b)
|
||||
| b >= 0 = -mul64_ (-a) b
|
||||
| otherwise = mul64_ (-a) (-b)
|
||||
{-# INLINE mul64 #-}
|
||||
infixl 7 `mul64`
|
||||
|
||||
mul64_ :: Int64 -> Int64 -> Int64
|
||||
mul64_ a b
|
||||
| ahi > 0 && bhi > 0 = error "overflow"
|
||||
| top > 0x7fffffff = error "overflow"
|
||||
| total < 0 = error "overflow"
|
||||
| otherwise = total
|
||||
where (# ahi, alo #) = (# a `shiftR` 32, a .&. 0xffffffff #)
|
||||
(# bhi, blo #) = (# b `shiftR` 32, b .&. 0xffffffff #)
|
||||
top = ahi * blo + alo * bhi
|
||||
total = (top `shiftL` 32) + alo * blo
|
||||
{-# INLINE mul64_ #-}
|
||||
|
||||
-- | Checked multiplication. Calls 'error' if the result would
|
||||
-- overflow.
|
||||
mul32 :: Int32 -> Int32 -> Int32
|
||||
mul32 a b = case fromIntegral a * fromIntegral b of
|
||||
ab | ab < min32 || ab > max32 -> error "overflow"
|
||||
| otherwise -> fromIntegral ab
|
||||
where min32 = -0x80000000 :: Int64
|
||||
max32 = 0x7fffffff
|
||||
{-# INLINE mul32 #-}
|
||||
infixl 7 `mul32`
|
||||
|
||||
-- $internals
|
||||
--
|
||||
-- Internally, the 'Text' type is represented as an array of 'Word16'
|
||||
-- UTF-16 code units. The offset and length fields in the constructor
|
||||
-- are in these units, /not/ units of 'Char'.
|
||||
--
|
||||
-- Invariants that all functions must maintain:
|
||||
--
|
||||
-- * Since the 'Text' type uses UTF-16 internally, it cannot represent
|
||||
-- characters in the reserved surrogate code point range U+D800 to
|
||||
-- U+DFFF. To maintain this invariant, the 'safe' function maps
|
||||
-- 'Char' values in this range to the replacement character (U+FFFD,
|
||||
-- \'�\').
|
||||
--
|
||||
-- * A leading (or \"high\") surrogate code unit (0xD800–0xDBFF) must
|
||||
-- always be followed by a trailing (or \"low\") surrogate code unit
|
||||
-- (0xDC00-0xDFFF). A trailing surrogate code unit must always be
|
||||
-- preceded by a leading surrogate code unit.
|
@ -1,329 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
|
||||
{-# OPTIONS_HADDOCK not-home #-}
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Builder
|
||||
-- Copyright : (c) 2013 Bryan O'Sullivan
|
||||
-- (c) 2010 Johan Tibell
|
||||
-- License : BSD-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Johan Tibell <johan.tibell@gmail.com>
|
||||
-- Stability : experimental
|
||||
-- Portability : portable to Hugs and GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Efficient construction of lazy @Text@ values. The principal
|
||||
-- operations on a @Builder@ are @singleton@, @fromText@, and
|
||||
-- @fromLazyText@, which construct new builders, and 'mappend', which
|
||||
-- concatenates two builders.
|
||||
--
|
||||
-- To get maximum performance when building lazy @Text@ values using a
|
||||
-- builder, associate @mappend@ calls to the right. For example,
|
||||
-- prefer
|
||||
--
|
||||
-- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
|
||||
--
|
||||
-- to
|
||||
--
|
||||
-- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
|
||||
--
|
||||
-- as the latter associates @mappend@ to the left.
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module Data.Text.Internal.Builder
|
||||
( -- * Public API
|
||||
-- ** The Builder type
|
||||
Builder
|
||||
, toLazyText
|
||||
, toLazyTextWith
|
||||
|
||||
-- ** Constructing Builders
|
||||
, singleton
|
||||
, fromText
|
||||
, fromLazyText
|
||||
, fromString
|
||||
|
||||
-- ** Flushing the buffer state
|
||||
, flush
|
||||
|
||||
-- * Internal functions
|
||||
, append'
|
||||
, ensureFree
|
||||
, writeN
|
||||
) where
|
||||
|
||||
import Control.Monad.ST (ST, runST)
|
||||
import Data.Monoid (Monoid(..))
|
||||
#if !MIN_VERSION_base(4,11,0) && MIN_VERSION_base(4,9,0)
|
||||
import Data.Semigroup (Semigroup(..))
|
||||
#endif
|
||||
import Data.Text.Internal (Text(..))
|
||||
import Data.Text.Internal.Lazy (smallChunkSize)
|
||||
import Data.Text.Unsafe (inlineInterleaveST)
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
|
||||
import Prelude hiding (map, putChar)
|
||||
|
||||
import qualified Data.String as String
|
||||
import qualified Data.Text as S
|
||||
import qualified Data.Text.Array as A
|
||||
import qualified Data.Text.Lazy as L
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | A @Builder@ is an efficient way to build lazy @Text@ values.
|
||||
-- There are several functions for constructing builders, but only one
|
||||
-- to inspect them: to extract any data, you have to turn them into
|
||||
-- lazy @Text@ values using @toLazyText@.
|
||||
--
|
||||
-- Internally, a builder constructs a lazy @Text@ by filling arrays
|
||||
-- piece by piece. As each buffer is filled, it is \'popped\' off, to
|
||||
-- become a new chunk of the resulting lazy @Text@. All this is
|
||||
-- hidden from the user of the @Builder@.
|
||||
newtype Builder = Builder {
|
||||
-- Invariant (from Data.Text.Lazy):
|
||||
-- The lists include no null Texts.
|
||||
runBuilder :: forall s. (Buffer s -> ST s [S.Text])
|
||||
-> Buffer s
|
||||
-> ST s [S.Text]
|
||||
}
|
||||
|
||||
#if MIN_VERSION_base(4,9,0)
|
||||
instance Semigroup Builder where
|
||||
(<>) = append
|
||||
{-# INLINE (<>) #-}
|
||||
#endif
|
||||
|
||||
instance Monoid Builder where
|
||||
mempty = empty
|
||||
{-# INLINE mempty #-}
|
||||
#if MIN_VERSION_base(4,9,0)
|
||||
mappend = (<>) -- future-proof definition
|
||||
#else
|
||||
mappend = append
|
||||
#endif
|
||||
{-# INLINE mappend #-}
|
||||
mconcat = foldr mappend Data.Monoid.mempty
|
||||
{-# INLINE mconcat #-}
|
||||
|
||||
instance String.IsString Builder where
|
||||
fromString = fromString
|
||||
{-# INLINE fromString #-}
|
||||
|
||||
instance Show Builder where
|
||||
show = show . toLazyText
|
||||
|
||||
instance Eq Builder where
|
||||
a == b = toLazyText a == toLazyText b
|
||||
|
||||
instance Ord Builder where
|
||||
a <= b = toLazyText a <= toLazyText b
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(1)./ The empty @Builder@, satisfying
|
||||
--
|
||||
-- * @'toLazyText' 'empty' = 'L.empty'@
|
||||
--
|
||||
empty :: Builder
|
||||
empty = Builder (\ k buf -> k buf)
|
||||
{-# INLINE empty #-}
|
||||
|
||||
-- | /O(1)./ A @Builder@ taking a single character, satisfying
|
||||
--
|
||||
-- * @'toLazyText' ('singleton' c) = 'L.singleton' c@
|
||||
--
|
||||
singleton :: Char -> Builder
|
||||
singleton c = writeAtMost 2 $ \ marr o -> unsafeWrite marr o c
|
||||
{-# INLINE singleton #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(1)./ The concatenation of two builders, an associative
|
||||
-- operation with identity 'empty', satisfying
|
||||
--
|
||||
-- * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
|
||||
--
|
||||
append :: Builder -> Builder -> Builder
|
||||
append (Builder f) (Builder g) = Builder (f . g)
|
||||
{-# INLINE [0] append #-}
|
||||
|
||||
-- TODO: Experiment to find the right threshold.
|
||||
copyLimit :: Int
|
||||
copyLimit = 128
|
||||
|
||||
-- This function attempts to merge small @Text@ values instead of
|
||||
-- treating each value as its own chunk. We may not always want this.
|
||||
|
||||
-- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
|
||||
--
|
||||
-- * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
|
||||
--
|
||||
fromText :: S.Text -> Builder
|
||||
fromText t@(Text arr off l)
|
||||
| S.null t = empty
|
||||
| l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
|
||||
| otherwise = flush `append` mapBuilder (t :)
|
||||
{-# INLINE [1] fromText #-}
|
||||
|
||||
{-# RULES
|
||||
"fromText/pack" forall s .
|
||||
fromText (S.pack s) = fromString s
|
||||
#-}
|
||||
|
||||
-- | /O(1)./ A Builder taking a @String@, satisfying
|
||||
--
|
||||
-- * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
|
||||
--
|
||||
fromString :: String -> Builder
|
||||
fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
|
||||
let loop !marr !o !u !l [] = k (Buffer marr o u l)
|
||||
loop marr o u l s@(c:cs)
|
||||
| l <= 1 = do
|
||||
arr <- A.unsafeFreeze marr
|
||||
let !t = Text arr o u
|
||||
marr' <- A.new chunkSize
|
||||
ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
|
||||
return $ t : ts
|
||||
| otherwise = do
|
||||
n <- unsafeWrite marr (o+u) c
|
||||
loop marr o (u+n) (l-n) cs
|
||||
in loop p0 o0 u0 l0 str
|
||||
where
|
||||
chunkSize = smallChunkSize
|
||||
{-# INLINE fromString #-}
|
||||
|
||||
-- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
|
||||
--
|
||||
-- * @'toLazyText' ('fromLazyText' t) = t@
|
||||
--
|
||||
fromLazyText :: L.Text -> Builder
|
||||
fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
|
||||
{-# INLINE fromLazyText #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Our internal buffer type
|
||||
data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
|
||||
{-# UNPACK #-} !Int -- offset
|
||||
{-# UNPACK #-} !Int -- used units
|
||||
{-# UNPACK #-} !Int -- length left
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
|
||||
-- buffer size. The construction work takes place if and when the
|
||||
-- relevant part of the lazy @Text@ is demanded.
|
||||
toLazyText :: Builder -> L.Text
|
||||
toLazyText = toLazyTextWith smallChunkSize
|
||||
|
||||
-- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
|
||||
-- size for the initial buffer. The construction work takes place if
|
||||
-- and when the relevant part of the lazy @Text@ is demanded.
|
||||
--
|
||||
-- If the initial buffer is too small to hold all data, subsequent
|
||||
-- buffers will be the default buffer size.
|
||||
toLazyTextWith :: Int -> Builder -> L.Text
|
||||
toLazyTextWith chunkSize m = L.fromChunks (runST $
|
||||
newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
|
||||
|
||||
-- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
|
||||
-- yielding a new chunk in the result lazy @Text@.
|
||||
flush :: Builder
|
||||
flush = Builder $ \ k buf@(Buffer p o u l) ->
|
||||
if u == 0
|
||||
then k buf
|
||||
else do arr <- A.unsafeFreeze p
|
||||
let !b = Buffer p (o+u) 0 l
|
||||
!t = Text arr o u
|
||||
ts <- inlineInterleaveST (k b)
|
||||
return $! t : ts
|
||||
{-# INLINE [1] flush #-}
|
||||
-- defer inlining so that flush/flush rule may fire.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Sequence an ST operation on the buffer
|
||||
withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
|
||||
withBuffer f = Builder $ \k buf -> f buf >>= k
|
||||
{-# INLINE withBuffer #-}
|
||||
|
||||
-- | Get the size of the buffer
|
||||
withSize :: (Int -> Builder) -> Builder
|
||||
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
|
||||
runBuilder (f l) k buf
|
||||
{-# INLINE withSize #-}
|
||||
|
||||
-- | Map the resulting list of texts.
|
||||
mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
|
||||
mapBuilder f = Builder (fmap f .)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Ensure that there are at least @n@ many elements available.
|
||||
ensureFree :: Int -> Builder
|
||||
ensureFree !n = withSize $ \ l ->
|
||||
if n <= l
|
||||
then empty
|
||||
else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
|
||||
{-# INLINE [0] ensureFree #-}
|
||||
|
||||
writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
|
||||
writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
|
||||
{-# INLINE [0] writeAtMost #-}
|
||||
|
||||
-- | Ensure that @n@ many elements are available, and then use @f@ to
|
||||
-- write some elements into the memory.
|
||||
writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
|
||||
writeN n f = writeAtMost n (\ p o -> f p o >> return n)
|
||||
{-# INLINE writeN #-}
|
||||
|
||||
writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
|
||||
writeBuffer f (Buffer p o u l) = do
|
||||
n <- f p (o+u)
|
||||
return $! Buffer p o (u+n) (l-n)
|
||||
{-# INLINE writeBuffer #-}
|
||||
|
||||
newBuffer :: Int -> ST s (Buffer s)
|
||||
newBuffer size = do
|
||||
arr <- A.new size
|
||||
return $! Buffer arr 0 0 size
|
||||
{-# INLINE newBuffer #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Some nice rules for Builder
|
||||
|
||||
-- This function makes GHC understand that 'writeN' and 'ensureFree'
|
||||
-- are *not* recursive in the precense of the rewrite rules below.
|
||||
-- This is not needed with GHC 7+.
|
||||
append' :: Builder -> Builder -> Builder
|
||||
append' (Builder f) (Builder g) = Builder (f . g)
|
||||
{-# INLINE append' #-}
|
||||
|
||||
{-# RULES
|
||||
|
||||
"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
|
||||
(g::forall s. A.MArray s -> Int -> ST s Int) ws.
|
||||
append (writeAtMost a f) (append (writeAtMost b g) ws) =
|
||||
append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
|
||||
g marr (o+n) >>= \ m ->
|
||||
let s = n+m in s `seq` return s)) ws
|
||||
|
||||
"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
|
||||
(g::forall s. A.MArray s -> Int -> ST s Int).
|
||||
append (writeAtMost a f) (writeAtMost b g) =
|
||||
writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
|
||||
g marr (o+n) >>= \ m ->
|
||||
let s = n+m in s `seq` return s)
|
||||
|
||||
"ensureFree/ensureFree" forall a b .
|
||||
append (ensureFree a) (ensureFree b) = ensureFree (max a b)
|
||||
|
||||
"flush/flush"
|
||||
append flush flush = flush
|
||||
|
||||
#-}
|
@ -1,40 +0,0 @@
|
||||
{-# LANGUAGE MagicHash #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Builder.Functions
|
||||
-- Copyright : (c) 2011 MailRank, Inc.
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Useful functions and combinators.
|
||||
|
||||
module Data.Text.Internal.Builder.Functions
|
||||
(
|
||||
(<>)
|
||||
, i2d
|
||||
) where
|
||||
|
||||
import Data.Monoid (mappend)
|
||||
import Data.Text.Lazy.Builder (Builder)
|
||||
import GHC.Base (chr#,ord#,(+#),Int(I#),Char(C#))
|
||||
import Prelude ()
|
||||
|
||||
-- | Unsafe conversion for decimal digits.
|
||||
{-# INLINE i2d #-}
|
||||
i2d :: Int -> Char
|
||||
i2d (I# i#) = C# (chr# (ord# '0'# +# i#))
|
||||
|
||||
-- | The normal 'mappend' function with right associativity instead of
|
||||
-- left.
|
||||
(<>) :: Builder -> Builder -> Builder
|
||||
(<>) = mappend
|
||||
{-# INLINE (<>) #-}
|
||||
|
||||
infixr 4 <>
|
@ -1,26 +0,0 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
-- Module: Data.Text.Internal.Builder.Int.Digits
|
||||
-- Copyright: (c) 2013 Bryan O'Sullivan
|
||||
-- License: BSD-style
|
||||
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
|
||||
-- Stability: experimental
|
||||
-- Portability: portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- This module exists because the C preprocessor does things that we
|
||||
-- shall not speak of when confronted with Haskell multiline strings.
|
||||
|
||||
module Data.Text.Internal.Builder.Int.Digits (digits) where
|
||||
|
||||
import Data.ByteString.Char8 (ByteString)
|
||||
|
||||
digits :: ByteString
|
||||
digits = "0001020304050607080910111213141516171819\
|
||||
\2021222324252627282930313233343536373839\
|
||||
\4041424344454647484950515253545556575859\
|
||||
\6061626364656667686970717273747576777879\
|
||||
\8081828384858687888990919293949596979899"
|
@ -1,57 +0,0 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
-- |
|
||||
-- Module: Data.Text.Internal.Builder.RealFloat.Functions
|
||||
-- Copyright: (c) The University of Glasgow 1994-2002
|
||||
-- License: see libraries/base/LICENSE
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
|
||||
module Data.Text.Internal.Builder.RealFloat.Functions
|
||||
(
|
||||
roundTo
|
||||
) where
|
||||
|
||||
roundTo :: Int -> [Int] -> (Int,[Int])
|
||||
|
||||
#if MIN_VERSION_base(4,6,0)
|
||||
|
||||
roundTo d is =
|
||||
case f d True is of
|
||||
x@(0,_) -> x
|
||||
(1,xs) -> (1, 1:xs)
|
||||
_ -> error "roundTo: bad Value"
|
||||
where
|
||||
b2 = base `quot` 2
|
||||
|
||||
f n _ [] = (0, replicate n 0)
|
||||
f 0 e (x:xs) | x == b2 && e && all (== 0) xs = (0, []) -- Round to even when at exactly half the base
|
||||
| otherwise = (if x >= b2 then 1 else 0, [])
|
||||
f n _ (i:xs)
|
||||
| i' == base = (1,0:ds)
|
||||
| otherwise = (0,i':ds)
|
||||
where
|
||||
(c,ds) = f (n-1) (even i) xs
|
||||
i' = c + i
|
||||
base = 10
|
||||
|
||||
#else
|
||||
|
||||
roundTo d is =
|
||||
case f d is of
|
||||
x@(0,_) -> x
|
||||
(1,xs) -> (1, 1:xs)
|
||||
_ -> error "roundTo: bad Value"
|
||||
where
|
||||
f n [] = (0, replicate n 0)
|
||||
f 0 (x:_) = (if x >= 5 then 1 else 0, [])
|
||||
f n (i:xs)
|
||||
| i' == 10 = (1,0:ds)
|
||||
| otherwise = (0,i':ds)
|
||||
where
|
||||
(c,ds) = f (n-1) xs
|
||||
i' = c + i
|
||||
|
||||
#endif
|
@ -1,208 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Encoding.Fusion
|
||||
-- Copyright : (c) Tom Harper 2008-2009,
|
||||
-- (c) Bryan O'Sullivan 2009,
|
||||
-- (c) Duncan Coutts 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Fusible 'Stream'-oriented functions for converting between 'Text'
|
||||
-- and several common encodings.
|
||||
|
||||
module Data.Text.Internal.Encoding.Fusion
|
||||
(
|
||||
-- * Streaming
|
||||
streamASCII
|
||||
, streamUtf8
|
||||
, streamUtf16LE
|
||||
, streamUtf16BE
|
||||
, streamUtf32LE
|
||||
, streamUtf32BE
|
||||
|
||||
-- * Unstreaming
|
||||
, unstream
|
||||
|
||||
, module Data.Text.Internal.Encoding.Fusion.Common
|
||||
) where
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
|
||||
import Data.Text.Internal.Fusion (Step(..), Stream(..))
|
||||
import Data.Text.Internal.Fusion.Size
|
||||
import Data.Text.Encoding.Error
|
||||
import Data.Text.Internal.Encoding.Fusion.Common
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeChr, unsafeChr8, unsafeChr32)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
|
||||
import Data.Word (Word8, Word16, Word32)
|
||||
import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
|
||||
import Foreign.Storable (pokeByteOff)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Unsafe as B
|
||||
import qualified Data.Text.Internal.Encoding.Utf8 as U8
|
||||
import qualified Data.Text.Internal.Encoding.Utf16 as U16
|
||||
import qualified Data.Text.Internal.Encoding.Utf32 as U32
|
||||
import Data.Text.Unsafe (unsafeDupablePerformIO)
|
||||
|
||||
streamASCII :: ByteString -> Stream Char
|
||||
streamASCII bs = Stream next 0 (maxSize l)
|
||||
where
|
||||
l = B.length bs
|
||||
{-# INLINE next #-}
|
||||
next i
|
||||
| i >= l = Done
|
||||
| otherwise = Yield (unsafeChr8 x1) (i+1)
|
||||
where
|
||||
x1 = B.unsafeIndex bs i
|
||||
{-# DEPRECATED streamASCII "Do not use this function" #-}
|
||||
{-# INLINE [0] streamASCII #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using UTF-8
|
||||
-- encoding.
|
||||
streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf8 onErr bs = Stream next 0 (maxSize l)
|
||||
where
|
||||
l = B.length bs
|
||||
next i
|
||||
| i >= l = Done
|
||||
| U8.validate1 x1 = Yield (unsafeChr8 x1) (i+1)
|
||||
| i+1 < l && U8.validate2 x1 x2 = Yield (U8.chr2 x1 x2) (i+2)
|
||||
| i+2 < l && U8.validate3 x1 x2 x3 = Yield (U8.chr3 x1 x2 x3) (i+3)
|
||||
| i+3 < l && U8.validate4 x1 x2 x3 x4 = Yield (U8.chr4 x1 x2 x3 x4) (i+4)
|
||||
| otherwise = decodeError "streamUtf8" "UTF-8" onErr (Just x1) (i+1)
|
||||
where
|
||||
x1 = idx i
|
||||
x2 = idx (i + 1)
|
||||
x3 = idx (i + 2)
|
||||
x4 = idx (i + 3)
|
||||
idx = B.unsafeIndex bs
|
||||
{-# INLINE [0] streamUtf8 #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
|
||||
-- endian UTF-16 encoding.
|
||||
streamUtf16LE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf16LE onErr bs = Stream next 0 (maxSize (l `shiftR` 1))
|
||||
where
|
||||
l = B.length bs
|
||||
{-# INLINE next #-}
|
||||
next i
|
||||
| i >= l = Done
|
||||
| i+1 < l && U16.validate1 x1 = Yield (unsafeChr x1) (i+2)
|
||||
| i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
|
||||
| otherwise = decodeError "streamUtf16LE" "UTF-16LE" onErr Nothing (i+1)
|
||||
where
|
||||
x1 = idx i + (idx (i + 1) `shiftL` 8)
|
||||
x2 = idx (i + 2) + (idx (i + 3) `shiftL` 8)
|
||||
idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
|
||||
{-# INLINE [0] streamUtf16LE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
|
||||
-- endian UTF-16 encoding.
|
||||
streamUtf16BE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf16BE onErr bs = Stream next 0 (maxSize (l `shiftR` 1))
|
||||
where
|
||||
l = B.length bs
|
||||
{-# INLINE next #-}
|
||||
next i
|
||||
| i >= l = Done
|
||||
| i+1 < l && U16.validate1 x1 = Yield (unsafeChr x1) (i+2)
|
||||
| i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
|
||||
| otherwise = decodeError "streamUtf16BE" "UTF-16BE" onErr Nothing (i+1)
|
||||
where
|
||||
x1 = (idx i `shiftL` 8) + idx (i + 1)
|
||||
x2 = (idx (i + 2) `shiftL` 8) + idx (i + 3)
|
||||
idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
|
||||
{-# INLINE [0] streamUtf16BE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
|
||||
-- endian UTF-32 encoding.
|
||||
streamUtf32BE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf32BE onErr bs = Stream next 0 (maxSize (l `shiftR` 2))
|
||||
where
|
||||
l = B.length bs
|
||||
{-# INLINE next #-}
|
||||
next i
|
||||
| i >= l = Done
|
||||
| i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
|
||||
| otherwise = decodeError "streamUtf32BE" "UTF-32BE" onErr Nothing (i+1)
|
||||
where
|
||||
x = shiftL x1 24 + shiftL x2 16 + shiftL x3 8 + x4
|
||||
x1 = idx i
|
||||
x2 = idx (i+1)
|
||||
x3 = idx (i+2)
|
||||
x4 = idx (i+3)
|
||||
idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
|
||||
{-# INLINE [0] streamUtf32BE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
|
||||
-- endian UTF-32 encoding.
|
||||
streamUtf32LE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf32LE onErr bs = Stream next 0 (maxSize (l `shiftR` 2))
|
||||
where
|
||||
l = B.length bs
|
||||
{-# INLINE next #-}
|
||||
next i
|
||||
| i >= l = Done
|
||||
| i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
|
||||
| otherwise = decodeError "streamUtf32LE" "UTF-32LE" onErr Nothing (i+1)
|
||||
where
|
||||
x = shiftL x4 24 + shiftL x3 16 + shiftL x2 8 + x1
|
||||
x1 = idx i
|
||||
x2 = idx $ i+1
|
||||
x3 = idx $ i+2
|
||||
x4 = idx $ i+3
|
||||
idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
|
||||
{-# INLINE [0] streamUtf32LE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
|
||||
unstream :: Stream Word8 -> ByteString
|
||||
unstream (Stream next s0 len) = unsafeDupablePerformIO $ do
|
||||
let mlen = upperBound 4 len
|
||||
mallocByteString mlen >>= loop mlen 0 s0
|
||||
where
|
||||
loop !n !off !s fp = case next s of
|
||||
Done -> trimUp fp n off
|
||||
Skip s' -> loop n off s' fp
|
||||
Yield x s'
|
||||
| off == n -> realloc fp n off s' x
|
||||
| otherwise -> do
|
||||
withForeignPtr fp $ \p -> pokeByteOff p off x
|
||||
loop n (off+1) s' fp
|
||||
{-# NOINLINE realloc #-}
|
||||
realloc fp n off s x = do
|
||||
let n' = n+n
|
||||
fp' <- copy0 fp n n'
|
||||
withForeignPtr fp' $ \p -> pokeByteOff p off x
|
||||
loop n' (off+1) s fp'
|
||||
{-# NOINLINE trimUp #-}
|
||||
trimUp fp _ off = return $! PS fp 0 off
|
||||
copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
|
||||
copy0 !src !srcLen !destLen =
|
||||
#if defined(ASSERTS)
|
||||
assert (srcLen <= destLen) $
|
||||
#endif
|
||||
do
|
||||
dest <- mallocByteString destLen
|
||||
withForeignPtr src $ \src' ->
|
||||
withForeignPtr dest $ \dest' ->
|
||||
memcpy dest' src' (fromIntegral srcLen)
|
||||
return dest
|
||||
|
||||
decodeError :: forall s. String -> String -> OnDecodeError -> Maybe Word8
|
||||
-> s -> Step s Char
|
||||
decodeError func kind onErr mb i =
|
||||
case onErr desc mb of
|
||||
Nothing -> Skip i
|
||||
Just c -> Yield c i
|
||||
where desc = "Data.Text.Internal.Encoding.Fusion." ++ func ++ ": Invalid " ++
|
||||
kind ++ " stream"
|
@ -1,123 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Encoding.Fusion.Common
|
||||
-- Copyright : (c) Tom Harper 2008-2009,
|
||||
-- (c) Bryan O'Sullivan 2009,
|
||||
-- (c) Duncan Coutts 2009,
|
||||
-- (c) Jasper Van der Jeugt 2011
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Use at your own risk!
|
||||
--
|
||||
-- Fusible 'Stream'-oriented functions for converting between 'Text'
|
||||
-- and several common encodings.
|
||||
|
||||
module Data.Text.Internal.Encoding.Fusion.Common
|
||||
(
|
||||
-- * Restreaming
|
||||
-- Restreaming is the act of converting from one 'Stream'
|
||||
-- representation to another.
|
||||
restreamUtf16LE
|
||||
, restreamUtf16BE
|
||||
, restreamUtf32LE
|
||||
, restreamUtf32BE
|
||||
) where
|
||||
|
||||
import Data.Bits ((.&.))
|
||||
import Data.Text.Internal.Fusion (Step(..), Stream(..))
|
||||
import Data.Text.Internal.Fusion.Types (RS(..))
|
||||
import Data.Text.Internal.Unsafe.Char (ord)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftR)
|
||||
import Data.Word (Word8)
|
||||
|
||||
restreamUtf16BE :: Stream Char -> Stream Word8
|
||||
restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
|
||||
where
|
||||
next (RS0 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (RS0 s')
|
||||
Yield x s'
|
||||
| n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
|
||||
RS1 s' (fromIntegral n)
|
||||
| otherwise -> Yield c1 $ RS3 s' c2 c3 c4
|
||||
where
|
||||
n = ord x
|
||||
n1 = n - 0x10000
|
||||
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
|
||||
c2 = fromIntegral (n1 `shiftR` 10)
|
||||
n2 = n1 .&. 0x3FF
|
||||
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
|
||||
c4 = fromIntegral n2
|
||||
next (RS1 s x2) = Yield x2 (RS0 s)
|
||||
next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
|
||||
next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
|
||||
{-# INLINE next #-}
|
||||
{-# INLINE restreamUtf16BE #-}
|
||||
|
||||
restreamUtf16LE :: Stream Char -> Stream Word8
|
||||
restreamUtf16LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
|
||||
where
|
||||
next (RS0 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (RS0 s')
|
||||
Yield x s'
|
||||
| n < 0x10000 -> Yield (fromIntegral n) $
|
||||
RS1 s' (fromIntegral $ shiftR n 8)
|
||||
| otherwise -> Yield c1 $ RS3 s' c2 c3 c4
|
||||
where
|
||||
n = ord x
|
||||
n1 = n - 0x10000
|
||||
c2 = fromIntegral (shiftR n1 18 + 0xD8)
|
||||
c1 = fromIntegral (shiftR n1 10)
|
||||
n2 = n1 .&. 0x3FF
|
||||
c4 = fromIntegral (shiftR n2 8 + 0xDC)
|
||||
c3 = fromIntegral n2
|
||||
next (RS1 s x2) = Yield x2 (RS0 s)
|
||||
next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
|
||||
next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
|
||||
{-# INLINE next #-}
|
||||
{-# INLINE restreamUtf16LE #-}
|
||||
|
||||
restreamUtf32BE :: Stream Char -> Stream Word8
|
||||
restreamUtf32BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
|
||||
where
|
||||
next (RS0 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (RS0 s')
|
||||
Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
|
||||
where
|
||||
n = ord x
|
||||
c1 = fromIntegral $ shiftR n 24
|
||||
c2 = fromIntegral $ shiftR n 16
|
||||
c3 = fromIntegral $ shiftR n 8
|
||||
c4 = fromIntegral n
|
||||
next (RS1 s x2) = Yield x2 (RS0 s)
|
||||
next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
|
||||
next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
|
||||
{-# INLINE next #-}
|
||||
{-# INLINE restreamUtf32BE #-}
|
||||
|
||||
restreamUtf32LE :: Stream Char -> Stream Word8
|
||||
restreamUtf32LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
|
||||
where
|
||||
next (RS0 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (RS0 s')
|
||||
Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
|
||||
where
|
||||
n = ord x
|
||||
c4 = fromIntegral $ shiftR n 24
|
||||
c3 = fromIntegral $ shiftR n 16
|
||||
c2 = fromIntegral $ shiftR n 8
|
||||
c1 = fromIntegral n
|
||||
next (RS1 s x2) = Yield x2 (RS0 s)
|
||||
next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
|
||||
next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
|
||||
{-# INLINE next #-}
|
||||
{-# INLINE restreamUtf32LE #-}
|
@ -1,45 +0,0 @@
|
||||
{-# LANGUAGE MagicHash, BangPatterns #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Encoding.Utf16
|
||||
-- Copyright : (c) 2008, 2009 Tom Harper,
|
||||
-- (c) 2009 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Basic UTF-16 validation and character manipulation.
|
||||
module Data.Text.Internal.Encoding.Utf16
|
||||
(
|
||||
chr2
|
||||
, validate1
|
||||
, validate2
|
||||
) where
|
||||
|
||||
import GHC.Exts
|
||||
import GHC.Word (Word16(..))
|
||||
|
||||
chr2 :: Word16 -> Word16 -> Char
|
||||
chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))
|
||||
where
|
||||
!x# = word2Int# a#
|
||||
!y# = word2Int# b#
|
||||
!upper# = uncheckedIShiftL# (x# -# 0xD800#) 10#
|
||||
!lower# = y# -# 0xDC00#
|
||||
{-# INLINE chr2 #-}
|
||||
|
||||
validate1 :: Word16 -> Bool
|
||||
validate1 x1 = x1 < 0xD800 || x1 > 0xDFFF
|
||||
{-# INLINE validate1 #-}
|
||||
|
||||
validate2 :: Word16 -> Word16 -> Bool
|
||||
validate2 x1 x2 = x1 >= 0xD800 && x1 <= 0xDBFF &&
|
||||
x2 >= 0xDC00 && x2 <= 0xDFFF
|
||||
{-# INLINE validate2 #-}
|
@ -1,26 +0,0 @@
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Encoding.Utf32
|
||||
-- Copyright : (c) 2008, 2009 Tom Harper,
|
||||
-- (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Basic UTF-32 validation.
|
||||
module Data.Text.Internal.Encoding.Utf32
|
||||
(
|
||||
validate
|
||||
) where
|
||||
|
||||
import Data.Word (Word32)
|
||||
|
||||
validate :: Word32 -> Bool
|
||||
validate x1 = x1 < 0xD800 || (x1 > 0xDFFF && x1 <= 0x10FFFF)
|
||||
{-# INLINE validate #-}
|
@ -1,168 +0,0 @@
|
||||
{-# LANGUAGE CPP, MagicHash, BangPatterns #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Encoding.Utf8
|
||||
-- Copyright : (c) 2008, 2009 Tom Harper,
|
||||
-- (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Basic UTF-8 validation and character manipulation.
|
||||
module Data.Text.Internal.Encoding.Utf8
|
||||
(
|
||||
-- Decomposition
|
||||
ord2
|
||||
, ord3
|
||||
, ord4
|
||||
-- Construction
|
||||
, chr2
|
||||
, chr3
|
||||
, chr4
|
||||
-- * Validation
|
||||
, validate1
|
||||
, validate2
|
||||
, validate3
|
||||
, validate4
|
||||
) where
|
||||
|
||||
#if defined(TEST_SUITE)
|
||||
# undef ASSERTS
|
||||
#endif
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import Data.Bits ((.&.))
|
||||
import Data.Text.Internal.Unsafe.Char (ord)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftR)
|
||||
import GHC.Exts
|
||||
import GHC.Word (Word8(..))
|
||||
|
||||
default(Int)
|
||||
|
||||
between :: Word8 -- ^ byte to check
|
||||
-> Word8 -- ^ lower bound
|
||||
-> Word8 -- ^ upper bound
|
||||
-> Bool
|
||||
between x y z = x >= y && x <= z
|
||||
{-# INLINE between #-}
|
||||
|
||||
ord2 :: Char -> (Word8,Word8)
|
||||
ord2 c =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0x80 && n <= 0x07ff)
|
||||
#endif
|
||||
(x1,x2)
|
||||
where
|
||||
n = ord c
|
||||
x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
|
||||
x2 = fromIntegral $ (n .&. 0x3F) + 0x80
|
||||
|
||||
ord3 :: Char -> (Word8,Word8,Word8)
|
||||
ord3 c =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0x0800 && n <= 0xffff)
|
||||
#endif
|
||||
(x1,x2,x3)
|
||||
where
|
||||
n = ord c
|
||||
x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
|
||||
x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
|
||||
x3 = fromIntegral $ (n .&. 0x3F) + 0x80
|
||||
|
||||
ord4 :: Char -> (Word8,Word8,Word8,Word8)
|
||||
ord4 c =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0x10000)
|
||||
#endif
|
||||
(x1,x2,x3,x4)
|
||||
where
|
||||
n = ord c
|
||||
x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
|
||||
x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
|
||||
x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
|
||||
x4 = fromIntegral $ (n .&. 0x3F) + 0x80
|
||||
|
||||
chr2 :: Word8 -> Word8 -> Char
|
||||
chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
|
||||
where
|
||||
!y1# = word2Int# x1#
|
||||
!y2# = word2Int# x2#
|
||||
!z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6#
|
||||
!z2# = y2# -# 0x80#
|
||||
{-# INLINE chr2 #-}
|
||||
|
||||
chr3 :: Word8 -> Word8 -> Word8 -> Char
|
||||
chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
|
||||
where
|
||||
!y1# = word2Int# x1#
|
||||
!y2# = word2Int# x2#
|
||||
!y3# = word2Int# x3#
|
||||
!z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12#
|
||||
!z2# = uncheckedIShiftL# (y2# -# 0x80#) 6#
|
||||
!z3# = y3# -# 0x80#
|
||||
{-# INLINE chr3 #-}
|
||||
|
||||
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
|
||||
chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
|
||||
C# (chr# (z1# +# z2# +# z3# +# z4#))
|
||||
where
|
||||
!y1# = word2Int# x1#
|
||||
!y2# = word2Int# x2#
|
||||
!y3# = word2Int# x3#
|
||||
!y4# = word2Int# x4#
|
||||
!z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18#
|
||||
!z2# = uncheckedIShiftL# (y2# -# 0x80#) 12#
|
||||
!z3# = uncheckedIShiftL# (y3# -# 0x80#) 6#
|
||||
!z4# = y4# -# 0x80#
|
||||
{-# INLINE chr4 #-}
|
||||
|
||||
validate1 :: Word8 -> Bool
|
||||
validate1 x1 = x1 <= 0x7F
|
||||
{-# INLINE validate1 #-}
|
||||
|
||||
validate2 :: Word8 -> Word8 -> Bool
|
||||
validate2 x1 x2 = between x1 0xC2 0xDF && between x2 0x80 0xBF
|
||||
{-# INLINE validate2 #-}
|
||||
|
||||
validate3 :: Word8 -> Word8 -> Word8 -> Bool
|
||||
{-# INLINE validate3 #-}
|
||||
validate3 x1 x2 x3 = validate3_1 || validate3_2 || validate3_3 || validate3_4
|
||||
where
|
||||
validate3_1 = (x1 == 0xE0) &&
|
||||
between x2 0xA0 0xBF &&
|
||||
between x3 0x80 0xBF
|
||||
validate3_2 = between x1 0xE1 0xEC &&
|
||||
between x2 0x80 0xBF &&
|
||||
between x3 0x80 0xBF
|
||||
validate3_3 = x1 == 0xED &&
|
||||
between x2 0x80 0x9F &&
|
||||
between x3 0x80 0xBF
|
||||
validate3_4 = between x1 0xEE 0xEF &&
|
||||
between x2 0x80 0xBF &&
|
||||
between x3 0x80 0xBF
|
||||
|
||||
validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
|
||||
{-# INLINE validate4 #-}
|
||||
validate4 x1 x2 x3 x4 = validate4_1 || validate4_2 || validate4_3
|
||||
where
|
||||
validate4_1 = x1 == 0xF0 &&
|
||||
between x2 0x90 0xBF &&
|
||||
between x3 0x80 0xBF &&
|
||||
between x4 0x80 0xBF
|
||||
validate4_2 = between x1 0xF1 0xF3 &&
|
||||
between x2 0x80 0xBF &&
|
||||
between x3 0x80 0xBF &&
|
||||
between x4 0x80 0xBF
|
||||
validate4_3 = x1 == 0xF4 &&
|
||||
between x2 0x80 0x8F &&
|
||||
between x3 0x80 0xBF &&
|
||||
between x4 0x80 0xBF
|
@ -1,29 +0,0 @@
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Functions
|
||||
-- Copyright : 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Useful functions.
|
||||
|
||||
module Data.Text.Internal.Functions
|
||||
(
|
||||
intersperse
|
||||
) where
|
||||
|
||||
-- | A lazier version of Data.List.intersperse. The other version
|
||||
-- causes space leaks!
|
||||
intersperse :: a -> [a] -> [a]
|
||||
intersperse _ [] = []
|
||||
intersperse sep (x:xs) = x : go xs
|
||||
where
|
||||
go [] = []
|
||||
go (y:ys) = sep : y: go ys
|
||||
{-# INLINE intersperse #-}
|
@ -1,244 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, MagicHash #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Fusion
|
||||
-- Copyright : (c) Tom Harper 2008-2009,
|
||||
-- (c) Bryan O'Sullivan 2009-2010,
|
||||
-- (c) Duncan Coutts 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Text manipulation functions represented as fusible operations over
|
||||
-- streams.
|
||||
module Data.Text.Internal.Fusion
|
||||
(
|
||||
-- * Types
|
||||
Stream(..)
|
||||
, Step(..)
|
||||
|
||||
-- * Creation and elimination
|
||||
, stream
|
||||
, unstream
|
||||
, reverseStream
|
||||
|
||||
, length
|
||||
|
||||
-- * Transformations
|
||||
, reverse
|
||||
|
||||
-- * Construction
|
||||
-- ** Scans
|
||||
, reverseScanr
|
||||
|
||||
-- ** Accumulating maps
|
||||
, mapAccumL
|
||||
|
||||
-- ** Generation and unfolding
|
||||
, unfoldrN
|
||||
|
||||
-- * Indexing
|
||||
, index
|
||||
, findIndex
|
||||
, countChar
|
||||
) where
|
||||
|
||||
import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
|
||||
Num(..), Ord(..), ($), (&&),
|
||||
fromIntegral, otherwise)
|
||||
import Data.Bits ((.&.))
|
||||
import Data.Text.Internal (Text(..))
|
||||
import Data.Text.Internal.Private (runText)
|
||||
import Data.Text.Internal.Unsafe.Char (ord, unsafeChr, unsafeWrite)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
|
||||
import qualified Data.Text.Array as A
|
||||
import qualified Data.Text.Internal.Fusion.Common as S
|
||||
import Data.Text.Internal.Fusion.Types
|
||||
import Data.Text.Internal.Fusion.Size
|
||||
import qualified Data.Text.Internal as I
|
||||
import qualified Data.Text.Internal.Encoding.Utf16 as U16
|
||||
|
||||
default(Int)
|
||||
|
||||
-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
|
||||
stream :: Text -> Stream Char
|
||||
stream (Text arr off len) = Stream next off (betweenSize (len `shiftR` 1) len)
|
||||
where
|
||||
!end = off+len
|
||||
next !i
|
||||
| i >= end = Done
|
||||
| n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
|
||||
| otherwise = Yield (unsafeChr n) (i + 1)
|
||||
where
|
||||
n = A.unsafeIndex arr i
|
||||
n2 = A.unsafeIndex arr (i + 1)
|
||||
{-# INLINE [0] stream #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
|
||||
-- backwards.
|
||||
reverseStream :: Text -> Stream Char
|
||||
reverseStream (Text arr off len) = Stream next (off+len-1) (betweenSize (len `shiftR` 1) len)
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next !i
|
||||
| i < off = Done
|
||||
| n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2)
|
||||
| otherwise = Yield (unsafeChr n) (i - 1)
|
||||
where
|
||||
n = A.unsafeIndex arr i
|
||||
n2 = A.unsafeIndex arr (i - 1)
|
||||
{-# INLINE [0] reverseStream #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
|
||||
unstream :: Stream Char -> Text
|
||||
unstream (Stream next0 s0 len) = runText $ \done -> do
|
||||
-- Before encoding each char we perform a buffer realloc check assuming
|
||||
-- worst case encoding size of two 16-bit units for the char. Just add an
|
||||
-- extra space to the buffer so that we do not end up reallocating even when
|
||||
-- all the chars are encoded as single unit.
|
||||
let mlen = upperBound 4 len + 1
|
||||
arr0 <- A.new mlen
|
||||
let outer !arr !maxi = encode
|
||||
where
|
||||
-- keep the common case loop as small as possible
|
||||
encode !si !di =
|
||||
case next0 si of
|
||||
Done -> done arr di
|
||||
Skip si' -> encode si' di
|
||||
Yield c si'
|
||||
-- simply check for the worst case
|
||||
| maxi < di + 1 -> realloc si di
|
||||
| otherwise -> do
|
||||
n <- unsafeWrite arr di c
|
||||
encode si' (di + n)
|
||||
|
||||
-- keep uncommon case separate from the common case code
|
||||
{-# NOINLINE realloc #-}
|
||||
realloc !si !di = do
|
||||
let newlen = (maxi + 1) * 2
|
||||
arr' <- A.new newlen
|
||||
A.copyM arr' 0 arr 0 di
|
||||
outer arr' (newlen - 1) si di
|
||||
|
||||
outer arr0 (mlen - 1) s0 0
|
||||
{-# INLINE [0] unstream #-}
|
||||
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
|
||||
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- * Basic stream functions
|
||||
|
||||
length :: Stream Char -> Int
|
||||
length = S.lengthI
|
||||
{-# INLINE[0] length #-}
|
||||
|
||||
-- | /O(n)/ Reverse the characters of a string.
|
||||
reverse :: Stream Char -> Text
|
||||
reverse (Stream next s len0)
|
||||
| isEmpty len0 = I.empty
|
||||
| otherwise = I.text arr off' len'
|
||||
where
|
||||
len0' = upperBound 4 (larger len0 4)
|
||||
(arr, (off', len')) = A.run2 (A.new len0' >>= loop s (len0'-1) len0')
|
||||
loop !s0 !i !len marr =
|
||||
case next s0 of
|
||||
Done -> return (marr, (j, len-j))
|
||||
where j = i + 1
|
||||
Skip s1 -> loop s1 i len marr
|
||||
Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do
|
||||
let newLen = len `shiftL` 1
|
||||
marr' <- A.new newLen
|
||||
A.copyM marr' (newLen-len) marr 0 len
|
||||
write s1 (len+i) newLen marr'
|
||||
| otherwise -> write s1 i len marr
|
||||
where n = ord x
|
||||
least | n < 0x10000 = 0
|
||||
| otherwise = 1
|
||||
m = n - 0x10000
|
||||
lo = fromIntegral $ (m `shiftR` 10) + 0xD800
|
||||
hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
|
||||
write t j l mar
|
||||
| n < 0x10000 = do
|
||||
A.unsafeWrite mar j (fromIntegral n)
|
||||
loop t (j-1) l mar
|
||||
| otherwise = do
|
||||
A.unsafeWrite mar (j-1) lo
|
||||
A.unsafeWrite mar j hi
|
||||
loop t (j-2) l mar
|
||||
{-# INLINE [0] reverse #-}
|
||||
|
||||
-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
|
||||
-- the input and result reversed.
|
||||
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
|
||||
reverseScanr f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next (Scan1 z s) = Yield z (Scan2 z s)
|
||||
next (Scan2 z s) = case next0 s of
|
||||
Yield x s' -> let !x' = f x z
|
||||
in Yield x' (Scan2 x' s')
|
||||
Skip s' -> Skip (Scan2 z s')
|
||||
Done -> Done
|
||||
{-# INLINE reverseScanr #-}
|
||||
|
||||
-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
|
||||
-- value. However, the length of the result is limited by the
|
||||
-- first argument to 'unfoldrN'. This function is more efficient than
|
||||
-- 'unfoldr' when the length of the result is known.
|
||||
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
|
||||
unfoldrN n = S.unfoldrNI n
|
||||
{-# INLINE [0] unfoldrN #-}
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- ** Indexing streams
|
||||
|
||||
-- | /O(n)/ stream index (subscript) operator, starting from 0.
|
||||
index :: Stream Char -> Int -> Char
|
||||
index = S.indexI
|
||||
{-# INLINE [0] index #-}
|
||||
|
||||
-- | The 'findIndex' function takes a predicate and a stream and
|
||||
-- returns the index of the first element in the stream
|
||||
-- satisfying the predicate.
|
||||
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
|
||||
findIndex = S.findIndexI
|
||||
{-# INLINE [0] findIndex #-}
|
||||
|
||||
-- | /O(n)/ The 'count' function returns the number of times the query
|
||||
-- element appears in the given stream.
|
||||
countChar :: Char -> Stream Char -> Int
|
||||
countChar = S.countCharI
|
||||
{-# INLINE [0] countChar #-}
|
||||
|
||||
-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
|
||||
-- function to each element of a 'Text', passing an accumulating
|
||||
-- parameter from left to right, and returns a final 'Text'.
|
||||
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
|
||||
mapAccumL f z0 (Stream next0 s0 len) = (nz, I.text na 0 nl)
|
||||
where
|
||||
(na,(nz,nl)) = A.run2 (A.new mlen >>= \arr -> outer arr mlen z0 s0 0)
|
||||
where mlen = upperBound 4 len
|
||||
outer arr top = loop
|
||||
where
|
||||
loop !z !s !i =
|
||||
case next0 s of
|
||||
Done -> return (arr, (z,i))
|
||||
Skip s' -> loop z s' i
|
||||
Yield x s'
|
||||
| j >= top -> {-# SCC "mapAccumL/resize" #-} do
|
||||
let top' = (top + 1) `shiftL` 1
|
||||
arr' <- A.new top'
|
||||
A.copyM arr' 0 arr 0 top
|
||||
outer arr' top' z s i
|
||||
| otherwise -> do d <- unsafeWrite arr i c
|
||||
loop z' s' (i+d)
|
||||
where (z',c) = f z x
|
||||
j | ord c < 0x10000 = i
|
||||
| otherwise = i + 1
|
||||
{-# INLINE [0] mapAccumL #-}
|
File diff suppressed because it is too large
Load Diff
@ -1,949 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Fusion.Common
|
||||
-- Copyright : (c) Bryan O'Sullivan 2009, 2012
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Common stream fusion functionality for text.
|
||||
|
||||
module Data.Text.Internal.Fusion.Common
|
||||
(
|
||||
-- * Creation and elimination
|
||||
singleton
|
||||
, streamList
|
||||
, unstreamList
|
||||
, streamCString#
|
||||
|
||||
-- * Basic interface
|
||||
, cons
|
||||
, snoc
|
||||
, append
|
||||
, head
|
||||
, uncons
|
||||
, last
|
||||
, tail
|
||||
, init
|
||||
, null
|
||||
, lengthI
|
||||
, compareLengthI
|
||||
, isSingleton
|
||||
|
||||
-- * Transformations
|
||||
, map
|
||||
, intercalate
|
||||
, intersperse
|
||||
|
||||
-- ** Case conversion
|
||||
-- $case
|
||||
, toCaseFold
|
||||
, toLower
|
||||
, toTitle
|
||||
, toUpper
|
||||
|
||||
-- ** Justification
|
||||
, justifyLeftI
|
||||
|
||||
-- * Folds
|
||||
, foldl
|
||||
, foldl'
|
||||
, foldl1
|
||||
, foldl1'
|
||||
, foldr
|
||||
, foldr1
|
||||
|
||||
-- ** Special folds
|
||||
, concat
|
||||
, concatMap
|
||||
, any
|
||||
, all
|
||||
, maximum
|
||||
, minimum
|
||||
|
||||
-- * Construction
|
||||
-- ** Scans
|
||||
, scanl
|
||||
|
||||
-- ** Generation and unfolding
|
||||
, replicateCharI
|
||||
, replicateI
|
||||
, unfoldr
|
||||
, unfoldrNI
|
||||
|
||||
-- * Substrings
|
||||
-- ** Breaking strings
|
||||
, take
|
||||
, drop
|
||||
, takeWhile
|
||||
, dropWhile
|
||||
|
||||
-- * Predicates
|
||||
, isPrefixOf
|
||||
|
||||
-- * Searching
|
||||
, elem
|
||||
, filter
|
||||
|
||||
-- * Indexing
|
||||
, findBy
|
||||
, indexI
|
||||
, findIndexI
|
||||
, countCharI
|
||||
|
||||
-- * Zipping and unzipping
|
||||
, zipWith
|
||||
) where
|
||||
|
||||
import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
|
||||
Ord(..), Ordering(..), String, (.), ($), (+), (-), (*), (++),
|
||||
(&&), fromIntegral, otherwise)
|
||||
import qualified Data.List as L
|
||||
import qualified Prelude as P
|
||||
import Data.Bits (shiftL)
|
||||
import Data.Char (isLetter, isSpace)
|
||||
import Data.Int (Int64)
|
||||
import Data.Text.Internal.Fusion.Types
|
||||
import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, titleMapping,
|
||||
upperMapping)
|
||||
import Data.Text.Internal.Fusion.Size
|
||||
import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
|
||||
import GHC.Types (Char(..), Int(..))
|
||||
|
||||
singleton :: Char -> Stream Char
|
||||
singleton c = Stream next False (codePointsSize 1)
|
||||
where next False = Yield c True
|
||||
next True = Done
|
||||
{-# INLINE [0] singleton #-}
|
||||
|
||||
streamList :: [a] -> Stream a
|
||||
{-# INLINE [0] streamList #-}
|
||||
streamList s = Stream next s unknownSize
|
||||
where next [] = Done
|
||||
next (x:xs) = Yield x xs
|
||||
|
||||
unstreamList :: Stream a -> [a]
|
||||
unstreamList (Stream next s0 _len) = unfold s0
|
||||
where unfold !s = case next s of
|
||||
Done -> []
|
||||
Skip s' -> unfold s'
|
||||
Yield x s' -> x : unfold s'
|
||||
{-# INLINE [0] unstreamList #-}
|
||||
|
||||
{-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
|
||||
|
||||
-- | Stream the UTF-8-like packed encoding used by GHC to represent
|
||||
-- constant strings in generated code.
|
||||
--
|
||||
-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
|
||||
-- and the string is NUL-terminated.
|
||||
streamCString# :: Addr# -> Stream Char
|
||||
streamCString# addr = Stream step 0 unknownSize
|
||||
where
|
||||
step !i
|
||||
| b == 0 = Done
|
||||
| b <= 0x7f = Yield (C# b#) (i+1)
|
||||
| b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
|
||||
in Yield c (i+2)
|
||||
| b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
|
||||
(next 1 `shiftL` 6) +
|
||||
next 2
|
||||
in Yield c (i+3)
|
||||
| otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
|
||||
(next 1 `shiftL` 12) +
|
||||
(next 2 `shiftL` 6) +
|
||||
next 3
|
||||
in Yield c (i+4)
|
||||
where b = I# (ord# b#)
|
||||
next n = I# (ord# (at# (i+n))) - 0x80
|
||||
!b# = at# i
|
||||
at# (I# i#) = indexCharOffAddr# addr i#
|
||||
chr (I# i#) = C# (chr# i#)
|
||||
{-# INLINE [0] streamCString# #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- * Basic stream functions
|
||||
|
||||
data C s = C0 !s
|
||||
| C1 !s
|
||||
|
||||
-- | /O(n)/ Adds a character to the front of a Stream Char.
|
||||
cons :: Char -> Stream Char -> Stream Char
|
||||
cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len + codePointsSize 1)
|
||||
where
|
||||
next (C1 s) = Yield w (C0 s)
|
||||
next (C0 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (C0 s')
|
||||
Yield x s' -> Yield x (C0 s')
|
||||
{-# INLINE [0] cons #-}
|
||||
|
||||
data Snoc a = N
|
||||
| J !a
|
||||
|
||||
-- | /O(n)/ Adds a character to the end of a stream.
|
||||
snoc :: Stream Char -> Char -> Stream Char
|
||||
snoc (Stream next0 xs0 len) w = Stream next (J xs0) (len + codePointsSize 1)
|
||||
where
|
||||
next (J xs) = case next0 xs of
|
||||
Done -> Yield w N
|
||||
Skip xs' -> Skip (J xs')
|
||||
Yield x xs' -> Yield x (J xs')
|
||||
next N = Done
|
||||
{-# INLINE [0] snoc #-}
|
||||
|
||||
data E l r = L !l
|
||||
| R !r
|
||||
|
||||
-- | /O(n)/ Appends one Stream to the other.
|
||||
append :: Stream Char -> Stream Char -> Stream Char
|
||||
append (Stream next0 s01 len1) (Stream next1 s02 len2) =
|
||||
Stream next (L s01) (len1 + len2)
|
||||
where
|
||||
next (L s1) = case next0 s1 of
|
||||
Done -> Skip (R s02)
|
||||
Skip s1' -> Skip (L s1')
|
||||
Yield x s1' -> Yield x (L s1')
|
||||
next (R s2) = case next1 s2 of
|
||||
Done -> Done
|
||||
Skip s2' -> Skip (R s2')
|
||||
Yield x s2' -> Yield x (R s2')
|
||||
{-# INLINE [0] append #-}
|
||||
|
||||
-- | /O(1)/ Returns the first character of a Text, which must be non-empty.
|
||||
-- Subject to array fusion.
|
||||
head :: Stream Char -> Char
|
||||
head (Stream next s0 _len) = loop_head s0
|
||||
where
|
||||
loop_head !s = case next s of
|
||||
Yield x _ -> x
|
||||
Skip s' -> loop_head s'
|
||||
Done -> head_empty
|
||||
{-# INLINE [0] head #-}
|
||||
|
||||
head_empty :: a
|
||||
head_empty = streamError "head" "Empty stream"
|
||||
{-# NOINLINE head_empty #-}
|
||||
|
||||
-- | /O(1)/ Returns the first character and remainder of a 'Stream
|
||||
-- Char', or 'Nothing' if empty. Subject to array fusion.
|
||||
uncons :: Stream Char -> Maybe (Char, Stream Char)
|
||||
uncons (Stream next s0 len) = loop_uncons s0
|
||||
where
|
||||
loop_uncons !s = case next s of
|
||||
Yield x s1 -> Just (x, Stream next s1 (len - codePointsSize 1))
|
||||
Skip s' -> loop_uncons s'
|
||||
Done -> Nothing
|
||||
{-# INLINE [0] uncons #-}
|
||||
|
||||
-- | /O(n)/ Returns the last character of a 'Stream Char', which must
|
||||
-- be non-empty.
|
||||
last :: Stream Char -> Char
|
||||
last (Stream next s0 _len) = loop0_last s0
|
||||
where
|
||||
loop0_last !s = case next s of
|
||||
Done -> emptyError "last"
|
||||
Skip s' -> loop0_last s'
|
||||
Yield x s' -> loop_last x s'
|
||||
loop_last !x !s = case next s of
|
||||
Done -> x
|
||||
Skip s' -> loop_last x s'
|
||||
Yield x' s' -> loop_last x' s'
|
||||
{-# INLINE[0] last #-}
|
||||
|
||||
-- | /O(1)/ Returns all characters after the head of a Stream Char, which must
|
||||
-- be non-empty.
|
||||
tail :: Stream Char -> Stream Char
|
||||
tail (Stream next0 s0 len) = Stream next (C0 s0) (len - codePointsSize 1)
|
||||
where
|
||||
next (C0 s) = case next0 s of
|
||||
Done -> emptyError "tail"
|
||||
Skip s' -> Skip (C0 s')
|
||||
Yield _ s' -> Skip (C1 s')
|
||||
next (C1 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (C1 s')
|
||||
Yield x s' -> Yield x (C1 s')
|
||||
{-# INLINE [0] tail #-}
|
||||
|
||||
data Init s = Init0 !s
|
||||
| Init1 {-# UNPACK #-} !Char !s
|
||||
|
||||
-- | /O(1)/ Returns all but the last character of a Stream Char, which
|
||||
-- must be non-empty.
|
||||
init :: Stream Char -> Stream Char
|
||||
init (Stream next0 s0 len) = Stream next (Init0 s0) (len - codePointsSize 1)
|
||||
where
|
||||
next (Init0 s) = case next0 s of
|
||||
Done -> emptyError "init"
|
||||
Skip s' -> Skip (Init0 s')
|
||||
Yield x s' -> Skip (Init1 x s')
|
||||
next (Init1 x s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (Init1 x s')
|
||||
Yield x' s' -> Yield x (Init1 x' s')
|
||||
{-# INLINE [0] init #-}
|
||||
|
||||
-- | /O(1)/ Tests whether a Stream Char is empty or not.
|
||||
null :: Stream Char -> Bool
|
||||
null (Stream next s0 _len) = loop_null s0
|
||||
where
|
||||
loop_null !s = case next s of
|
||||
Done -> True
|
||||
Yield _ _ -> False
|
||||
Skip s' -> loop_null s'
|
||||
{-# INLINE[0] null #-}
|
||||
|
||||
-- | /O(n)/ Returns the number of characters in a string.
|
||||
lengthI :: Integral a => Stream Char -> a
|
||||
lengthI (Stream next s0 _len) = loop_length 0 s0
|
||||
where
|
||||
loop_length !z s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_length z s'
|
||||
Yield _ s' -> loop_length (z + 1) s'
|
||||
{-# INLINE[0] lengthI #-}
|
||||
|
||||
-- | /O(n)/ Compares the count of characters in a string to a number.
|
||||
-- Subject to fusion.
|
||||
--
|
||||
-- This function gives the same answer as comparing against the result
|
||||
-- of 'lengthI', but can short circuit if the count of characters is
|
||||
-- greater than the number or if the stream can't possibly be as long
|
||||
-- as the number supplied, and hence be more efficient.
|
||||
compareLengthI :: Integral a => Stream Char -> a -> Ordering
|
||||
compareLengthI (Stream next s0 len) n
|
||||
-- Note that @len@ tracks code units whereas we want to compare the length
|
||||
-- in code points. Specifically, a stream with hint @len@ may consist of
|
||||
-- anywhere from @len/2@ to @len@ code points.
|
||||
| Just r <- compareSize len n' = r
|
||||
| otherwise = loop_cmp 0 s0
|
||||
where
|
||||
n' = codePointsSize $ fromIntegral n
|
||||
loop_cmp !z s = case next s of
|
||||
Done -> compare z n
|
||||
Skip s' -> loop_cmp z s'
|
||||
Yield _ s' | z > n -> GT
|
||||
| otherwise -> loop_cmp (z + 1) s'
|
||||
{-# INLINE[0] compareLengthI #-}
|
||||
|
||||
-- | /O(n)/ Indicate whether a string contains exactly one element.
|
||||
isSingleton :: Stream Char -> Bool
|
||||
isSingleton (Stream next s0 _len) = loop 0 s0
|
||||
where
|
||||
loop !z s = case next s of
|
||||
Done -> z == (1::Int)
|
||||
Skip s' -> loop z s'
|
||||
Yield _ s'
|
||||
| z >= 1 -> False
|
||||
| otherwise -> loop (z+1) s'
|
||||
{-# INLINE[0] isSingleton #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- * Stream transformations
|
||||
|
||||
-- | /O(n)/ 'map' @f @xs is the Stream Char obtained by applying @f@
|
||||
-- to each element of @xs@.
|
||||
map :: (Char -> Char) -> Stream Char -> Stream Char
|
||||
map f (Stream next0 s0 len) = Stream next s0 len
|
||||
where
|
||||
next !s = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip s'
|
||||
Yield x s' -> Yield (f x) s'
|
||||
{-# INLINE [0] map #-}
|
||||
|
||||
{-#
|
||||
RULES "STREAM map/map fusion" forall f g s.
|
||||
map f (map g s) = map (\x -> f (g x)) s
|
||||
#-}
|
||||
|
||||
data I s = I1 !s
|
||||
| I2 !s {-# UNPACK #-} !Char
|
||||
| I3 !s
|
||||
|
||||
-- | /O(n)/ Take a character and place it between each of the
|
||||
-- characters of a 'Stream Char'.
|
||||
intersperse :: Char -> Stream Char -> Stream Char
|
||||
intersperse c (Stream next0 s0 len) = Stream next (I1 s0) (len + unknownSize)
|
||||
where
|
||||
next (I1 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (I1 s')
|
||||
Yield x s' -> Skip (I2 s' x)
|
||||
next (I2 s x) = Yield x (I3 s)
|
||||
next (I3 s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (I3 s')
|
||||
Yield x s' -> Yield c (I2 s' x)
|
||||
{-# INLINE [0] intersperse #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- ** Case conversions (folds)
|
||||
|
||||
-- $case
|
||||
--
|
||||
-- With Unicode text, it is incorrect to use combinators like @map
|
||||
-- toUpper@ to case convert each character of a string individually.
|
||||
-- Instead, use the whole-string case conversion functions from this
|
||||
-- module. For correctness in different writing systems, these
|
||||
-- functions may map one input character to two or three output
|
||||
-- characters.
|
||||
|
||||
-- | Map a 'Stream' through the given case-mapping function.
|
||||
caseConvert :: (forall s. Char -> s -> Step (CC s) Char)
|
||||
-> Stream Char -> Stream Char
|
||||
caseConvert remap (Stream next0 s0 len) =
|
||||
Stream next (CC s0 '\0' '\0') (len `unionSize` (3*len))
|
||||
where
|
||||
next (CC s '\0' _) =
|
||||
case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (CC s' '\0' '\0')
|
||||
Yield c s' -> remap c s'
|
||||
next (CC s a b) = Yield a (CC s b '\0')
|
||||
|
||||
-- | /O(n)/ Convert a string to folded case. This function is mainly
|
||||
-- useful for performing caseless (or case insensitive) string
|
||||
-- comparisons.
|
||||
--
|
||||
-- A string @x@ is a caseless match for a string @y@ if and only if:
|
||||
--
|
||||
-- @toCaseFold x == toCaseFold y@
|
||||
--
|
||||
-- The result string may be longer than the input string, and may
|
||||
-- differ from applying 'toLower' to the input string. For instance,
|
||||
-- the Armenian small ligature men now (U+FB13) is case folded to the
|
||||
-- bigram men now (U+0574 U+0576), while the micro sign (U+00B5) is
|
||||
-- case folded to the Greek small letter letter mu (U+03BC) instead of
|
||||
-- itself.
|
||||
toCaseFold :: Stream Char -> Stream Char
|
||||
toCaseFold = caseConvert foldMapping
|
||||
{-# INLINE [0] toCaseFold #-}
|
||||
|
||||
-- | /O(n)/ Convert a string to upper case, using simple case
|
||||
-- conversion. The result string may be longer than the input string.
|
||||
-- For instance, the German eszett (U+00DF) maps to the two-letter
|
||||
-- sequence SS.
|
||||
toUpper :: Stream Char -> Stream Char
|
||||
toUpper = caseConvert upperMapping
|
||||
{-# INLINE [0] toUpper #-}
|
||||
|
||||
-- | /O(n)/ Convert a string to lower case, using simple case
|
||||
-- conversion. The result string may be longer than the input string.
|
||||
-- For instance, the Latin capital letter I with dot above (U+0130)
|
||||
-- maps to the sequence Latin small letter i (U+0069) followed by
|
||||
-- combining dot above (U+0307).
|
||||
toLower :: Stream Char -> Stream Char
|
||||
toLower = caseConvert lowerMapping
|
||||
{-# INLINE [0] toLower #-}
|
||||
|
||||
-- | /O(n)/ Convert a string to title case, using simple case
|
||||
-- conversion.
|
||||
--
|
||||
-- The first letter of the input is converted to title case, as is
|
||||
-- every subsequent letter that immediately follows a non-letter.
|
||||
-- Every letter that immediately follows another letter is converted
|
||||
-- to lower case.
|
||||
--
|
||||
-- The result string may be longer than the input string. For example,
|
||||
-- the Latin small ligature fl (U+FB02) is converted to the
|
||||
-- sequence Latin capital letter F (U+0046) followed by Latin small
|
||||
-- letter l (U+006C).
|
||||
--
|
||||
-- /Note/: this function does not take language or culture specific
|
||||
-- rules into account. For instance, in English, different style
|
||||
-- guides disagree on whether the book name \"The Hill of the Red
|
||||
-- Fox\" is correctly title cased—but this function will
|
||||
-- capitalize /every/ word.
|
||||
toTitle :: Stream Char -> Stream Char
|
||||
toTitle (Stream next0 s0 len) = Stream next (CC (False :*: s0) '\0' '\0') (len + unknownSize)
|
||||
where
|
||||
next (CC (letter :*: s) '\0' _) =
|
||||
case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (CC (letter :*: s') '\0' '\0')
|
||||
Yield c s'
|
||||
| nonSpace -> if letter
|
||||
then lowerMapping c (nonSpace :*: s')
|
||||
else titleMapping c (letter' :*: s')
|
||||
| otherwise -> Yield c (CC (letter' :*: s') '\0' '\0')
|
||||
where nonSpace = P.not (isSpace c)
|
||||
letter' = isLetter c
|
||||
next (CC s a b) = Yield a (CC s b '\0')
|
||||
{-# INLINE [0] toTitle #-}
|
||||
|
||||
data Justify i s = Just1 !i !s
|
||||
| Just2 !i !s
|
||||
|
||||
justifyLeftI :: Integral a => a -> Char -> Stream Char -> Stream Char
|
||||
justifyLeftI k c (Stream next0 s0 len) =
|
||||
Stream next (Just1 0 s0) (larger (fromIntegral k * charSize c + len) len)
|
||||
where
|
||||
next (Just1 n s) =
|
||||
case next0 s of
|
||||
Done -> next (Just2 n s)
|
||||
Skip s' -> Skip (Just1 n s')
|
||||
Yield x s' -> Yield x (Just1 (n+1) s')
|
||||
next (Just2 n s)
|
||||
| n < k = Yield c (Just2 (n+1) s)
|
||||
| otherwise = Done
|
||||
{-# INLINE next #-}
|
||||
{-# INLINE [0] justifyLeftI #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- * Reducing Streams (folds)
|
||||
|
||||
-- | foldl, applied to a binary operator, a starting value (typically the
|
||||
-- left-identity of the operator), and a Stream, reduces the Stream using the
|
||||
-- binary operator, from left to right.
|
||||
foldl :: (b -> Char -> b) -> b -> Stream Char -> b
|
||||
foldl f z0 (Stream next s0 _len) = loop_foldl z0 s0
|
||||
where
|
||||
loop_foldl z !s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_foldl z s'
|
||||
Yield x s' -> loop_foldl (f z x) s'
|
||||
{-# INLINE [0] foldl #-}
|
||||
|
||||
-- | A strict version of foldl.
|
||||
foldl' :: (b -> Char -> b) -> b -> Stream Char -> b
|
||||
foldl' f z0 (Stream next s0 _len) = loop_foldl' z0 s0
|
||||
where
|
||||
loop_foldl' !z !s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_foldl' z s'
|
||||
Yield x s' -> loop_foldl' (f z x) s'
|
||||
{-# INLINE [0] foldl' #-}
|
||||
|
||||
-- | foldl1 is a variant of foldl that has no starting value argument,
|
||||
-- and thus must be applied to non-empty Streams.
|
||||
foldl1 :: (Char -> Char -> Char) -> Stream Char -> Char
|
||||
foldl1 f (Stream next s0 _len) = loop0_foldl1 s0
|
||||
where
|
||||
loop0_foldl1 !s = case next s of
|
||||
Skip s' -> loop0_foldl1 s'
|
||||
Yield x s' -> loop_foldl1 x s'
|
||||
Done -> emptyError "foldl1"
|
||||
loop_foldl1 z !s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_foldl1 z s'
|
||||
Yield x s' -> loop_foldl1 (f z x) s'
|
||||
{-# INLINE [0] foldl1 #-}
|
||||
|
||||
-- | A strict version of foldl1.
|
||||
foldl1' :: (Char -> Char -> Char) -> Stream Char -> Char
|
||||
foldl1' f (Stream next s0 _len) = loop0_foldl1' s0
|
||||
where
|
||||
loop0_foldl1' !s = case next s of
|
||||
Skip s' -> loop0_foldl1' s'
|
||||
Yield x s' -> loop_foldl1' x s'
|
||||
Done -> emptyError "foldl1"
|
||||
loop_foldl1' !z !s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_foldl1' z s'
|
||||
Yield x s' -> loop_foldl1' (f z x) s'
|
||||
{-# INLINE [0] foldl1' #-}
|
||||
|
||||
-- | 'foldr', applied to a binary operator, a starting value (typically the
|
||||
-- right-identity of the operator), and a stream, reduces the stream using the
|
||||
-- binary operator, from right to left.
|
||||
foldr :: (Char -> b -> b) -> b -> Stream Char -> b
|
||||
foldr f z (Stream next s0 _len) = loop_foldr s0
|
||||
where
|
||||
loop_foldr !s = case next s of
|
||||
Done -> z
|
||||
Skip s' -> loop_foldr s'
|
||||
Yield x s' -> f x (loop_foldr s')
|
||||
{-# INLINE [0] foldr #-}
|
||||
|
||||
-- | foldr1 is a variant of 'foldr' that has no starting value argument,
|
||||
-- and thus must be applied to non-empty streams.
|
||||
-- Subject to array fusion.
|
||||
foldr1 :: (Char -> Char -> Char) -> Stream Char -> Char
|
||||
foldr1 f (Stream next s0 _len) = loop0_foldr1 s0
|
||||
where
|
||||
loop0_foldr1 !s = case next s of
|
||||
Done -> emptyError "foldr1"
|
||||
Skip s' -> loop0_foldr1 s'
|
||||
Yield x s' -> loop_foldr1 x s'
|
||||
|
||||
loop_foldr1 x !s = case next s of
|
||||
Done -> x
|
||||
Skip s' -> loop_foldr1 x s'
|
||||
Yield x' s' -> f x (loop_foldr1 x' s')
|
||||
{-# INLINE [0] foldr1 #-}
|
||||
|
||||
intercalate :: Stream Char -> [Stream Char] -> Stream Char
|
||||
intercalate s = concat . (L.intersperse s)
|
||||
{-# INLINE [0] intercalate #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- ** Special folds
|
||||
|
||||
-- | /O(n)/ Concatenate a list of streams. Subject to array fusion.
|
||||
concat :: [Stream Char] -> Stream Char
|
||||
concat = L.foldr append empty
|
||||
{-# INLINE [0] concat #-}
|
||||
|
||||
-- | Map a function over a stream that results in a stream and concatenate the
|
||||
-- results.
|
||||
concatMap :: (Char -> Stream Char) -> Stream Char -> Stream Char
|
||||
concatMap f = foldr (append . f) empty
|
||||
{-# INLINE [0] concatMap #-}
|
||||
|
||||
-- | /O(n)/ any @p @xs determines if any character in the stream
|
||||
-- @xs@ satisfies the predicate @p@.
|
||||
any :: (Char -> Bool) -> Stream Char -> Bool
|
||||
any p (Stream next0 s0 _len) = loop_any s0
|
||||
where
|
||||
loop_any !s = case next0 s of
|
||||
Done -> False
|
||||
Skip s' -> loop_any s'
|
||||
Yield x s' | p x -> True
|
||||
| otherwise -> loop_any s'
|
||||
{-# INLINE [0] any #-}
|
||||
|
||||
-- | /O(n)/ all @p @xs determines if all characters in the 'Text'
|
||||
-- @xs@ satisfy the predicate @p@.
|
||||
all :: (Char -> Bool) -> Stream Char -> Bool
|
||||
all p (Stream next0 s0 _len) = loop_all s0
|
||||
where
|
||||
loop_all !s = case next0 s of
|
||||
Done -> True
|
||||
Skip s' -> loop_all s'
|
||||
Yield x s' | p x -> loop_all s'
|
||||
| otherwise -> False
|
||||
{-# INLINE [0] all #-}
|
||||
|
||||
-- | /O(n)/ maximum returns the maximum value from a stream, which must be
|
||||
-- non-empty.
|
||||
maximum :: Stream Char -> Char
|
||||
maximum (Stream next0 s0 _len) = loop0_maximum s0
|
||||
where
|
||||
loop0_maximum !s = case next0 s of
|
||||
Done -> emptyError "maximum"
|
||||
Skip s' -> loop0_maximum s'
|
||||
Yield x s' -> loop_maximum x s'
|
||||
loop_maximum !z !s = case next0 s of
|
||||
Done -> z
|
||||
Skip s' -> loop_maximum z s'
|
||||
Yield x s'
|
||||
| x > z -> loop_maximum x s'
|
||||
| otherwise -> loop_maximum z s'
|
||||
{-# INLINE [0] maximum #-}
|
||||
|
||||
-- | /O(n)/ minimum returns the minimum value from a 'Text', which must be
|
||||
-- non-empty.
|
||||
minimum :: Stream Char -> Char
|
||||
minimum (Stream next0 s0 _len) = loop0_minimum s0
|
||||
where
|
||||
loop0_minimum !s = case next0 s of
|
||||
Done -> emptyError "minimum"
|
||||
Skip s' -> loop0_minimum s'
|
||||
Yield x s' -> loop_minimum x s'
|
||||
loop_minimum !z !s = case next0 s of
|
||||
Done -> z
|
||||
Skip s' -> loop_minimum z s'
|
||||
Yield x s'
|
||||
| x < z -> loop_minimum x s'
|
||||
| otherwise -> loop_minimum z s'
|
||||
{-# INLINE [0] minimum #-}
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- * Building streams
|
||||
|
||||
scanl :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
|
||||
scanl f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next (Scan1 z s) = Yield z (Scan2 z s)
|
||||
next (Scan2 z s) = case next0 s of
|
||||
Yield x s' -> let !x' = f z x
|
||||
in Yield x' (Scan2 x' s')
|
||||
Skip s' -> Skip (Scan2 z s')
|
||||
Done -> Done
|
||||
{-# INLINE [0] scanl #-}
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- ** Generating and unfolding streams
|
||||
|
||||
replicateCharI :: Integral a => a -> Char -> Stream Char
|
||||
replicateCharI !n !c
|
||||
| n < 0 = empty
|
||||
| otherwise = Stream next 0 (fromIntegral n) -- HINT maybe too low
|
||||
where
|
||||
next !i | i >= n = Done
|
||||
| otherwise = Yield c (i + 1)
|
||||
{-# INLINE [0] replicateCharI #-}
|
||||
|
||||
data RI s = RI !s {-# UNPACK #-} !Int64
|
||||
|
||||
replicateI :: Int64 -> Stream Char -> Stream Char
|
||||
replicateI n (Stream next0 s0 len) =
|
||||
Stream next (RI s0 0) (fromIntegral (max 0 n) * len)
|
||||
where
|
||||
next (RI s k)
|
||||
| k >= n = Done
|
||||
| otherwise = case next0 s of
|
||||
Done -> Skip (RI s0 (k+1))
|
||||
Skip s' -> Skip (RI s' k)
|
||||
Yield x s' -> Yield x (RI s' k)
|
||||
{-# INLINE [0] replicateI #-}
|
||||
|
||||
-- | /O(n)/, where @n@ is the length of the result. The unfoldr function
|
||||
-- is analogous to the List 'unfoldr'. unfoldr builds a stream
|
||||
-- from a seed value. The function takes the element and returns
|
||||
-- Nothing if it is done producing the stream or returns Just
|
||||
-- (a,b), in which case, a is the next Char in the string, and b is
|
||||
-- the seed value for further production.
|
||||
unfoldr :: (a -> Maybe (Char,a)) -> a -> Stream Char
|
||||
unfoldr f s0 = Stream next s0 unknownSize
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next !s = case f s of
|
||||
Nothing -> Done
|
||||
Just (w, s') -> Yield w s'
|
||||
{-# INLINE [0] unfoldr #-}
|
||||
|
||||
-- | /O(n)/ Like 'unfoldr', 'unfoldrNI' builds a stream from a seed
|
||||
-- value. However, the length of the result is limited by the
|
||||
-- first argument to 'unfoldrNI'. This function is more efficient than
|
||||
-- 'unfoldr' when the length of the result is known.
|
||||
unfoldrNI :: Integral a => a -> (b -> Maybe (Char,b)) -> b -> Stream Char
|
||||
unfoldrNI n f s0 | n < 0 = empty
|
||||
| otherwise = Stream next (0 :*: s0) (maxSize $ fromIntegral (n*2))
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next (z :*: s) = case f s of
|
||||
Nothing -> Done
|
||||
Just (w, s') | z >= n -> Done
|
||||
| otherwise -> Yield w ((z + 1) :*: s')
|
||||
{-# INLINE unfoldrNI #-}
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- * Substreams
|
||||
|
||||
-- | /O(n)/ @'take' n@, applied to a stream, returns the prefix of the
|
||||
-- stream of length @n@, or the stream itself if @n@ is greater than the
|
||||
-- length of the stream.
|
||||
take :: Integral a => a -> Stream Char -> Stream Char
|
||||
take n0 (Stream next0 s0 len) =
|
||||
Stream next (n0' :*: s0) (smaller len (codePointsSize $ fromIntegral n0'))
|
||||
where
|
||||
n0' = max n0 0
|
||||
|
||||
{-# INLINE next #-}
|
||||
next (n :*: s) | n <= 0 = Done
|
||||
| otherwise = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (n :*: s')
|
||||
Yield x s' -> Yield x ((n-1) :*: s')
|
||||
{-# INLINE [0] take #-}
|
||||
|
||||
data Drop a s = NS !s
|
||||
| JS !a !s
|
||||
|
||||
-- | /O(n)/ @'drop' n@, applied to a stream, returns the suffix of the
|
||||
-- stream after the first @n@ characters, or the empty stream if @n@
|
||||
-- is greater than the length of the stream.
|
||||
drop :: Integral a => a -> Stream Char -> Stream Char
|
||||
drop n0 (Stream next0 s0 len) =
|
||||
Stream next (JS n0' s0) (len - codePointsSize (fromIntegral n0'))
|
||||
where
|
||||
n0' = max n0 0
|
||||
|
||||
{-# INLINE next #-}
|
||||
next (JS n s)
|
||||
| n <= 0 = Skip (NS s)
|
||||
| otherwise = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (JS n s')
|
||||
Yield _ s' -> Skip (JS (n-1) s')
|
||||
next (NS s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (NS s')
|
||||
Yield x s' -> Yield x (NS s')
|
||||
{-# INLINE [0] drop #-}
|
||||
|
||||
-- | 'takeWhile', applied to a predicate @p@ and a stream, returns the
|
||||
-- longest prefix (possibly empty) of elements that satisfy @p@.
|
||||
takeWhile :: (Char -> Bool) -> Stream Char -> Stream Char
|
||||
takeWhile p (Stream next0 s0 len) = Stream next s0 (len - unknownSize)
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next !s = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip s'
|
||||
Yield x s' | p x -> Yield x s'
|
||||
| otherwise -> Done
|
||||
{-# INLINE [0] takeWhile #-}
|
||||
|
||||
-- | @'dropWhile' p xs@ returns the suffix remaining after @'takeWhile' p xs@.
|
||||
dropWhile :: (Char -> Bool) -> Stream Char -> Stream Char
|
||||
dropWhile p (Stream next0 s0 len) = Stream next (L s0) (len - unknownSize)
|
||||
where
|
||||
{-# INLINE next #-}
|
||||
next (L s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (L s')
|
||||
Yield x s' | p x -> Skip (L s')
|
||||
| otherwise -> Yield x (R s')
|
||||
next (R s) = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip (R s')
|
||||
Yield x s' -> Yield x (R s')
|
||||
{-# INLINE [0] dropWhile #-}
|
||||
|
||||
-- | /O(n)/ The 'isPrefixOf' function takes two 'Stream's and returns
|
||||
-- 'True' iff the first is a prefix of the second.
|
||||
isPrefixOf :: (Eq a) => Stream a -> Stream a -> Bool
|
||||
isPrefixOf (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
|
||||
where
|
||||
loop Done _ = True
|
||||
loop _ Done = False
|
||||
loop (Skip s1') (Skip s2') = loop (next1 s1') (next2 s2')
|
||||
loop (Skip s1') x2 = loop (next1 s1') x2
|
||||
loop x1 (Skip s2') = loop x1 (next2 s2')
|
||||
loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
|
||||
loop (next1 s1') (next2 s2')
|
||||
{-# INLINE [0] isPrefixOf #-}
|
||||
|
||||
-- ----------------------------------------------------------------------------
|
||||
-- * Searching
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- ** Searching by equality
|
||||
|
||||
-- | /O(n)/ 'elem' is the stream membership predicate.
|
||||
elem :: Char -> Stream Char -> Bool
|
||||
elem w (Stream next s0 _len) = loop_elem s0
|
||||
where
|
||||
loop_elem !s = case next s of
|
||||
Done -> False
|
||||
Skip s' -> loop_elem s'
|
||||
Yield x s' | x == w -> True
|
||||
| otherwise -> loop_elem s'
|
||||
{-# INLINE [0] elem #-}
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- ** Searching with a predicate
|
||||
|
||||
-- | /O(n)/ The 'findBy' function takes a predicate and a stream,
|
||||
-- and returns the first element in matching the predicate, or 'Nothing'
|
||||
-- if there is no such element.
|
||||
|
||||
findBy :: (Char -> Bool) -> Stream Char -> Maybe Char
|
||||
findBy p (Stream next s0 _len) = loop_find s0
|
||||
where
|
||||
loop_find !s = case next s of
|
||||
Done -> Nothing
|
||||
Skip s' -> loop_find s'
|
||||
Yield x s' | p x -> Just x
|
||||
| otherwise -> loop_find s'
|
||||
{-# INLINE [0] findBy #-}
|
||||
|
||||
-- | /O(n)/ Stream index (subscript) operator, starting from 0.
|
||||
indexI :: Integral a => Stream Char -> a -> Char
|
||||
indexI (Stream next s0 _len) n0
|
||||
| n0 < 0 = streamError "index" "Negative index"
|
||||
| otherwise = loop_index n0 s0
|
||||
where
|
||||
loop_index !n !s = case next s of
|
||||
Done -> streamError "index" "Index too large"
|
||||
Skip s' -> loop_index n s'
|
||||
Yield x s' | n == 0 -> x
|
||||
| otherwise -> loop_index (n-1) s'
|
||||
{-# INLINE [0] indexI #-}
|
||||
|
||||
-- | /O(n)/ 'filter', applied to a predicate and a stream,
|
||||
-- returns a stream containing those characters that satisfy the
|
||||
-- predicate.
|
||||
filter :: (Char -> Bool) -> Stream Char -> Stream Char
|
||||
filter p (Stream next0 s0 len) =
|
||||
Stream next s0 (len - unknownSize) -- HINT maybe too high
|
||||
where
|
||||
next !s = case next0 s of
|
||||
Done -> Done
|
||||
Skip s' -> Skip s'
|
||||
Yield x s' | p x -> Yield x s'
|
||||
| otherwise -> Skip s'
|
||||
{-# INLINE [0] filter #-}
|
||||
|
||||
{-# RULES
|
||||
"STREAM filter/filter fusion" forall p q s.
|
||||
filter p (filter q s) = filter (\x -> q x && p x) s
|
||||
#-}
|
||||
|
||||
-- | The 'findIndexI' function takes a predicate and a stream and
|
||||
-- returns the index of the first element in the stream satisfying the
|
||||
-- predicate.
|
||||
findIndexI :: Integral a => (Char -> Bool) -> Stream Char -> Maybe a
|
||||
findIndexI p s = case findIndicesI p s of
|
||||
(i:_) -> Just i
|
||||
_ -> Nothing
|
||||
{-# INLINE [0] findIndexI #-}
|
||||
|
||||
-- | The 'findIndicesI' function takes a predicate and a stream and
|
||||
-- returns all indices of the elements in the stream satisfying the
|
||||
-- predicate.
|
||||
findIndicesI :: Integral a => (Char -> Bool) -> Stream Char -> [a]
|
||||
findIndicesI p (Stream next s0 _len) = loop_findIndex 0 s0
|
||||
where
|
||||
loop_findIndex !i !s = case next s of
|
||||
Done -> []
|
||||
Skip s' -> loop_findIndex i s' -- hmm. not caught by QC
|
||||
Yield x s' | p x -> i : loop_findIndex (i+1) s'
|
||||
| otherwise -> loop_findIndex (i+1) s'
|
||||
{-# INLINE [0] findIndicesI #-}
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- * Zipping
|
||||
|
||||
-- | Strict triple.
|
||||
data Zip a b m = Z1 !a !b
|
||||
| Z2 !a !b !m
|
||||
|
||||
-- | zipWith generalises 'zip' by zipping with the function given as
|
||||
-- the first argument, instead of a tupling function.
|
||||
zipWith :: (a -> a -> b) -> Stream a -> Stream a -> Stream b
|
||||
zipWith f (Stream next0 sa0 len1) (Stream next1 sb0 len2) =
|
||||
Stream next (Z1 sa0 sb0) (smaller len1 len2)
|
||||
where
|
||||
next (Z1 sa sb) = case next0 sa of
|
||||
Done -> Done
|
||||
Skip sa' -> Skip (Z1 sa' sb)
|
||||
Yield a sa' -> Skip (Z2 sa' sb a)
|
||||
|
||||
next (Z2 sa' sb a) = case next1 sb of
|
||||
Done -> Done
|
||||
Skip sb' -> Skip (Z2 sa' sb' a)
|
||||
Yield b sb' -> Yield (f a b) (Z1 sa' sb')
|
||||
{-# INLINE [0] zipWith #-}
|
||||
|
||||
-- | /O(n)/ The 'countCharI' function returns the number of times the
|
||||
-- query element appears in the given stream.
|
||||
countCharI :: Integral a => Char -> Stream Char -> a
|
||||
countCharI a (Stream next s0 _len) = loop 0 s0
|
||||
where
|
||||
loop !i !s = case next s of
|
||||
Done -> i
|
||||
Skip s' -> loop i s'
|
||||
Yield x s' | a == x -> loop (i+1) s'
|
||||
| otherwise -> loop i s'
|
||||
{-# INLINE [0] countCharI #-}
|
||||
|
||||
streamError :: String -> String -> a
|
||||
streamError func msg = P.error $ "Data.Text.Internal.Fusion.Common." ++ func ++ ": " ++ msg
|
||||
|
||||
emptyError :: String -> a
|
||||
emptyError func = internalError func "Empty input"
|
||||
|
||||
internalError :: String -> a
|
||||
internalError func = streamError func "Internal error"
|
@ -1,187 +0,0 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# OPTIONS_GHC -fno-warn-missing-methods #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Fusion.Internal
|
||||
-- Copyright : (c) Roman Leshchinskiy 2008,
|
||||
-- (c) Bryan O'Sullivan 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Size hints.
|
||||
|
||||
module Data.Text.Internal.Fusion.Size
|
||||
(
|
||||
Size
|
||||
-- * Sizes
|
||||
, exactSize
|
||||
, maxSize
|
||||
, betweenSize
|
||||
, unknownSize
|
||||
, unionSize
|
||||
, charSize
|
||||
, codePointsSize
|
||||
-- * Querying sizes
|
||||
, exactly
|
||||
, smaller
|
||||
, larger
|
||||
, upperBound
|
||||
, lowerBound
|
||||
, compareSize
|
||||
, isEmpty
|
||||
) where
|
||||
|
||||
import Data.Char (ord)
|
||||
import Data.Text.Internal (mul)
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
|
||||
-- | A size in UTF-16 code units.
|
||||
data Size = Between {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ Lower and upper bounds on size.
|
||||
| Unknown -- ^ Unknown size.
|
||||
deriving (Eq, Show)
|
||||
|
||||
exactly :: Size -> Maybe Int
|
||||
exactly (Between na nb) | na == nb = Just na
|
||||
exactly _ = Nothing
|
||||
{-# INLINE exactly #-}
|
||||
|
||||
-- | The 'Size' of the given code point.
|
||||
charSize :: Char -> Size
|
||||
charSize c
|
||||
| ord c < 0x10000 = exactSize 1
|
||||
| otherwise = exactSize 2
|
||||
|
||||
-- | The 'Size' of @n@ code points.
|
||||
codePointsSize :: Int -> Size
|
||||
codePointsSize n =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0)
|
||||
#endif
|
||||
Between n (2*n)
|
||||
{-# INLINE codePointsSize #-}
|
||||
|
||||
exactSize :: Int -> Size
|
||||
exactSize n =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0)
|
||||
#endif
|
||||
Between n n
|
||||
{-# INLINE exactSize #-}
|
||||
|
||||
maxSize :: Int -> Size
|
||||
maxSize n =
|
||||
#if defined(ASSERTS)
|
||||
assert (n >= 0)
|
||||
#endif
|
||||
Between 0 n
|
||||
{-# INLINE maxSize #-}
|
||||
|
||||
betweenSize :: Int -> Int -> Size
|
||||
betweenSize m n =
|
||||
#if defined(ASSERTS)
|
||||
assert (m >= 0)
|
||||
assert (n >= m)
|
||||
#endif
|
||||
Between m n
|
||||
{-# INLINE betweenSize #-}
|
||||
|
||||
unionSize :: Size -> Size -> Size
|
||||
unionSize (Between a b) (Between c d) = Between (min a c) (max b d)
|
||||
unionSize _ _ = Unknown
|
||||
|
||||
unknownSize :: Size
|
||||
unknownSize = Unknown
|
||||
{-# INLINE unknownSize #-}
|
||||
|
||||
instance Num Size where
|
||||
(+) = addSize
|
||||
(-) = subtractSize
|
||||
(*) = mulSize
|
||||
|
||||
fromInteger = f where f = exactSize . fromInteger
|
||||
{-# INLINE f #-}
|
||||
|
||||
add :: Int -> Int -> Int
|
||||
add m n | mn >= 0 = mn
|
||||
| otherwise = overflowError
|
||||
where mn = m + n
|
||||
{-# INLINE add #-}
|
||||
|
||||
addSize :: Size -> Size -> Size
|
||||
addSize (Between ma mb) (Between na nb) = Between (add ma na) (add mb nb)
|
||||
addSize _ _ = Unknown
|
||||
{-# INLINE addSize #-}
|
||||
|
||||
subtractSize :: Size -> Size -> Size
|
||||
subtractSize (Between ma mb) (Between na nb) = Between (max (ma-nb) 0) (max (mb-na) 0)
|
||||
subtractSize a@(Between 0 _) Unknown = a
|
||||
subtractSize (Between _ mb) Unknown = Between 0 mb
|
||||
subtractSize _ _ = Unknown
|
||||
{-# INLINE subtractSize #-}
|
||||
|
||||
mulSize :: Size -> Size -> Size
|
||||
mulSize (Between ma mb) (Between na nb) = Between (mul ma na) (mul mb nb)
|
||||
mulSize _ _ = Unknown
|
||||
{-# INLINE mulSize #-}
|
||||
|
||||
-- | Minimum of two size hints.
|
||||
smaller :: Size -> Size -> Size
|
||||
smaller a@(Between ma mb) b@(Between na nb)
|
||||
| mb <= na = a
|
||||
| nb <= ma = b
|
||||
| otherwise = Between (ma `min` na) (mb `min` nb)
|
||||
smaller a@(Between 0 _) Unknown = a
|
||||
smaller (Between _ mb) Unknown = Between 0 mb
|
||||
smaller Unknown b@(Between 0 _) = b
|
||||
smaller Unknown (Between _ nb) = Between 0 nb
|
||||
smaller Unknown Unknown = Unknown
|
||||
{-# INLINE smaller #-}
|
||||
|
||||
-- | Maximum of two size hints.
|
||||
larger :: Size -> Size -> Size
|
||||
larger a@(Between ma mb) b@(Between na nb)
|
||||
| ma >= nb = a
|
||||
| na >= mb = b
|
||||
| otherwise = Between (ma `max` na) (mb `max` nb)
|
||||
larger _ _ = Unknown
|
||||
{-# INLINE larger #-}
|
||||
|
||||
-- | Compute the maximum size from a size hint, if possible.
|
||||
upperBound :: Int -> Size -> Int
|
||||
upperBound _ (Between _ n) = n
|
||||
upperBound k _ = k
|
||||
{-# INLINE upperBound #-}
|
||||
|
||||
-- | Compute the maximum size from a size hint, if possible.
|
||||
lowerBound :: Int -> Size -> Int
|
||||
lowerBound _ (Between n _) = n
|
||||
lowerBound k _ = k
|
||||
{-# INLINE lowerBound #-}
|
||||
|
||||
-- | Determine the ordering relationship between two 'Size's, or 'Nothing' in
|
||||
-- the indeterminate case.
|
||||
compareSize :: Size -> Size -> Maybe Ordering
|
||||
compareSize (Between ma mb) (Between na nb)
|
||||
| mb < na = Just LT
|
||||
| ma > nb = Just GT
|
||||
| ma == mb
|
||||
, ma == na
|
||||
, ma == nb = Just EQ
|
||||
compareSize _ _ = Nothing
|
||||
|
||||
|
||||
isEmpty :: Size -> Bool
|
||||
isEmpty (Between _ n) = n <= 0
|
||||
isEmpty _ = False
|
||||
{-# INLINE isEmpty #-}
|
||||
|
||||
overflowError :: Int
|
||||
overflowError = error "Data.Text.Internal.Fusion.Size: size overflow"
|
@ -1,122 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, ExistentialQuantification #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Fusion.Types
|
||||
-- Copyright : (c) Tom Harper 2008-2009,
|
||||
-- (c) Bryan O'Sullivan 2009,
|
||||
-- (c) Duncan Coutts 2009,
|
||||
-- (c) Jasper Van der Jeugt 2011
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Core stream fusion functionality for text.
|
||||
|
||||
module Data.Text.Internal.Fusion.Types
|
||||
(
|
||||
CC(..)
|
||||
, PairS(..)
|
||||
, Scan(..)
|
||||
, RS(..)
|
||||
, Step(..)
|
||||
, Stream(..)
|
||||
, empty
|
||||
) where
|
||||
|
||||
import Data.Text.Internal.Fusion.Size
|
||||
import Data.Word (Word8)
|
||||
|
||||
-- | Specialised tuple for case conversion.
|
||||
data CC s = CC !s {-# UNPACK #-} !Char {-# UNPACK #-} !Char
|
||||
|
||||
-- | Restreaming state.
|
||||
data RS s
|
||||
= RS0 !s
|
||||
| RS1 !s {-# UNPACK #-} !Word8
|
||||
| RS2 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
|
||||
| RS3 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
|
||||
|
||||
-- | Strict pair.
|
||||
data PairS a b = !a :*: !b
|
||||
-- deriving (Eq, Ord, Show)
|
||||
infixl 2 :*:
|
||||
|
||||
-- | An intermediate result in a scan.
|
||||
data Scan s = Scan1 {-# UNPACK #-} !Char !s
|
||||
| Scan2 {-# UNPACK #-} !Char !s
|
||||
|
||||
-- | Intermediate result in a processing pipeline.
|
||||
data Step s a = Done
|
||||
| Skip !s
|
||||
| Yield !a !s
|
||||
|
||||
{-
|
||||
instance (Show a) => Show (Step s a)
|
||||
where show Done = "Done"
|
||||
show (Skip _) = "Skip"
|
||||
show (Yield x _) = "Yield " ++ show x
|
||||
-}
|
||||
|
||||
instance (Eq a) => Eq (Stream a) where
|
||||
(==) = eq
|
||||
|
||||
instance (Ord a) => Ord (Stream a) where
|
||||
compare = cmp
|
||||
|
||||
-- The length hint in a Stream has two roles. If its value is zero,
|
||||
-- we trust it, and treat the stream as empty. Otherwise, we treat it
|
||||
-- as a hint: it should usually be accurate, so we use it when
|
||||
-- unstreaming to decide what size array to allocate. However, the
|
||||
-- unstreaming functions must be able to cope with the hint being too
|
||||
-- small or too large.
|
||||
--
|
||||
-- The size hint tries to track the UTF-16 code units in a stream,
|
||||
-- but often counts the number of code points instead. It can easily
|
||||
-- undercount if, for instance, a transformed stream contains astral
|
||||
-- plane code points (those above 0x10000).
|
||||
|
||||
data Stream a =
|
||||
forall s. Stream
|
||||
(s -> Step s a) -- stepper function
|
||||
!s -- current state
|
||||
!Size -- size hint in code units
|
||||
|
||||
-- | /O(n)/ Determines if two streams are equal.
|
||||
eq :: (Eq a) => Stream a -> Stream a -> Bool
|
||||
eq (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
|
||||
where
|
||||
loop Done Done = True
|
||||
loop (Skip s1') (Skip s2') = loop (next1 s1') (next2 s2')
|
||||
loop (Skip s1') x2 = loop (next1 s1') x2
|
||||
loop x1 (Skip s2') = loop x1 (next2 s2')
|
||||
loop Done _ = False
|
||||
loop _ Done = False
|
||||
loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
|
||||
loop (next1 s1') (next2 s2')
|
||||
{-# INLINE [0] eq #-}
|
||||
|
||||
cmp :: (Ord a) => Stream a -> Stream a -> Ordering
|
||||
cmp (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
|
||||
where
|
||||
loop Done Done = EQ
|
||||
loop (Skip s1') (Skip s2') = loop (next1 s1') (next2 s2')
|
||||
loop (Skip s1') x2 = loop (next1 s1') x2
|
||||
loop x1 (Skip s2') = loop x1 (next2 s2')
|
||||
loop Done _ = LT
|
||||
loop _ Done = GT
|
||||
loop (Yield x1 s1') (Yield x2 s2') =
|
||||
case compare x1 x2 of
|
||||
EQ -> loop (next1 s1') (next2 s2')
|
||||
other -> other
|
||||
{-# INLINE [0] cmp #-}
|
||||
|
||||
-- | The empty stream.
|
||||
empty :: Stream a
|
||||
empty = Stream next () 0
|
||||
where next _ = Done
|
||||
{-# INLINE [0] empty #-}
|
@ -1,166 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, RecordWildCards #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.IO
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Simon Marlow
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Low-level support for text I\/O.
|
||||
|
||||
module Data.Text.Internal.IO
|
||||
(
|
||||
hGetLineWith
|
||||
, readChunk
|
||||
) where
|
||||
|
||||
import qualified Control.Exception as E
|
||||
import Data.IORef (readIORef, writeIORef)
|
||||
import Data.Text (Text)
|
||||
import Data.Text.Internal.Fusion (unstream)
|
||||
import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
|
||||
import Data.Text.Internal.Fusion.Size (exactSize, maxSize)
|
||||
import Data.Text.Unsafe (inlinePerformIO)
|
||||
import Foreign.Storable (peekElemOff)
|
||||
import GHC.IO.Buffer (Buffer(..), CharBuffer, RawCharBuffer, bufferAdjustL,
|
||||
bufferElems, charSize, isEmptyBuffer, readCharBuf,
|
||||
withRawBuffer, writeCharBuf)
|
||||
import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_)
|
||||
import GHC.IO.Handle.Types (Handle__(..), Newline(..))
|
||||
import System.IO (Handle)
|
||||
import System.IO.Error (isEOFError)
|
||||
import qualified Data.Text as T
|
||||
|
||||
-- | Read a single line of input from a handle, constructing a list of
|
||||
-- decoded chunks as we go. When we're done, transform them into the
|
||||
-- destination type.
|
||||
hGetLineWith :: ([Text] -> t) -> Handle -> IO t
|
||||
hGetLineWith f h = wantReadableHandle_ "hGetLine" h go
|
||||
where
|
||||
go hh@Handle__{..} = readIORef haCharBuffer >>= fmap f . hGetLineLoop hh []
|
||||
|
||||
hGetLineLoop :: Handle__ -> [Text] -> CharBuffer -> IO [Text]
|
||||
hGetLineLoop hh@Handle__{..} = go where
|
||||
go ts buf@Buffer{ bufL=r0, bufR=w, bufRaw=raw0 } = do
|
||||
let findEOL raw r | r == w = return (False, w)
|
||||
| otherwise = do
|
||||
(c,r') <- readCharBuf raw r
|
||||
if c == '\n'
|
||||
then return (True, r)
|
||||
else findEOL raw r'
|
||||
(eol, off) <- findEOL raw0 r0
|
||||
(t,r') <- if haInputNL == CRLF
|
||||
then unpack_nl raw0 r0 off
|
||||
else do t <- unpack raw0 r0 off
|
||||
return (t,off)
|
||||
if eol
|
||||
then do writeIORef haCharBuffer (bufferAdjustL (off+1) buf)
|
||||
return $ reverse (t:ts)
|
||||
else do
|
||||
let buf1 = bufferAdjustL r' buf
|
||||
maybe_buf <- maybeFillReadBuffer hh buf1
|
||||
case maybe_buf of
|
||||
-- Nothing indicates we caught an EOF, and we may have a
|
||||
-- partial line to return.
|
||||
Nothing -> do
|
||||
-- we reached EOF. There might be a lone \r left
|
||||
-- in the buffer, so check for that and
|
||||
-- append it to the line if necessary.
|
||||
let pre | isEmptyBuffer buf1 = T.empty
|
||||
| otherwise = T.singleton '\r'
|
||||
writeIORef haCharBuffer buf1{ bufL=0, bufR=0 }
|
||||
let str = reverse . filter (not . T.null) $ pre:t:ts
|
||||
if null str
|
||||
then ioe_EOF
|
||||
else return str
|
||||
Just new_buf -> go (t:ts) new_buf
|
||||
|
||||
-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
|
||||
maybeFillReadBuffer :: Handle__ -> CharBuffer -> IO (Maybe CharBuffer)
|
||||
maybeFillReadBuffer handle_ buf
|
||||
= E.catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
|
||||
if isEOFError e
|
||||
then return Nothing
|
||||
else ioError e
|
||||
|
||||
unpack :: RawCharBuffer -> Int -> Int -> IO Text
|
||||
unpack !buf !r !w
|
||||
| charSize /= 4 = sizeError "unpack"
|
||||
| r >= w = return T.empty
|
||||
| otherwise = withRawBuffer buf go
|
||||
where
|
||||
go pbuf = return $! unstream (Stream next r (exactSize (w-r)))
|
||||
where
|
||||
next !i | i >= w = Done
|
||||
| otherwise = Yield (ix i) (i+1)
|
||||
ix i = inlinePerformIO $ peekElemOff pbuf i
|
||||
|
||||
unpack_nl :: RawCharBuffer -> Int -> Int -> IO (Text, Int)
|
||||
unpack_nl !buf !r !w
|
||||
| charSize /= 4 = sizeError "unpack_nl"
|
||||
| r >= w = return (T.empty, 0)
|
||||
| otherwise = withRawBuffer buf $ go
|
||||
where
|
||||
go pbuf = do
|
||||
let !t = unstream (Stream next r (maxSize (w-r)))
|
||||
w' = w - 1
|
||||
return $ if ix w' == '\r'
|
||||
then (t,w')
|
||||
else (t,w)
|
||||
where
|
||||
next !i | i >= w = Done
|
||||
| c == '\r' = let i' = i + 1
|
||||
in if i' < w
|
||||
then if ix i' == '\n'
|
||||
then Yield '\n' (i+2)
|
||||
else Yield '\n' i'
|
||||
else Done
|
||||
| otherwise = Yield c (i+1)
|
||||
where c = ix i
|
||||
ix i = inlinePerformIO $ peekElemOff pbuf i
|
||||
|
||||
-- This function is completely lifted from GHC.IO.Handle.Text.
|
||||
getSomeCharacters :: Handle__ -> CharBuffer -> IO CharBuffer
|
||||
getSomeCharacters handle_@Handle__{..} buf@Buffer{..} =
|
||||
case bufferElems buf of
|
||||
-- buffer empty: read some more
|
||||
0 -> {-# SCC "readTextDevice" #-} readTextDevice handle_ buf
|
||||
|
||||
-- if the buffer has a single '\r' in it and we're doing newline
|
||||
-- translation: read some more
|
||||
1 | haInputNL == CRLF -> do
|
||||
(c,_) <- readCharBuf bufRaw bufL
|
||||
if c == '\r'
|
||||
then do -- shuffle the '\r' to the beginning. This is only safe
|
||||
-- if we're about to call readTextDevice, otherwise it
|
||||
-- would mess up flushCharBuffer.
|
||||
-- See [note Buffer Flushing], GHC.IO.Handle.Types
|
||||
_ <- writeCharBuf bufRaw 0 '\r'
|
||||
let buf' = buf{ bufL=0, bufR=1 }
|
||||
readTextDevice handle_ buf'
|
||||
else do
|
||||
return buf
|
||||
|
||||
-- buffer has some chars in it already: just return it
|
||||
_otherwise -> {-# SCC "otherwise" #-} return buf
|
||||
|
||||
-- | Read a single chunk of strict text from a buffer. Used by both
|
||||
-- the strict and lazy implementations of hGetContents.
|
||||
readChunk :: Handle__ -> CharBuffer -> IO Text
|
||||
readChunk hh@Handle__{..} buf = do
|
||||
buf'@Buffer{..} <- getSomeCharacters hh buf
|
||||
(t,r) <- if haInputNL == CRLF
|
||||
then unpack_nl bufRaw bufL bufR
|
||||
else do t <- unpack bufRaw bufL bufR
|
||||
return (t,bufR)
|
||||
writeIORef haCharBuffer (bufferAdjustL r buf')
|
||||
return t
|
||||
|
||||
sizeError :: String -> a
|
||||
sizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size"
|
@ -1,119 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, DeriveDataTypeable #-}
|
||||
{-# OPTIONS_HADDOCK not-home #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Lazy
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- A module containing private 'Text' internals. This exposes the
|
||||
-- 'Text' representation and low level construction functions.
|
||||
-- Modules which extend the 'Text' system may need to use this module.
|
||||
|
||||
module Data.Text.Internal.Lazy
|
||||
(
|
||||
Text(..)
|
||||
, chunk
|
||||
, empty
|
||||
, foldrChunks
|
||||
, foldlChunks
|
||||
-- * Data type invariant and abstraction functions
|
||||
|
||||
-- $invariant
|
||||
, strictInvariant
|
||||
, lazyInvariant
|
||||
, showStructure
|
||||
|
||||
-- * Chunk allocation sizes
|
||||
, defaultChunkSize
|
||||
, smallChunkSize
|
||||
, chunkOverhead
|
||||
) where
|
||||
|
||||
import Data.Text ()
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL)
|
||||
import Data.Typeable (Typeable)
|
||||
import Foreign.Storable (sizeOf)
|
||||
import qualified Data.Text.Internal as T
|
||||
|
||||
data Text = Empty
|
||||
| Chunk {-# UNPACK #-} !T.Text Text
|
||||
deriving (Typeable)
|
||||
|
||||
-- $invariant
|
||||
--
|
||||
-- The data type invariant for lazy 'Text': Every 'Text' is either 'Empty' or
|
||||
-- consists of non-null 'T.Text's. All functions must preserve this,
|
||||
-- and the QC properties must check this.
|
||||
|
||||
-- | Check the invariant strictly.
|
||||
strictInvariant :: Text -> Bool
|
||||
strictInvariant Empty = True
|
||||
strictInvariant x@(Chunk (T.Text _ _ len) cs)
|
||||
| len > 0 = strictInvariant cs
|
||||
| otherwise = error $ "Data.Text.Lazy: invariant violation: "
|
||||
++ showStructure x
|
||||
|
||||
-- | Check the invariant lazily.
|
||||
lazyInvariant :: Text -> Text
|
||||
lazyInvariant Empty = Empty
|
||||
lazyInvariant x@(Chunk c@(T.Text _ _ len) cs)
|
||||
| len > 0 = Chunk c (lazyInvariant cs)
|
||||
| otherwise = error $ "Data.Text.Lazy: invariant violation: "
|
||||
++ showStructure x
|
||||
|
||||
-- | Display the internal structure of a lazy 'Text'.
|
||||
showStructure :: Text -> String
|
||||
showStructure Empty = "Empty"
|
||||
showStructure (Chunk t Empty) = "Chunk " ++ show t ++ " Empty"
|
||||
showStructure (Chunk t ts) =
|
||||
"Chunk " ++ show t ++ " (" ++ showStructure ts ++ ")"
|
||||
|
||||
-- | Smart constructor for 'Chunk'. Guarantees the data type invariant.
|
||||
chunk :: T.Text -> Text -> Text
|
||||
{-# INLINE chunk #-}
|
||||
chunk t@(T.Text _ _ len) ts | len == 0 = ts
|
||||
| otherwise = Chunk t ts
|
||||
|
||||
-- | Smart constructor for 'Empty'.
|
||||
empty :: Text
|
||||
{-# INLINE [0] empty #-}
|
||||
empty = Empty
|
||||
|
||||
-- | Consume the chunks of a lazy 'Text' with a natural right fold.
|
||||
foldrChunks :: (T.Text -> a -> a) -> a -> Text -> a
|
||||
foldrChunks f z = go
|
||||
where go Empty = z
|
||||
go (Chunk c cs) = f c (go cs)
|
||||
{-# INLINE foldrChunks #-}
|
||||
|
||||
-- | Consume the chunks of a lazy 'Text' with a strict, tail-recursive,
|
||||
-- accumulating left fold.
|
||||
foldlChunks :: (a -> T.Text -> a) -> a -> Text -> a
|
||||
foldlChunks f z = go z
|
||||
where go !a Empty = a
|
||||
go !a (Chunk c cs) = go (f a c) cs
|
||||
{-# INLINE foldlChunks #-}
|
||||
|
||||
-- | Currently set to 16 KiB, less the memory management overhead.
|
||||
defaultChunkSize :: Int
|
||||
defaultChunkSize = 16384 - chunkOverhead
|
||||
{-# INLINE defaultChunkSize #-}
|
||||
|
||||
-- | Currently set to 128 bytes, less the memory management overhead.
|
||||
smallChunkSize :: Int
|
||||
smallChunkSize = 128 - chunkOverhead
|
||||
{-# INLINE smallChunkSize #-}
|
||||
|
||||
-- | The memory management overhead. Currently this is tuned for GHC only.
|
||||
chunkOverhead :: Int
|
||||
chunkOverhead = sizeOf (undefined :: Int) `shiftL` 1
|
||||
{-# INLINE chunkOverhead #-}
|
@ -1,324 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Encoding.Fusion
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Fusible 'Stream'-oriented functions for converting between lazy
|
||||
-- 'Text' and several common encodings.
|
||||
|
||||
module Data.Text.Internal.Lazy.Encoding.Fusion
|
||||
(
|
||||
-- * Streaming
|
||||
-- streamASCII
|
||||
streamUtf8
|
||||
, streamUtf16LE
|
||||
, streamUtf16BE
|
||||
, streamUtf32LE
|
||||
, streamUtf32BE
|
||||
|
||||
-- * Unstreaming
|
||||
, unstream
|
||||
|
||||
, module Data.Text.Internal.Encoding.Fusion.Common
|
||||
) where
|
||||
|
||||
import Data.ByteString.Lazy.Internal (ByteString(..), defaultChunkSize)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Unsafe as B
|
||||
import Data.Text.Internal.Encoding.Fusion.Common
|
||||
import Data.Text.Encoding.Error
|
||||
import Data.Text.Internal.Fusion (Step(..), Stream(..))
|
||||
import Data.Text.Internal.Fusion.Size
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeChr, unsafeChr8, unsafeChr32)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL)
|
||||
import Data.Word (Word8, Word16, Word32)
|
||||
import qualified Data.Text.Internal.Encoding.Utf8 as U8
|
||||
import qualified Data.Text.Internal.Encoding.Utf16 as U16
|
||||
import qualified Data.Text.Internal.Encoding.Utf32 as U32
|
||||
import Data.Text.Unsafe (unsafeDupablePerformIO)
|
||||
import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
|
||||
import Foreign.Storable (pokeByteOff)
|
||||
import Data.ByteString.Internal (mallocByteString, memcpy)
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import qualified Data.ByteString.Internal as B
|
||||
|
||||
data S = S0
|
||||
| S1 {-# UNPACK #-} !Word8
|
||||
| S2 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
|
||||
| S3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
|
||||
| S4 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
|
||||
|
||||
data T = T !ByteString !S {-# UNPACK #-} !Int
|
||||
|
||||
-- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using
|
||||
-- UTF-8 encoding.
|
||||
streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf8 onErr bs0 = Stream next (T bs0 S0 0) unknownSize
|
||||
where
|
||||
next (T bs@(Chunk ps _) S0 i)
|
||||
| i < len && U8.validate1 a =
|
||||
Yield (unsafeChr8 a) (T bs S0 (i+1))
|
||||
| i + 1 < len && U8.validate2 a b =
|
||||
Yield (U8.chr2 a b) (T bs S0 (i+2))
|
||||
| i + 2 < len && U8.validate3 a b c =
|
||||
Yield (U8.chr3 a b c) (T bs S0 (i+3))
|
||||
| i + 3 < len && U8.validate4 a b c d =
|
||||
Yield (U8.chr4 a b c d) (T bs S0 (i+4))
|
||||
where len = B.length ps
|
||||
a = B.unsafeIndex ps i
|
||||
b = B.unsafeIndex ps (i+1)
|
||||
c = B.unsafeIndex ps (i+2)
|
||||
d = B.unsafeIndex ps (i+3)
|
||||
next st@(T bs s i) =
|
||||
case s of
|
||||
S1 a | U8.validate1 a -> Yield (unsafeChr8 a) es
|
||||
S2 a b | U8.validate2 a b -> Yield (U8.chr2 a b) es
|
||||
S3 a b c | U8.validate3 a b c -> Yield (U8.chr3 a b c) es
|
||||
S4 a b c d | U8.validate4 a b c d -> Yield (U8.chr4 a b c d) es
|
||||
_ -> consume st
|
||||
where es = T bs S0 i
|
||||
consume (T bs@(Chunk ps rest) s i)
|
||||
| i >= B.length ps = consume (T rest s 0)
|
||||
| otherwise =
|
||||
case s of
|
||||
S0 -> next (T bs (S1 x) (i+1))
|
||||
S1 a -> next (T bs (S2 a x) (i+1))
|
||||
S2 a b -> next (T bs (S3 a b x) (i+1))
|
||||
S3 a b c -> next (T bs (S4 a b c x) (i+1))
|
||||
S4 a b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
|
||||
(T bs (S3 b c d) (i+1))
|
||||
where x = B.unsafeIndex ps i
|
||||
consume (T Empty S0 _) = Done
|
||||
consume st = decodeError "streamUtf8" "UTF-8" onErr Nothing st
|
||||
{-# INLINE [0] streamUtf8 #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
|
||||
-- endian UTF-16 encoding.
|
||||
streamUtf16LE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf16LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
|
||||
where
|
||||
next (T bs@(Chunk ps _) S0 i)
|
||||
| i + 1 < len && U16.validate1 x1 =
|
||||
Yield (unsafeChr x1) (T bs S0 (i+2))
|
||||
| i + 3 < len && U16.validate2 x1 x2 =
|
||||
Yield (U16.chr2 x1 x2) (T bs S0 (i+4))
|
||||
where len = B.length ps
|
||||
x1 = c (idx i) (idx (i + 1))
|
||||
x2 = c (idx (i + 2)) (idx (i + 3))
|
||||
c w1 w2 = w1 + (w2 `shiftL` 8)
|
||||
idx = fromIntegral . B.unsafeIndex ps :: Int -> Word16
|
||||
next st@(T bs s i) =
|
||||
case s of
|
||||
S2 w1 w2 | U16.validate1 (c w1 w2) ->
|
||||
Yield (unsafeChr (c w1 w2)) es
|
||||
S4 w1 w2 w3 w4 | U16.validate2 (c w1 w2) (c w3 w4) ->
|
||||
Yield (U16.chr2 (c w1 w2) (c w3 w4)) es
|
||||
_ -> consume st
|
||||
where es = T bs S0 i
|
||||
c :: Word8 -> Word8 -> Word16
|
||||
c w1 w2 = fromIntegral w1 + (fromIntegral w2 `shiftL` 8)
|
||||
consume (T bs@(Chunk ps rest) s i)
|
||||
| i >= B.length ps = consume (T rest s 0)
|
||||
| otherwise =
|
||||
case s of
|
||||
S0 -> next (T bs (S1 x) (i+1))
|
||||
S1 w1 -> next (T bs (S2 w1 x) (i+1))
|
||||
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
|
||||
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
|
||||
S4 w1 w2 w3 w4 -> decodeError "streamUtf16LE" "UTF-16LE" onErr (Just w1)
|
||||
(T bs (S3 w2 w3 w4) (i+1))
|
||||
where x = B.unsafeIndex ps i
|
||||
consume (T Empty S0 _) = Done
|
||||
consume st = decodeError "streamUtf16LE" "UTF-16LE" onErr Nothing st
|
||||
{-# INLINE [0] streamUtf16LE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
|
||||
-- endian UTF-16 encoding.
|
||||
streamUtf16BE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf16BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
|
||||
where
|
||||
next (T bs@(Chunk ps _) S0 i)
|
||||
| i + 1 < len && U16.validate1 x1 =
|
||||
Yield (unsafeChr x1) (T bs S0 (i+2))
|
||||
| i + 3 < len && U16.validate2 x1 x2 =
|
||||
Yield (U16.chr2 x1 x2) (T bs S0 (i+4))
|
||||
where len = B.length ps
|
||||
x1 = c (idx i) (idx (i + 1))
|
||||
x2 = c (idx (i + 2)) (idx (i + 3))
|
||||
c w1 w2 = (w1 `shiftL` 8) + w2
|
||||
idx = fromIntegral . B.unsafeIndex ps :: Int -> Word16
|
||||
next st@(T bs s i) =
|
||||
case s of
|
||||
S2 w1 w2 | U16.validate1 (c w1 w2) ->
|
||||
Yield (unsafeChr (c w1 w2)) es
|
||||
S4 w1 w2 w3 w4 | U16.validate2 (c w1 w2) (c w3 w4) ->
|
||||
Yield (U16.chr2 (c w1 w2) (c w3 w4)) es
|
||||
_ -> consume st
|
||||
where es = T bs S0 i
|
||||
c :: Word8 -> Word8 -> Word16
|
||||
c w1 w2 = (fromIntegral w1 `shiftL` 8) + fromIntegral w2
|
||||
consume (T bs@(Chunk ps rest) s i)
|
||||
| i >= B.length ps = consume (T rest s 0)
|
||||
| otherwise =
|
||||
case s of
|
||||
S0 -> next (T bs (S1 x) (i+1))
|
||||
S1 w1 -> next (T bs (S2 w1 x) (i+1))
|
||||
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
|
||||
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
|
||||
S4 w1 w2 w3 w4 -> decodeError "streamUtf16BE" "UTF-16BE" onErr (Just w1)
|
||||
(T bs (S3 w2 w3 w4) (i+1))
|
||||
where x = B.unsafeIndex ps i
|
||||
consume (T Empty S0 _) = Done
|
||||
consume st = decodeError "streamUtf16BE" "UTF-16BE" onErr Nothing st
|
||||
{-# INLINE [0] streamUtf16BE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
|
||||
-- endian UTF-32 encoding.
|
||||
streamUtf32BE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf32BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
|
||||
where
|
||||
next (T bs@(Chunk ps _) S0 i)
|
||||
| i + 3 < len && U32.validate x =
|
||||
Yield (unsafeChr32 x) (T bs S0 (i+4))
|
||||
where len = B.length ps
|
||||
x = shiftL x1 24 + shiftL x2 16 + shiftL x3 8 + x4
|
||||
x1 = idx i
|
||||
x2 = idx (i+1)
|
||||
x3 = idx (i+2)
|
||||
x4 = idx (i+3)
|
||||
idx = fromIntegral . B.unsafeIndex ps :: Int -> Word32
|
||||
next st@(T bs s i) =
|
||||
case s of
|
||||
S4 w1 w2 w3 w4 | U32.validate (c w1 w2 w3 w4) ->
|
||||
Yield (unsafeChr32 (c w1 w2 w3 w4)) es
|
||||
_ -> consume st
|
||||
where es = T bs S0 i
|
||||
c :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
|
||||
c w1 w2 w3 w4 = shifted
|
||||
where
|
||||
shifted = shiftL x1 24 + shiftL x2 16 + shiftL x3 8 + x4
|
||||
x1 = fromIntegral w1
|
||||
x2 = fromIntegral w2
|
||||
x3 = fromIntegral w3
|
||||
x4 = fromIntegral w4
|
||||
consume (T bs@(Chunk ps rest) s i)
|
||||
| i >= B.length ps = consume (T rest s 0)
|
||||
| otherwise =
|
||||
case s of
|
||||
S0 -> next (T bs (S1 x) (i+1))
|
||||
S1 w1 -> next (T bs (S2 w1 x) (i+1))
|
||||
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
|
||||
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
|
||||
S4 w1 w2 w3 w4 -> decodeError "streamUtf32BE" "UTF-32BE" onErr (Just w1)
|
||||
(T bs (S3 w2 w3 w4) (i+1))
|
||||
where x = B.unsafeIndex ps i
|
||||
consume (T Empty S0 _) = Done
|
||||
consume st = decodeError "streamUtf32BE" "UTF-32BE" onErr Nothing st
|
||||
{-# INLINE [0] streamUtf32BE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
|
||||
-- endian UTF-32 encoding.
|
||||
streamUtf32LE :: OnDecodeError -> ByteString -> Stream Char
|
||||
streamUtf32LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
|
||||
where
|
||||
next (T bs@(Chunk ps _) S0 i)
|
||||
| i + 3 < len && U32.validate x =
|
||||
Yield (unsafeChr32 x) (T bs S0 (i+4))
|
||||
where len = B.length ps
|
||||
x = shiftL x4 24 + shiftL x3 16 + shiftL x2 8 + x1
|
||||
x1 = idx i
|
||||
x2 = idx (i+1)
|
||||
x3 = idx (i+2)
|
||||
x4 = idx (i+3)
|
||||
idx = fromIntegral . B.unsafeIndex ps :: Int -> Word32
|
||||
next st@(T bs s i) =
|
||||
case s of
|
||||
S4 w1 w2 w3 w4 | U32.validate (c w1 w2 w3 w4) ->
|
||||
Yield (unsafeChr32 (c w1 w2 w3 w4)) es
|
||||
_ -> consume st
|
||||
where es = T bs S0 i
|
||||
c :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
|
||||
c w1 w2 w3 w4 = shifted
|
||||
where
|
||||
shifted = shiftL x4 24 + shiftL x3 16 + shiftL x2 8 + x1
|
||||
x1 = fromIntegral w1
|
||||
x2 = fromIntegral w2
|
||||
x3 = fromIntegral w3
|
||||
x4 = fromIntegral w4
|
||||
consume (T bs@(Chunk ps rest) s i)
|
||||
| i >= B.length ps = consume (T rest s 0)
|
||||
| otherwise =
|
||||
case s of
|
||||
S0 -> next (T bs (S1 x) (i+1))
|
||||
S1 w1 -> next (T bs (S2 w1 x) (i+1))
|
||||
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
|
||||
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
|
||||
S4 w1 w2 w3 w4 -> decodeError "streamUtf32LE" "UTF-32LE" onErr (Just w1)
|
||||
(T bs (S3 w2 w3 w4) (i+1))
|
||||
where x = B.unsafeIndex ps i
|
||||
consume (T Empty S0 _) = Done
|
||||
consume st = decodeError "streamUtf32LE" "UTF-32LE" onErr Nothing st
|
||||
{-# INLINE [0] streamUtf32LE #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
|
||||
unstreamChunks :: Int -> Stream Word8 -> ByteString
|
||||
unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 (upperBound 4 len0)
|
||||
where chunk s1 len1 = unsafeDupablePerformIO $ do
|
||||
let len = max 4 (min len1 chunkSize)
|
||||
mallocByteString len >>= loop len 0 s1
|
||||
where
|
||||
loop !n !off !s fp = case next s of
|
||||
Done | off == 0 -> return Empty
|
||||
| otherwise -> return $! Chunk (trimUp fp off) Empty
|
||||
Skip s' -> loop n off s' fp
|
||||
Yield x s'
|
||||
| off == chunkSize -> do
|
||||
let !newLen = n - off
|
||||
return $! Chunk (trimUp fp off) (chunk s newLen)
|
||||
| off == n -> realloc fp n off s' x
|
||||
| otherwise -> do
|
||||
withForeignPtr fp $ \p -> pokeByteOff p off x
|
||||
loop n (off+1) s' fp
|
||||
{-# NOINLINE realloc #-}
|
||||
realloc fp n off s x = do
|
||||
let n' = min (n+n) chunkSize
|
||||
fp' <- copy0 fp n n'
|
||||
withForeignPtr fp' $ \p -> pokeByteOff p off x
|
||||
loop n' (off+1) s fp'
|
||||
trimUp fp off = B.PS fp 0 off
|
||||
copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
|
||||
copy0 !src !srcLen !destLen =
|
||||
#if defined(ASSERTS)
|
||||
assert (srcLen <= destLen) $
|
||||
#endif
|
||||
do
|
||||
dest <- mallocByteString destLen
|
||||
withForeignPtr src $ \src' ->
|
||||
withForeignPtr dest $ \dest' ->
|
||||
memcpy dest' src' (fromIntegral srcLen)
|
||||
return dest
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
|
||||
unstream :: Stream Word8 -> ByteString
|
||||
unstream = unstreamChunks defaultChunkSize
|
||||
|
||||
decodeError :: forall s. String -> String -> OnDecodeError -> Maybe Word8
|
||||
-> s -> Step s Char
|
||||
decodeError func kind onErr mb i =
|
||||
case onErr desc mb of
|
||||
Nothing -> Skip i
|
||||
Just c -> Yield c i
|
||||
where desc = "Data.Text.Lazy.Encoding.Fusion." ++ func ++ ": Invalid " ++
|
||||
kind ++ " stream"
|
@ -1,120 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Fusion
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Core stream fusion functionality for text.
|
||||
|
||||
module Data.Text.Internal.Lazy.Fusion
|
||||
(
|
||||
stream
|
||||
, unstream
|
||||
, unstreamChunks
|
||||
, length
|
||||
, unfoldrN
|
||||
, index
|
||||
, countChar
|
||||
) where
|
||||
|
||||
import Prelude hiding (length)
|
||||
import qualified Data.Text.Internal.Fusion.Common as S
|
||||
import Control.Monad.ST (runST)
|
||||
import Data.Text.Internal.Fusion.Types
|
||||
import Data.Text.Internal.Fusion.Size (isEmpty, unknownSize)
|
||||
import Data.Text.Internal.Lazy
|
||||
import qualified Data.Text.Internal as I
|
||||
import qualified Data.Text.Array as A
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL)
|
||||
import Data.Text.Unsafe (Iter(..), iter)
|
||||
import Data.Int (Int64)
|
||||
|
||||
default(Int64)
|
||||
|
||||
-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
|
||||
stream :: Text -> Stream Char
|
||||
stream text = Stream next (text :*: 0) unknownSize
|
||||
where
|
||||
next (Empty :*: _) = Done
|
||||
next (txt@(Chunk t@(I.Text _ _ len) ts) :*: i)
|
||||
| i >= len = next (ts :*: 0)
|
||||
| otherwise = Yield c (txt :*: i+d)
|
||||
where Iter c d = iter t i
|
||||
{-# INLINE [0] stream #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream Char' into a 'Text', using the given
|
||||
-- chunk size.
|
||||
unstreamChunks :: Int -> Stream Char -> Text
|
||||
unstreamChunks !chunkSize (Stream next s0 len0)
|
||||
| isEmpty len0 = Empty
|
||||
| otherwise = outer s0
|
||||
where
|
||||
outer so = {-# SCC "unstreamChunks/outer" #-}
|
||||
case next so of
|
||||
Done -> Empty
|
||||
Skip s' -> outer s'
|
||||
Yield x s' -> runST $ do
|
||||
a <- A.new unknownLength
|
||||
unsafeWrite a 0 x >>= inner a unknownLength s'
|
||||
where unknownLength = 4
|
||||
where
|
||||
inner marr !len s !i
|
||||
| i + 1 >= chunkSize = finish marr i s
|
||||
| i + 1 >= len = {-# SCC "unstreamChunks/resize" #-} do
|
||||
let newLen = min (len `shiftL` 1) chunkSize
|
||||
marr' <- A.new newLen
|
||||
A.copyM marr' 0 marr 0 len
|
||||
inner marr' newLen s i
|
||||
| otherwise =
|
||||
{-# SCC "unstreamChunks/inner" #-}
|
||||
case next s of
|
||||
Done -> finish marr i s
|
||||
Skip s' -> inner marr len s' i
|
||||
Yield x s' -> do d <- unsafeWrite marr i x
|
||||
inner marr len s' (i+d)
|
||||
finish marr len s' = do
|
||||
arr <- A.unsafeFreeze marr
|
||||
return (I.Text arr 0 len `Chunk` outer s')
|
||||
{-# INLINE [0] unstreamChunks #-}
|
||||
|
||||
-- | /O(n)/ Convert a 'Stream Char' into a 'Text', using
|
||||
-- 'defaultChunkSize'.
|
||||
unstream :: Stream Char -> Text
|
||||
unstream = unstreamChunks defaultChunkSize
|
||||
{-# INLINE [0] unstream #-}
|
||||
|
||||
-- | /O(n)/ Returns the number of characters in a text.
|
||||
length :: Stream Char -> Int64
|
||||
length = S.lengthI
|
||||
{-# INLINE[0] length #-}
|
||||
|
||||
{-# RULES "LAZY STREAM stream/unstream fusion" forall s.
|
||||
stream (unstream s) = s #-}
|
||||
|
||||
-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
|
||||
-- value. However, the length of the result is limited by the
|
||||
-- first argument to 'unfoldrN'. This function is more efficient than
|
||||
-- 'unfoldr' when the length of the result is known.
|
||||
unfoldrN :: Int64 -> (a -> Maybe (Char,a)) -> a -> Stream Char
|
||||
unfoldrN n = S.unfoldrNI n
|
||||
{-# INLINE [0] unfoldrN #-}
|
||||
|
||||
-- | /O(n)/ stream index (subscript) operator, starting from 0.
|
||||
index :: Stream Char -> Int64 -> Char
|
||||
index = S.indexI
|
||||
{-# INLINE [0] index #-}
|
||||
|
||||
-- | /O(n)/ The 'count' function returns the number of times the query
|
||||
-- element appears in the given stream.
|
||||
countChar :: Char -> Stream Char -> Int64
|
||||
countChar = S.countCharI
|
||||
{-# INLINE [0] countChar #-}
|
@ -1,134 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Search
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Fast substring search for lazy 'Text', based on work by Boyer,
|
||||
-- Moore, Horspool, Sunday, and Lundh. Adapted from the strict
|
||||
-- implementation.
|
||||
|
||||
module Data.Text.Internal.Lazy.Search
|
||||
(
|
||||
indices
|
||||
) where
|
||||
|
||||
import qualified Data.Text.Array as A
|
||||
import Data.Int (Int64)
|
||||
import Data.Word (Word16, Word64)
|
||||
import qualified Data.Text.Internal as T
|
||||
import Data.Text.Internal.Fusion.Types (PairS(..))
|
||||
import Data.Text.Internal.Lazy (Text(..), foldlChunks)
|
||||
import Data.Bits ((.|.), (.&.))
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL)
|
||||
|
||||
-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
|
||||
-- @needle@ within @haystack@.
|
||||
--
|
||||
-- This function is strict in @needle@, and lazy (as far as possible)
|
||||
-- in the chunks of @haystack@.
|
||||
--
|
||||
-- In (unlikely) bad cases, this algorithm's complexity degrades
|
||||
-- towards /O(n*m)/.
|
||||
indices :: Text -- ^ Substring to search for (@needle@)
|
||||
-> Text -- ^ Text to search in (@haystack@)
|
||||
-> [Int64]
|
||||
indices needle@(Chunk n ns) _haystack@(Chunk k ks)
|
||||
| nlen <= 0 = []
|
||||
| nlen == 1 = indicesOne (nindex 0) 0 k ks
|
||||
| otherwise = advance k ks 0 0
|
||||
where
|
||||
advance x@(T.Text _ _ l) xs = scan
|
||||
where
|
||||
scan !g !i
|
||||
| i >= m = case xs of
|
||||
Empty -> []
|
||||
Chunk y ys -> advance y ys g (i-m)
|
||||
| lackingHay (i + nlen) x xs = []
|
||||
| c == z && candidateMatch 0 = g : scan (g+nlen) (i+nlen)
|
||||
| otherwise = scan (g+delta) (i+delta)
|
||||
where
|
||||
m = fromIntegral l
|
||||
c = hindex (i + nlast)
|
||||
delta | nextInPattern = nlen + 1
|
||||
| c == z = skip + 1
|
||||
| otherwise = 1
|
||||
nextInPattern = mask .&. swizzle (hindex (i+nlen)) == 0
|
||||
candidateMatch !j
|
||||
| j >= nlast = True
|
||||
| hindex (i+j) /= nindex j = False
|
||||
| otherwise = candidateMatch (j+1)
|
||||
hindex = index x xs
|
||||
nlen = wordLength needle
|
||||
nlast = nlen - 1
|
||||
nindex = index n ns
|
||||
z = foldlChunks fin 0 needle
|
||||
where fin _ (T.Text farr foff flen) = A.unsafeIndex farr (foff+flen-1)
|
||||
(mask :: Word64) :*: skip = buildTable n ns 0 0 0 (nlen-2)
|
||||
swizzle w = 1 `shiftL` (fromIntegral w .&. 0x3f)
|
||||
buildTable (T.Text xarr xoff xlen) xs = go
|
||||
where
|
||||
go !(g::Int64) !i !msk !skp
|
||||
| i >= xlast = case xs of
|
||||
Empty -> (msk .|. swizzle z) :*: skp
|
||||
Chunk y ys -> buildTable y ys g 0 msk' skp'
|
||||
| otherwise = go (g+1) (i+1) msk' skp'
|
||||
where c = A.unsafeIndex xarr (xoff+i)
|
||||
msk' = msk .|. swizzle c
|
||||
skp' | c == z = nlen - g - 2
|
||||
| otherwise = skp
|
||||
xlast = xlen - 1
|
||||
-- | Check whether an attempt to index into the haystack at the
|
||||
-- given offset would fail.
|
||||
lackingHay q = go 0
|
||||
where
|
||||
go p (T.Text _ _ l) ps = p' < q && case ps of
|
||||
Empty -> True
|
||||
Chunk r rs -> go p' r rs
|
||||
where p' = p + fromIntegral l
|
||||
indices _ _ = []
|
||||
|
||||
-- | Fast index into a partly unpacked 'Text'. We take into account
|
||||
-- the possibility that the caller might try to access one element
|
||||
-- past the end.
|
||||
index :: T.Text -> Text -> Int64 -> Word16
|
||||
index (T.Text arr off len) xs !i
|
||||
| j < len = A.unsafeIndex arr (off+j)
|
||||
| otherwise = case xs of
|
||||
Empty
|
||||
-- out of bounds, but legal
|
||||
| j == len -> 0
|
||||
-- should never happen, due to lackingHay above
|
||||
| otherwise -> emptyError "index"
|
||||
Chunk c cs -> index c cs (i-fromIntegral len)
|
||||
where j = fromIntegral i
|
||||
|
||||
-- | A variant of 'indices' that scans linearly for a single 'Word16'.
|
||||
indicesOne :: Word16 -> Int64 -> T.Text -> Text -> [Int64]
|
||||
indicesOne c = chunk
|
||||
where
|
||||
chunk !i (T.Text oarr ooff olen) os = go 0
|
||||
where
|
||||
go h | h >= olen = case os of
|
||||
Empty -> []
|
||||
Chunk y ys -> chunk (i+fromIntegral olen) y ys
|
||||
| on == c = i + fromIntegral h : go (h+1)
|
||||
| otherwise = go (h+1)
|
||||
where on = A.unsafeIndex oarr (ooff+h)
|
||||
|
||||
-- | The number of 'Word16' values in a 'Text'.
|
||||
wordLength :: Text -> Int64
|
||||
wordLength = foldlChunks sumLength 0
|
||||
where sumLength i (T.Text _ _ l) = i + fromIntegral l
|
||||
|
||||
emptyError :: String -> a
|
||||
emptyError fun = error ("Data.Text.Lazy.Search." ++ fun ++ ": empty input")
|
@ -1,37 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, Rank2Types, UnboxedTuples #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Private
|
||||
-- Copyright : (c) 2011 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
|
||||
module Data.Text.Internal.Private
|
||||
(
|
||||
runText
|
||||
, span_
|
||||
) where
|
||||
|
||||
import Control.Monad.ST (ST, runST)
|
||||
import Data.Text.Internal (Text(..), text)
|
||||
import Data.Text.Unsafe (Iter(..), iter)
|
||||
import qualified Data.Text.Array as A
|
||||
|
||||
span_ :: (Char -> Bool) -> Text -> (# Text, Text #)
|
||||
span_ p t@(Text arr off len) = (# hd,tl #)
|
||||
where hd = text arr off k
|
||||
tl = text arr (off+k) (len-k)
|
||||
!k = loop 0
|
||||
loop !i | i < len && p c = loop (i+d)
|
||||
| otherwise = i
|
||||
where Iter c d = iter t i
|
||||
{-# INLINE span_ #-}
|
||||
|
||||
runText :: (forall s. (A.MArray s -> Int -> ST s Text) -> ST s Text) -> Text
|
||||
runText act = runST (act $ \ !marr !len -> do
|
||||
arr <- A.unsafeFreeze marr
|
||||
return $! text arr 0 len)
|
||||
{-# INLINE runText #-}
|
@ -1,69 +0,0 @@
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Read
|
||||
-- Copyright : (c) 2014 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Common internal functions for reading textual data.
|
||||
module Data.Text.Internal.Read
|
||||
(
|
||||
IReader
|
||||
, IParser(..)
|
||||
, T(..)
|
||||
, digitToInt
|
||||
, hexDigitToInt
|
||||
, perhaps
|
||||
) where
|
||||
|
||||
import Control.Applicative as App (Applicative(..))
|
||||
import Control.Arrow (first)
|
||||
import Control.Monad (ap)
|
||||
import Data.Char (ord)
|
||||
|
||||
type IReader t a = t -> Either String (a,t)
|
||||
|
||||
newtype IParser t a = P {
|
||||
runP :: IReader t a
|
||||
}
|
||||
|
||||
instance Functor (IParser t) where
|
||||
fmap f m = P $ fmap (first f) . runP m
|
||||
|
||||
instance Applicative (IParser t) where
|
||||
pure a = P $ \t -> Right (a,t)
|
||||
{-# INLINE pure #-}
|
||||
(<*>) = ap
|
||||
|
||||
instance Monad (IParser t) where
|
||||
return = App.pure
|
||||
m >>= k = P $ \t -> case runP m t of
|
||||
Left err -> Left err
|
||||
Right (a,t') -> runP (k a) t'
|
||||
{-# INLINE (>>=) #-}
|
||||
|
||||
-- If we ever need a `MonadFail` instance the definition below can be used
|
||||
--
|
||||
-- > instance MonadFail (IParser t) where
|
||||
-- > fail msg = P $ \_ -> Left msg
|
||||
--
|
||||
-- But given the code compiles fine with a post-MFP GHC 8.6+ we don't need
|
||||
-- one just yet.
|
||||
|
||||
data T = T !Integer !Int
|
||||
|
||||
perhaps :: a -> IParser t a -> IParser t a
|
||||
perhaps def m = P $ \t -> case runP m t of
|
||||
Left _ -> Right (def,t)
|
||||
r@(Right _) -> r
|
||||
|
||||
hexDigitToInt :: Char -> Int
|
||||
hexDigitToInt c
|
||||
| c >= '0' && c <= '9' = ord c - ord '0'
|
||||
| c >= 'a' && c <= 'f' = ord c - (ord 'a' - 10)
|
||||
| otherwise = ord c - (ord 'A' - 10)
|
||||
|
||||
digitToInt :: Char -> Int
|
||||
digitToInt c = ord c - ord '0'
|
@ -1,89 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Search
|
||||
-- Copyright : (c) Bryan O'Sullivan 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Fast substring search for 'Text', based on work by Boyer, Moore,
|
||||
-- Horspool, Sunday, and Lundh.
|
||||
--
|
||||
-- References:
|
||||
--
|
||||
-- * R. S. Boyer, J. S. Moore: A Fast String Searching Algorithm.
|
||||
-- Communications of the ACM, 20, 10, 762-772 (1977)
|
||||
--
|
||||
-- * R. N. Horspool: Practical Fast Searching in Strings. Software -
|
||||
-- Practice and Experience 10, 501-506 (1980)
|
||||
--
|
||||
-- * D. M. Sunday: A Very Fast Substring Search Algorithm.
|
||||
-- Communications of the ACM, 33, 8, 132-142 (1990)
|
||||
--
|
||||
-- * F. Lundh: The Fast Search Algorithm.
|
||||
-- <http://effbot.org/zone/stringlib.htm> (2006)
|
||||
|
||||
module Data.Text.Internal.Search
|
||||
(
|
||||
indices
|
||||
) where
|
||||
|
||||
import qualified Data.Text.Array as A
|
||||
import Data.Word (Word64)
|
||||
import Data.Text.Internal (Text(..))
|
||||
import Data.Bits ((.|.), (.&.))
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftL)
|
||||
|
||||
data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int
|
||||
|
||||
-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
|
||||
-- @needle@ within @haystack@. The offsets returned represent
|
||||
-- uncorrected indices in the low-level \"needle\" array, to which its
|
||||
-- offset must be added.
|
||||
--
|
||||
-- In (unlikely) bad cases, this algorithm's complexity degrades
|
||||
-- towards /O(n*m)/.
|
||||
indices :: Text -- ^ Substring to search for (@needle@)
|
||||
-> Text -- ^ Text to search in (@haystack@)
|
||||
-> [Int]
|
||||
indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
|
||||
| nlen == 1 = scanOne (nindex 0)
|
||||
| nlen <= 0 || ldiff < 0 = []
|
||||
| otherwise = scan 0
|
||||
where
|
||||
ldiff = hlen - nlen
|
||||
nlast = nlen - 1
|
||||
z = nindex nlast
|
||||
nindex k = A.unsafeIndex narr (noff+k)
|
||||
hindex k = A.unsafeIndex harr (hoff+k)
|
||||
hindex' k | k == hlen = 0
|
||||
| otherwise = A.unsafeIndex harr (hoff+k)
|
||||
buildTable !i !msk !skp
|
||||
| i >= nlast = (msk .|. swizzle z) :* skp
|
||||
| otherwise = buildTable (i+1) (msk .|. swizzle c) skp'
|
||||
where c = nindex i
|
||||
skp' | c == z = nlen - i - 2
|
||||
| otherwise = skp
|
||||
swizzle k = 1 `shiftL` (fromIntegral k .&. 0x3f)
|
||||
scan !i
|
||||
| i > ldiff = []
|
||||
| c == z && candidateMatch 0 = i : scan (i + nlen)
|
||||
| otherwise = scan (i + delta)
|
||||
where c = hindex (i + nlast)
|
||||
candidateMatch !j
|
||||
| j >= nlast = True
|
||||
| hindex (i+j) /= nindex j = False
|
||||
| otherwise = candidateMatch (j+1)
|
||||
delta | nextInPattern = nlen + 1
|
||||
| c == z = skip + 1
|
||||
| otherwise = 1
|
||||
where nextInPattern = mask .&. swizzle (hindex' (i+nlen)) == 0
|
||||
!(mask :* skip) = buildTable 0 0 (nlen-2)
|
||||
scanOne c = loop 0
|
||||
where loop !i | i >= hlen = []
|
||||
| hindex i == c = i : loop (i+1)
|
||||
| otherwise = loop (i+1)
|
||||
{-# INLINE indices #-}
|
@ -1,56 +0,0 @@
|
||||
{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
|
||||
{-# OPTIONS_HADDOCK not-home #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Unsafe
|
||||
-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- A module containing /unsafe/ operations, for /very very careful/ use
|
||||
-- in /heavily tested/ code.
|
||||
module Data.Text.Internal.Unsafe
|
||||
(
|
||||
inlineInterleaveST
|
||||
, inlinePerformIO
|
||||
) where
|
||||
|
||||
import GHC.ST (ST(..))
|
||||
#if defined(__GLASGOW_HASKELL__)
|
||||
import GHC.IO (IO(IO))
|
||||
import GHC.Base (realWorld#)
|
||||
#endif
|
||||
|
||||
|
||||
-- | Just like unsafePerformIO, but we inline it. Big performance gains as
|
||||
-- it exposes lots of things to further inlining. /Very unsafe/. In
|
||||
-- particular, you should do no memory allocation inside an
|
||||
-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
|
||||
--
|
||||
{-# INLINE inlinePerformIO #-}
|
||||
inlinePerformIO :: IO a -> a
|
||||
#if defined(__GLASGOW_HASKELL__)
|
||||
inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
|
||||
#else
|
||||
inlinePerformIO = unsafePerformIO
|
||||
#endif
|
||||
|
||||
-- | Allow an 'ST' computation to be deferred lazily. When passed an
|
||||
-- action of type 'ST' @s@ @a@, the action will only be performed when
|
||||
-- the value of @a@ is demanded.
|
||||
--
|
||||
-- This function is identical to the normal unsafeInterleaveST, but is
|
||||
-- inlined and hence faster.
|
||||
--
|
||||
-- /Note/: This operation is highly unsafe, as it can introduce
|
||||
-- externally visible non-determinism into an 'ST' action.
|
||||
inlineInterleaveST :: ST s a -> ST s a
|
||||
inlineInterleaveST (ST m) = ST $ \ s ->
|
||||
let r = case m s of (# _, res #) -> res in (# s, r #)
|
||||
{-# INLINE inlineInterleaveST #-}
|
@ -1,95 +0,0 @@
|
||||
{-# LANGUAGE CPP, MagicHash #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Unsafe.Char
|
||||
-- Copyright : (c) 2008, 2009 Tom Harper,
|
||||
-- (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Duncan Coutts
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Fast character manipulation functions.
|
||||
module Data.Text.Internal.Unsafe.Char
|
||||
(
|
||||
ord
|
||||
, unsafeChr
|
||||
, unsafeChr8
|
||||
, unsafeChr32
|
||||
, unsafeWrite
|
||||
-- , unsafeWriteRev
|
||||
) where
|
||||
|
||||
#ifdef ASSERTS
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import Control.Monad.ST (ST)
|
||||
import Data.Bits ((.&.))
|
||||
import Data.Text.Internal.Unsafe.Shift (shiftR)
|
||||
import GHC.Exts (Char(..), Int(..), chr#, ord#, word2Int#)
|
||||
import GHC.Word (Word8(..), Word16(..), Word32(..))
|
||||
import qualified Data.Text.Array as A
|
||||
|
||||
ord :: Char -> Int
|
||||
ord (C# c#) = I# (ord# c#)
|
||||
{-# INLINE ord #-}
|
||||
|
||||
unsafeChr :: Word16 -> Char
|
||||
unsafeChr (W16# w#) = C# (chr# (word2Int# w#))
|
||||
{-# INLINE unsafeChr #-}
|
||||
|
||||
unsafeChr8 :: Word8 -> Char
|
||||
unsafeChr8 (W8# w#) = C# (chr# (word2Int# w#))
|
||||
{-# INLINE unsafeChr8 #-}
|
||||
|
||||
unsafeChr32 :: Word32 -> Char
|
||||
unsafeChr32 (W32# w#) = C# (chr# (word2Int# w#))
|
||||
{-# INLINE unsafeChr32 #-}
|
||||
|
||||
-- | Write a character into the array at the given offset. Returns
|
||||
-- the number of 'Word16's written.
|
||||
unsafeWrite :: A.MArray s -> Int -> Char -> ST s Int
|
||||
unsafeWrite marr i c
|
||||
| n < 0x10000 = do
|
||||
#if defined(ASSERTS)
|
||||
assert (i >= 0) . assert (i < A.length marr) $ return ()
|
||||
#endif
|
||||
A.unsafeWrite marr i (fromIntegral n)
|
||||
return 1
|
||||
| otherwise = do
|
||||
#if defined(ASSERTS)
|
||||
assert (i >= 0) . assert (i < A.length marr - 1) $ return ()
|
||||
#endif
|
||||
A.unsafeWrite marr i lo
|
||||
A.unsafeWrite marr (i+1) hi
|
||||
return 2
|
||||
where n = ord c
|
||||
m = n - 0x10000
|
||||
lo = fromIntegral $ (m `shiftR` 10) + 0xD800
|
||||
hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
|
||||
{-# INLINE unsafeWrite #-}
|
||||
|
||||
{-
|
||||
unsafeWriteRev :: A.MArray s Word16 -> Int -> Char -> ST s Int
|
||||
unsafeWriteRev marr i c
|
||||
| n < 0x10000 = do
|
||||
assert (i >= 0) . assert (i < A.length marr) $
|
||||
A.unsafeWrite marr i (fromIntegral n)
|
||||
return (i-1)
|
||||
| otherwise = do
|
||||
assert (i >= 1) . assert (i < A.length marr) $
|
||||
A.unsafeWrite marr (i-1) lo
|
||||
A.unsafeWrite marr i hi
|
||||
return (i-2)
|
||||
where n = ord c
|
||||
m = n - 0x10000
|
||||
lo = fromIntegral $ (m `shiftR` 10) + 0xD800
|
||||
hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
|
||||
{-# INLINE unsafeWriteRev #-}
|
||||
-}
|
@ -1,72 +0,0 @@
|
||||
{-# LANGUAGE MagicHash #-}
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Internal.Unsafe.Shift
|
||||
-- Copyright : (c) Bryan O'Sullivan 2009
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- /Warning/: this is an internal module, and does not have a stable
|
||||
-- API or name. Functions in this module may not check or enforce
|
||||
-- preconditions expected by public modules. Use at your own risk!
|
||||
--
|
||||
-- Fast, unchecked bit shifting functions.
|
||||
|
||||
module Data.Text.Internal.Unsafe.Shift
|
||||
(
|
||||
UnsafeShift(..)
|
||||
) where
|
||||
|
||||
-- import qualified Data.Bits as Bits
|
||||
import GHC.Base
|
||||
import GHC.Word
|
||||
|
||||
-- | This is a workaround for poor optimisation in GHC 6.8.2. It
|
||||
-- fails to notice constant-width shifts, and adds a test and branch
|
||||
-- to every shift. This imposes about a 10% performance hit.
|
||||
--
|
||||
-- These functions are undefined when the amount being shifted by is
|
||||
-- greater than the size in bits of a machine Int#.
|
||||
class UnsafeShift a where
|
||||
shiftL :: a -> Int -> a
|
||||
shiftR :: a -> Int -> a
|
||||
|
||||
instance UnsafeShift Word16 where
|
||||
{-# INLINE shiftL #-}
|
||||
shiftL (W16# x#) (I# i#) = W16# (narrow16Word# (x# `uncheckedShiftL#` i#))
|
||||
|
||||
{-# INLINE shiftR #-}
|
||||
shiftR (W16# x#) (I# i#) = W16# (x# `uncheckedShiftRL#` i#)
|
||||
|
||||
instance UnsafeShift Word32 where
|
||||
{-# INLINE shiftL #-}
|
||||
shiftL (W32# x#) (I# i#) = W32# (narrow32Word# (x# `uncheckedShiftL#` i#))
|
||||
|
||||
{-# INLINE shiftR #-}
|
||||
shiftR (W32# x#) (I# i#) = W32# (x# `uncheckedShiftRL#` i#)
|
||||
|
||||
instance UnsafeShift Word64 where
|
||||
{-# INLINE shiftL #-}
|
||||
shiftL (W64# x#) (I# i#) = W64# (x# `uncheckedShiftL64#` i#)
|
||||
|
||||
{-# INLINE shiftR #-}
|
||||
shiftR (W64# x#) (I# i#) = W64# (x# `uncheckedShiftRL64#` i#)
|
||||
|
||||
instance UnsafeShift Int where
|
||||
{-# INLINE shiftL #-}
|
||||
shiftL (I# x#) (I# i#) = I# (x# `iShiftL#` i#)
|
||||
|
||||
{-# INLINE shiftR #-}
|
||||
shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
|
||||
|
||||
{-
|
||||
instance UnsafeShift Integer where
|
||||
{-# INLINE shiftL #-}
|
||||
shiftL = Bits.shiftL
|
||||
|
||||
{-# INLINE shiftR #-}
|
||||
shiftR = Bits.shiftR
|
||||
-}
|
File diff suppressed because it is too large
Load Diff
@ -1,57 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Builder
|
||||
-- Copyright : (c) 2013 Bryan O'Sullivan
|
||||
-- (c) 2010 Johan Tibell
|
||||
-- License : BSD-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Johan Tibell <johan.tibell@gmail.com>
|
||||
-- Portability : portable to Hugs and GHC
|
||||
--
|
||||
-- Efficient construction of lazy @Text@ values. The principal
|
||||
-- operations on a @Builder@ are @singleton@, @fromText@, and
|
||||
-- @fromLazyText@, which construct new builders, and 'mappend', which
|
||||
-- concatenates two builders.
|
||||
--
|
||||
-- To get maximum performance when building lazy @Text@ values using a
|
||||
-- builder, associate @mappend@ calls to the right. For example,
|
||||
-- prefer
|
||||
--
|
||||
-- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
|
||||
--
|
||||
-- to
|
||||
--
|
||||
-- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
|
||||
--
|
||||
-- as the latter associates @mappend@ to the left. Or, equivalently,
|
||||
-- prefer
|
||||
--
|
||||
-- > singleton 'a' <> singleton 'b' <> singleton 'c'
|
||||
--
|
||||
-- since the '<>' from recent versions of 'Data.Monoid' associates
|
||||
-- to the right.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module Data.Text.Lazy.Builder
|
||||
( -- * The Builder type
|
||||
Builder
|
||||
, toLazyText
|
||||
, toLazyTextWith
|
||||
|
||||
-- * Constructing Builders
|
||||
, singleton
|
||||
, fromText
|
||||
, fromLazyText
|
||||
, fromString
|
||||
|
||||
-- * Flushing the buffer state
|
||||
, flush
|
||||
) where
|
||||
|
||||
import Data.Text.Internal.Builder
|
@ -1,264 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, ScopedTypeVariables,
|
||||
UnboxedTuples #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-- Module: Data.Text.Lazy.Builder.Int
|
||||
-- Copyright: (c) 2013 Bryan O'Sullivan
|
||||
-- (c) 2011 MailRank, Inc.
|
||||
-- License: BSD-style
|
||||
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
|
||||
-- Portability: portable
|
||||
--
|
||||
-- Efficiently write an integral value to a 'Builder'.
|
||||
|
||||
module Data.Text.Lazy.Builder.Int
|
||||
(
|
||||
decimal
|
||||
, hexadecimal
|
||||
) where
|
||||
|
||||
import Data.Int (Int8, Int16, Int32, Int64)
|
||||
import Data.Monoid (mempty)
|
||||
import qualified Data.ByteString.Unsafe as B
|
||||
import Data.Text.Internal.Builder.Functions ((<>), i2d)
|
||||
import Data.Text.Internal.Builder
|
||||
import Data.Text.Internal.Builder.Int.Digits (digits)
|
||||
import Data.Text.Array
|
||||
import Data.Word (Word, Word8, Word16, Word32, Word64)
|
||||
import GHC.Base (quotInt, remInt)
|
||||
import GHC.Num (quotRemInteger)
|
||||
import GHC.Types (Int(..))
|
||||
import Control.Monad.ST
|
||||
#if MIN_VERSION_base(4,11,0)
|
||||
import Prelude hiding ((<>))
|
||||
#endif
|
||||
|
||||
#ifdef __GLASGOW_HASKELL__
|
||||
# if defined(INTEGER_GMP)
|
||||
import GHC.Integer.GMP.Internals (Integer(S#))
|
||||
# elif defined(INTEGER_SIMPLE)
|
||||
import GHC.Integer
|
||||
# else
|
||||
# error "You need to use either GMP or integer-simple."
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(INTEGER_GMP) || defined(INTEGER_SIMPLE)
|
||||
# define PAIR(a,b) (# a,b #)
|
||||
#else
|
||||
# define PAIR(a,b) (a,b)
|
||||
#endif
|
||||
|
||||
decimal :: Integral a => a -> Builder
|
||||
{-# RULES "decimal/Int8" decimal = boundedDecimal :: Int8 -> Builder #-}
|
||||
{-# RULES "decimal/Int" decimal = boundedDecimal :: Int -> Builder #-}
|
||||
{-# RULES "decimal/Int16" decimal = boundedDecimal :: Int16 -> Builder #-}
|
||||
{-# RULES "decimal/Int32" decimal = boundedDecimal :: Int32 -> Builder #-}
|
||||
{-# RULES "decimal/Int64" decimal = boundedDecimal :: Int64 -> Builder #-}
|
||||
{-# RULES "decimal/Word" decimal = positive :: Data.Word.Word -> Builder #-}
|
||||
{-# RULES "decimal/Word8" decimal = positive :: Word8 -> Builder #-}
|
||||
{-# RULES "decimal/Word16" decimal = positive :: Word16 -> Builder #-}
|
||||
{-# RULES "decimal/Word32" decimal = positive :: Word32 -> Builder #-}
|
||||
{-# RULES "decimal/Word64" decimal = positive :: Word64 -> Builder #-}
|
||||
{-# RULES "decimal/Integer" decimal = integer 10 :: Integer -> Builder #-}
|
||||
decimal i = decimal' (<= -128) i
|
||||
{-# NOINLINE decimal #-}
|
||||
|
||||
boundedDecimal :: (Integral a, Bounded a) => a -> Builder
|
||||
{-# SPECIALIZE boundedDecimal :: Int -> Builder #-}
|
||||
{-# SPECIALIZE boundedDecimal :: Int8 -> Builder #-}
|
||||
{-# SPECIALIZE boundedDecimal :: Int16 -> Builder #-}
|
||||
{-# SPECIALIZE boundedDecimal :: Int32 -> Builder #-}
|
||||
{-# SPECIALIZE boundedDecimal :: Int64 -> Builder #-}
|
||||
boundedDecimal i = decimal' (== minBound) i
|
||||
|
||||
decimal' :: (Integral a) => (a -> Bool) -> a -> Builder
|
||||
{-# INLINE decimal' #-}
|
||||
decimal' p i
|
||||
| i < 0 = if p i
|
||||
then let (q, r) = i `quotRem` 10
|
||||
qq = -q
|
||||
!n = countDigits qq
|
||||
in writeN (n + 2) $ \marr off -> do
|
||||
unsafeWrite marr off minus
|
||||
posDecimal marr (off+1) n qq
|
||||
unsafeWrite marr (off+n+1) (i2w (-r))
|
||||
else let j = -i
|
||||
!n = countDigits j
|
||||
in writeN (n + 1) $ \marr off ->
|
||||
unsafeWrite marr off minus >> posDecimal marr (off+1) n j
|
||||
| otherwise = positive i
|
||||
|
||||
positive :: (Integral a) => a -> Builder
|
||||
{-# SPECIALIZE positive :: Int -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Int8 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Int16 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Int32 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Int64 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Word -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Word8 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Word16 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Word32 -> Builder #-}
|
||||
{-# SPECIALIZE positive :: Word64 -> Builder #-}
|
||||
positive i
|
||||
| i < 10 = writeN 1 $ \marr off -> unsafeWrite marr off (i2w i)
|
||||
| otherwise = let !n = countDigits i
|
||||
in writeN n $ \marr off -> posDecimal marr off n i
|
||||
|
||||
posDecimal :: (Integral a) =>
|
||||
forall s. MArray s -> Int -> Int -> a -> ST s ()
|
||||
{-# INLINE posDecimal #-}
|
||||
posDecimal marr off0 ds v0 = go (off0 + ds - 1) v0
|
||||
where go off v
|
||||
| v >= 100 = do
|
||||
let (q, r) = v `quotRem` 100
|
||||
write2 off r
|
||||
go (off - 2) q
|
||||
| v < 10 = unsafeWrite marr off (i2w v)
|
||||
| otherwise = write2 off v
|
||||
write2 off i0 = do
|
||||
let i = fromIntegral i0; j = i + i
|
||||
unsafeWrite marr off $ get (j + 1)
|
||||
unsafeWrite marr (off - 1) $ get j
|
||||
get = fromIntegral . B.unsafeIndex digits
|
||||
|
||||
minus, zero :: Word16
|
||||
{-# INLINE minus #-}
|
||||
{-# INLINE zero #-}
|
||||
minus = 45
|
||||
zero = 48
|
||||
|
||||
i2w :: (Integral a) => a -> Word16
|
||||
{-# INLINE i2w #-}
|
||||
i2w v = zero + fromIntegral v
|
||||
|
||||
countDigits :: (Integral a) => a -> Int
|
||||
{-# INLINE countDigits #-}
|
||||
countDigits v0
|
||||
| fromIntegral v64 == v0 = go 1 v64
|
||||
| otherwise = goBig 1 (fromIntegral v0)
|
||||
where v64 = fromIntegral v0
|
||||
goBig !k (v :: Integer)
|
||||
| v > big = goBig (k + 19) (v `quot` big)
|
||||
| otherwise = go k (fromIntegral v)
|
||||
big = 10000000000000000000
|
||||
go !k (v :: Word64)
|
||||
| v < 10 = k
|
||||
| v < 100 = k + 1
|
||||
| v < 1000 = k + 2
|
||||
| v < 1000000000000 =
|
||||
k + if v < 100000000
|
||||
then if v < 1000000
|
||||
then if v < 10000
|
||||
then 3
|
||||
else 4 + fin v 100000
|
||||
else 6 + fin v 10000000
|
||||
else if v < 10000000000
|
||||
then 8 + fin v 1000000000
|
||||
else 10 + fin v 100000000000
|
||||
| otherwise = go (k + 12) (v `quot` 1000000000000)
|
||||
fin v n = if v >= n then 1 else 0
|
||||
|
||||
hexadecimal :: Integral a => a -> Builder
|
||||
{-# SPECIALIZE hexadecimal :: Int -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Int8 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Int16 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Int32 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Int64 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Word -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Word8 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Word16 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Word32 -> Builder #-}
|
||||
{-# SPECIALIZE hexadecimal :: Word64 -> Builder #-}
|
||||
{-# RULES "hexadecimal/Integer"
|
||||
hexadecimal = hexInteger :: Integer -> Builder #-}
|
||||
hexadecimal i
|
||||
| i < 0 = error hexErrMsg
|
||||
| otherwise = go i
|
||||
where
|
||||
go n | n < 16 = hexDigit n
|
||||
| otherwise = go (n `quot` 16) <> hexDigit (n `rem` 16)
|
||||
{-# NOINLINE[0] hexadecimal #-}
|
||||
|
||||
hexInteger :: Integer -> Builder
|
||||
hexInteger i
|
||||
| i < 0 = error hexErrMsg
|
||||
| otherwise = integer 16 i
|
||||
|
||||
hexErrMsg :: String
|
||||
hexErrMsg = "Data.Text.Lazy.Builder.Int.hexadecimal: applied to negative number"
|
||||
|
||||
hexDigit :: Integral a => a -> Builder
|
||||
hexDigit n
|
||||
| n <= 9 = singleton $! i2d (fromIntegral n)
|
||||
| otherwise = singleton $! toEnum (fromIntegral n + 87)
|
||||
{-# INLINE hexDigit #-}
|
||||
|
||||
data T = T !Integer !Int
|
||||
|
||||
integer :: Int -> Integer -> Builder
|
||||
#ifdef INTEGER_GMP
|
||||
integer 10 (S# i#) = decimal (I# i#)
|
||||
integer 16 (S# i#) = hexadecimal (I# i#)
|
||||
#endif
|
||||
integer base i
|
||||
| i < 0 = singleton '-' <> go (-i)
|
||||
| otherwise = go i
|
||||
where
|
||||
go n | n < maxInt = int (fromInteger n)
|
||||
| otherwise = putH (splitf (maxInt * maxInt) n)
|
||||
|
||||
splitf p n
|
||||
| p > n = [n]
|
||||
| otherwise = splith p (splitf (p*p) n)
|
||||
|
||||
splith p (n:ns) = case n `quotRemInteger` p of
|
||||
PAIR(q,r) | q > 0 -> q : r : splitb p ns
|
||||
| otherwise -> r : splitb p ns
|
||||
splith _ _ = error "splith: the impossible happened."
|
||||
|
||||
splitb p (n:ns) = case n `quotRemInteger` p of
|
||||
PAIR(q,r) -> q : r : splitb p ns
|
||||
splitb _ _ = []
|
||||
|
||||
T maxInt10 maxDigits10 =
|
||||
until ((>mi) . (*10) . fstT) (\(T n d) -> T (n*10) (d+1)) (T 10 1)
|
||||
where mi = fromIntegral (maxBound :: Int)
|
||||
T maxInt16 maxDigits16 =
|
||||
until ((>mi) . (*16) . fstT) (\(T n d) -> T (n*16) (d+1)) (T 16 1)
|
||||
where mi = fromIntegral (maxBound :: Int)
|
||||
|
||||
fstT (T a _) = a
|
||||
|
||||
maxInt | base == 10 = maxInt10
|
||||
| otherwise = maxInt16
|
||||
maxDigits | base == 10 = maxDigits10
|
||||
| otherwise = maxDigits16
|
||||
|
||||
putH (n:ns) = case n `quotRemInteger` maxInt of
|
||||
PAIR(x,y)
|
||||
| q > 0 -> int q <> pblock r <> putB ns
|
||||
| otherwise -> int r <> putB ns
|
||||
where q = fromInteger x
|
||||
r = fromInteger y
|
||||
putH _ = error "putH: the impossible happened"
|
||||
|
||||
putB (n:ns) = case n `quotRemInteger` maxInt of
|
||||
PAIR(x,y) -> pblock q <> pblock r <> putB ns
|
||||
where q = fromInteger x
|
||||
r = fromInteger y
|
||||
putB _ = Data.Monoid.mempty
|
||||
|
||||
int :: Int -> Builder
|
||||
int x | base == 10 = decimal x
|
||||
| otherwise = hexadecimal x
|
||||
|
||||
pblock = loop maxDigits
|
||||
where
|
||||
loop !d !n
|
||||
| d == 1 = hexDigit n
|
||||
| otherwise = loop (d-1) q <> hexDigit r
|
||||
where q = n `quotInt` base
|
||||
r = n `remInt` base
|
@ -1,253 +0,0 @@
|
||||
{-# LANGUAGE CPP, OverloadedStrings #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-- |
|
||||
-- Module: Data.Text.Lazy.Builder.RealFloat
|
||||
-- Copyright: (c) The University of Glasgow 1994-2002
|
||||
-- License: see libraries/base/LICENSE
|
||||
--
|
||||
-- Write a floating point value to a 'Builder'.
|
||||
|
||||
module Data.Text.Lazy.Builder.RealFloat
|
||||
(
|
||||
FPFormat(..)
|
||||
, realFloat
|
||||
, formatRealFloat
|
||||
) where
|
||||
|
||||
import Data.Array.Base (unsafeAt)
|
||||
import Data.Array.IArray
|
||||
import Data.Text.Internal.Builder.Functions ((<>), i2d)
|
||||
import Data.Text.Lazy.Builder.Int (decimal)
|
||||
import Data.Text.Internal.Builder.RealFloat.Functions (roundTo)
|
||||
import Data.Text.Lazy.Builder
|
||||
import qualified Data.Text as T
|
||||
#if MIN_VERSION_base(4,11,0)
|
||||
import Prelude hiding ((<>))
|
||||
#endif
|
||||
|
||||
-- | Control the rendering of floating point numbers.
|
||||
data FPFormat = Exponent
|
||||
-- ^ Scientific notation (e.g. @2.3e123@).
|
||||
| Fixed
|
||||
-- ^ Standard decimal notation.
|
||||
| Generic
|
||||
-- ^ Use decimal notation for values between @0.1@ and
|
||||
-- @9,999,999@, and scientific notation otherwise.
|
||||
deriving (Enum, Read, Show)
|
||||
|
||||
-- | Show a signed 'RealFloat' value to full precision,
|
||||
-- using standard decimal notation for arguments whose absolute value lies
|
||||
-- between @0.1@ and @9,999,999@, and scientific notation otherwise.
|
||||
realFloat :: (RealFloat a) => a -> Builder
|
||||
{-# SPECIALIZE realFloat :: Float -> Builder #-}
|
||||
{-# SPECIALIZE realFloat :: Double -> Builder #-}
|
||||
realFloat x = formatRealFloat Generic Nothing x
|
||||
|
||||
-- | Encode a signed 'RealFloat' according to 'FPFormat' and optionally requested precision.
|
||||
--
|
||||
-- This corresponds to the @show{E,F,G}Float@ operations provided by @base@'s "Numeric" module.
|
||||
--
|
||||
-- __NOTE__: The functions in @base-4.12@ changed the serialisation in
|
||||
-- case of a @Just 0@ precision; this version of @text@ still provides
|
||||
-- the serialisation as implemented in @base-4.11@. The next major
|
||||
-- version of @text@ will switch to the more correct @base-4.12@ serialisation.
|
||||
formatRealFloat :: (RealFloat a) =>
|
||||
FPFormat
|
||||
-> Maybe Int -- ^ Number of decimal places to render.
|
||||
-> a
|
||||
-> Builder
|
||||
{-# SPECIALIZE formatRealFloat :: FPFormat -> Maybe Int -> Float -> Builder #-}
|
||||
{-# SPECIALIZE formatRealFloat :: FPFormat -> Maybe Int -> Double -> Builder #-}
|
||||
formatRealFloat fmt decs x
|
||||
| isNaN x = "NaN"
|
||||
| isInfinite x = if x < 0 then "-Infinity" else "Infinity"
|
||||
| x < 0 || isNegativeZero x = singleton '-' <> doFmt fmt (floatToDigits (-x))
|
||||
| otherwise = doFmt fmt (floatToDigits x)
|
||||
where
|
||||
doFmt format (is, e) =
|
||||
let ds = map i2d is in
|
||||
case format of
|
||||
Generic ->
|
||||
doFmt (if e < 0 || e > 7 then Exponent else Fixed)
|
||||
(is,e)
|
||||
Exponent ->
|
||||
case decs of
|
||||
Nothing ->
|
||||
let show_e' = decimal (e-1) in
|
||||
case ds of
|
||||
"0" -> "0.0e0"
|
||||
[d] -> singleton d <> ".0e" <> show_e'
|
||||
(d:ds') -> singleton d <> singleton '.' <> fromString ds' <> singleton 'e' <> show_e'
|
||||
[] -> error "formatRealFloat/doFmt/Exponent/Nothing: []"
|
||||
Just dec ->
|
||||
let dec' = max dec 1 in
|
||||
case is of
|
||||
[0] -> "0." <> fromText (T.replicate dec' "0") <> "e0"
|
||||
_ ->
|
||||
let (ei,is') = roundTo (dec'+1) is
|
||||
is'' = map i2d (if ei > 0 then init is' else is')
|
||||
in case is'' of
|
||||
[] -> error "formatRealFloat/doFmt/Exponent/Just: []"
|
||||
(d:ds') -> singleton d <> singleton '.' <> fromString ds' <> singleton 'e' <> decimal (e-1+ei)
|
||||
Fixed ->
|
||||
let
|
||||
mk0 ls = case ls of { "" -> "0" ; _ -> fromString ls}
|
||||
in
|
||||
case decs of
|
||||
Nothing
|
||||
| e <= 0 -> "0." <> fromText (T.replicate (-e) "0") <> fromString ds
|
||||
| otherwise ->
|
||||
let
|
||||
f 0 s rs = mk0 (reverse s) <> singleton '.' <> mk0 rs
|
||||
f n s "" = f (n-1) ('0':s) ""
|
||||
f n s (r:rs) = f (n-1) (r:s) rs
|
||||
in
|
||||
f e "" ds
|
||||
Just dec ->
|
||||
let dec' = max dec 0 in
|
||||
if e >= 0 then
|
||||
let
|
||||
(ei,is') = roundTo (dec' + e) is
|
||||
(ls,rs) = splitAt (e+ei) (map i2d is')
|
||||
in
|
||||
mk0 ls <> (if null rs then "" else singleton '.' <> fromString rs)
|
||||
else
|
||||
let (ei,is') = roundTo dec' (replicate (-e) 0 ++ is)
|
||||
is'' = map i2d (if ei > 0 then is' else 0:is')
|
||||
in case is'' of
|
||||
[] -> error "formatRealFloat/doFmt/Fixed: []"
|
||||
(d:ds') -> singleton d <> (if null ds' then "" else singleton '.' <> fromString ds')
|
||||
|
||||
|
||||
-- Based on "Printing Floating-Point Numbers Quickly and Accurately"
|
||||
-- by R.G. Burger and R.K. Dybvig in PLDI 96.
|
||||
-- This version uses a much slower logarithm estimator. It should be improved.
|
||||
|
||||
-- | 'floatToDigits' takes a base and a non-negative 'RealFloat' number,
|
||||
-- and returns a list of digits and an exponent.
|
||||
-- In particular, if @x>=0@, and
|
||||
--
|
||||
-- > floatToDigits base x = ([d1,d2,...,dn], e)
|
||||
--
|
||||
-- then
|
||||
--
|
||||
-- (1) @n >= 1@
|
||||
--
|
||||
-- (2) @x = 0.d1d2...dn * (base**e)@
|
||||
--
|
||||
-- (3) @0 <= di <= base-1@
|
||||
|
||||
floatToDigits :: (RealFloat a) => a -> ([Int], Int)
|
||||
{-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-}
|
||||
{-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-}
|
||||
floatToDigits 0 = ([0], 0)
|
||||
floatToDigits x =
|
||||
let
|
||||
(f0, e0) = decodeFloat x
|
||||
(minExp0, _) = floatRange x
|
||||
p = floatDigits x
|
||||
b = floatRadix x
|
||||
minExp = minExp0 - p -- the real minimum exponent
|
||||
-- Haskell requires that f be adjusted so denormalized numbers
|
||||
-- will have an impossibly low exponent. Adjust for this.
|
||||
(f, e) =
|
||||
let n = minExp - e0 in
|
||||
if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0)
|
||||
(r, s, mUp, mDn) =
|
||||
if e >= 0 then
|
||||
let be = expt b e in
|
||||
if f == expt b (p-1) then
|
||||
(f*be*b*2, 2*b, be*b, be) -- according to Burger and Dybvig
|
||||
else
|
||||
(f*be*2, 2, be, be)
|
||||
else
|
||||
if e > minExp && f == expt b (p-1) then
|
||||
(f*b*2, expt b (-e+1)*2, b, 1)
|
||||
else
|
||||
(f*2, expt b (-e)*2, 1, 1)
|
||||
k :: Int
|
||||
k =
|
||||
let
|
||||
k0 :: Int
|
||||
k0 =
|
||||
if b == 2 then
|
||||
-- logBase 10 2 is very slightly larger than 8651/28738
|
||||
-- (about 5.3558e-10), so if log x >= 0, the approximation
|
||||
-- k1 is too small, hence we add one and need one fixup step less.
|
||||
-- If log x < 0, the approximation errs rather on the high side.
|
||||
-- That is usually more than compensated for by ignoring the
|
||||
-- fractional part of logBase 2 x, but when x is a power of 1/2
|
||||
-- or slightly larger and the exponent is a multiple of the
|
||||
-- denominator of the rational approximation to logBase 10 2,
|
||||
-- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x,
|
||||
-- we get a leading zero-digit we don't want.
|
||||
-- With the approximation 3/10, this happened for
|
||||
-- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above.
|
||||
-- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x
|
||||
-- for IEEE-ish floating point types with exponent fields
|
||||
-- <= 17 bits and mantissae of several thousand bits, earlier
|
||||
-- convergents to logBase 10 2 would fail for long double.
|
||||
-- Using quot instead of div is a little faster and requires
|
||||
-- fewer fixup steps for negative lx.
|
||||
let lx = p - 1 + e0
|
||||
k1 = (lx * 8651) `quot` 28738
|
||||
in if lx >= 0 then k1 + 1 else k1
|
||||
else
|
||||
-- f :: Integer, log :: Float -> Float,
|
||||
-- ceiling :: Float -> Int
|
||||
ceiling ((log (fromInteger (f+1) :: Float) +
|
||||
fromIntegral e * log (fromInteger b)) /
|
||||
log 10)
|
||||
--WAS: fromInt e * log (fromInteger b))
|
||||
|
||||
fixup n =
|
||||
if n >= 0 then
|
||||
if r + mUp <= expt 10 n * s then n else fixup (n+1)
|
||||
else
|
||||
if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1)
|
||||
in
|
||||
fixup k0
|
||||
|
||||
gen ds rn sN mUpN mDnN =
|
||||
let
|
||||
(dn, rn') = (rn * 10) `quotRem` sN
|
||||
mUpN' = mUpN * 10
|
||||
mDnN' = mDnN * 10
|
||||
in
|
||||
case (rn' < mDnN', rn' + mUpN' > sN) of
|
||||
(True, False) -> dn : ds
|
||||
(False, True) -> dn+1 : ds
|
||||
(True, True) -> if rn' * 2 < sN then dn : ds else dn+1 : ds
|
||||
(False, False) -> gen (dn:ds) rn' sN mUpN' mDnN'
|
||||
|
||||
rds =
|
||||
if k >= 0 then
|
||||
gen [] r (s * expt 10 k) mUp mDn
|
||||
else
|
||||
let bk = expt 10 (-k) in
|
||||
gen [] (r * bk) s (mUp * bk) (mDn * bk)
|
||||
in
|
||||
(map fromIntegral (reverse rds), k)
|
||||
|
||||
-- Exponentiation with a cache for the most common numbers.
|
||||
minExpt, maxExpt :: Int
|
||||
minExpt = 0
|
||||
maxExpt = 1100
|
||||
|
||||
expt :: Integer -> Int -> Integer
|
||||
expt base n
|
||||
| base == 2 && n >= minExpt && n <= maxExpt = expts `unsafeAt` n
|
||||
| base == 10 && n <= maxExpt10 = expts10 `unsafeAt` n
|
||||
| otherwise = base^n
|
||||
|
||||
expts :: Array Int Integer
|
||||
expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]]
|
||||
|
||||
maxExpt10 :: Int
|
||||
maxExpt10 = 324
|
||||
|
||||
expts10 :: Array Int Integer
|
||||
expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]]
|
@ -1,254 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns,CPP #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Encoding
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Functions for converting lazy 'Text' values to and from lazy
|
||||
-- 'ByteString', using several standard encodings.
|
||||
--
|
||||
-- To gain access to a much larger family of encodings, use the
|
||||
-- <http://hackage.haskell.org/package/text-icu text-icu package>.
|
||||
|
||||
module Data.Text.Lazy.Encoding
|
||||
(
|
||||
-- * Decoding ByteStrings to Text
|
||||
-- $strict
|
||||
decodeASCII
|
||||
, decodeLatin1
|
||||
, decodeUtf8
|
||||
, decodeUtf16LE
|
||||
, decodeUtf16BE
|
||||
, decodeUtf32LE
|
||||
, decodeUtf32BE
|
||||
|
||||
-- ** Catchable failure
|
||||
, decodeUtf8'
|
||||
|
||||
-- ** Controllable error handling
|
||||
, decodeUtf8With
|
||||
, decodeUtf16LEWith
|
||||
, decodeUtf16BEWith
|
||||
, decodeUtf32LEWith
|
||||
, decodeUtf32BEWith
|
||||
|
||||
-- * Encoding Text to ByteStrings
|
||||
, encodeUtf8
|
||||
, encodeUtf16LE
|
||||
, encodeUtf16BE
|
||||
, encodeUtf32LE
|
||||
, encodeUtf32BE
|
||||
|
||||
-- * Encoding Text using ByteString Builders
|
||||
, encodeUtf8Builder
|
||||
, encodeUtf8BuilderEscaped
|
||||
) where
|
||||
|
||||
import Control.Exception (evaluate, try)
|
||||
import Data.Monoid (Monoid(..))
|
||||
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
|
||||
import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
|
||||
import Data.Word (Word8)
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Builder as B
|
||||
import qualified Data.ByteString.Builder.Extra as B (safeStrategy, toLazyByteStringWith)
|
||||
import qualified Data.ByteString.Builder.Prim as BP
|
||||
import qualified Data.ByteString.Lazy as B
|
||||
import qualified Data.ByteString.Lazy.Internal as B
|
||||
import qualified Data.ByteString.Unsafe as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as TE
|
||||
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as E
|
||||
import qualified Data.Text.Internal.Lazy.Fusion as F
|
||||
import Data.Text.Unsafe (unsafeDupablePerformIO)
|
||||
|
||||
-- $strict
|
||||
--
|
||||
-- All of the single-parameter functions for decoding bytestrings
|
||||
-- encoded in one of the Unicode Transformation Formats (UTF) operate
|
||||
-- in a /strict/ mode: each will throw an exception if given invalid
|
||||
-- input.
|
||||
--
|
||||
-- Each function has a variant, whose name is suffixed with -'With',
|
||||
-- that gives greater control over the handling of decoding errors.
|
||||
-- For instance, 'decodeUtf8' will throw an exception, but
|
||||
-- 'decodeUtf8With' allows the programmer to determine what to do on a
|
||||
-- decoding error.
|
||||
|
||||
-- | /Deprecated/. Decode a 'ByteString' containing 7-bit ASCII
|
||||
-- encoded text.
|
||||
decodeASCII :: B.ByteString -> Text
|
||||
decodeASCII = decodeUtf8
|
||||
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
|
||||
|
||||
-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
|
||||
decodeLatin1 :: B.ByteString -> Text
|
||||
decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text.
|
||||
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
|
||||
#if defined(ASTERIUS)
|
||||
decodeUtf8With onErr lbs = Chunk (TE.decodeUtf8With onErr (B.toStrict lbs)) Empty
|
||||
#else
|
||||
decodeUtf8With onErr (B.Chunk b0 bs0) =
|
||||
case TE.streamDecodeUtf8With onErr b0 of
|
||||
TE.Some t l f -> chunk t (go f l bs0)
|
||||
where
|
||||
go f0 _ (B.Chunk b bs) =
|
||||
case f0 b of
|
||||
TE.Some t l f -> chunk t (go f l bs)
|
||||
go _ l _
|
||||
| S.null l = empty
|
||||
| otherwise = case onErr desc (Just (B.unsafeHead l)) of
|
||||
Nothing -> empty
|
||||
Just c -> Chunk (T.singleton c) Empty
|
||||
desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
|
||||
decodeUtf8With _ _ = empty
|
||||
#endif
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text that is known
|
||||
-- to be valid.
|
||||
--
|
||||
-- If the input contains any invalid UTF-8 data, an exception will be
|
||||
-- thrown that cannot be caught in pure code. For more control over
|
||||
-- the handling of invalid data, use 'decodeUtf8'' or
|
||||
-- 'decodeUtf8With'.
|
||||
decodeUtf8 :: B.ByteString -> Text
|
||||
decodeUtf8 = decodeUtf8With strictDecode
|
||||
{-# INLINE[0] decodeUtf8 #-}
|
||||
|
||||
-- This rule seems to cause performance loss.
|
||||
{- RULES "LAZY STREAM stream/decodeUtf8' fusion" [1]
|
||||
forall bs. F.stream (decodeUtf8' bs) = E.streamUtf8 strictDecode bs #-}
|
||||
|
||||
-- | Decode a 'ByteString' containing UTF-8 encoded text..
|
||||
--
|
||||
-- If the input contains any invalid UTF-8 data, the relevant
|
||||
-- exception will be returned, otherwise the decoded text.
|
||||
--
|
||||
-- /Note/: this function is /not/ lazy, as it must decode its entire
|
||||
-- input before it can return a result. If you need lazy (streaming)
|
||||
-- decoding, use 'decodeUtf8With' in lenient mode.
|
||||
decodeUtf8' :: B.ByteString -> Either UnicodeException Text
|
||||
decodeUtf8' bs = unsafeDupablePerformIO $ do
|
||||
let t = decodeUtf8 bs
|
||||
try (evaluate (rnf t `seq` t))
|
||||
where
|
||||
rnf Empty = ()
|
||||
rnf (Chunk _ ts) = rnf ts
|
||||
{-# INLINE decodeUtf8' #-}
|
||||
|
||||
-- | Encode text using UTF-8 encoding.
|
||||
encodeUtf8 :: Text -> B.ByteString
|
||||
encodeUtf8 Empty = B.empty
|
||||
encodeUtf8 lt@(Chunk t _) =
|
||||
B.toLazyByteStringWith strategy B.empty $ encodeUtf8Builder lt
|
||||
where
|
||||
-- To improve our small string performance, we use a strategy that
|
||||
-- allocates a buffer that is guaranteed to be large enough for the
|
||||
-- encoding of the first chunk, but not larger than the default
|
||||
-- B.smallChunkSize. We clamp the firstChunkSize to ensure that we don't
|
||||
-- generate too large buffers which hamper streaming.
|
||||
firstChunkSize = min B.smallChunkSize (4 * (T.length t + 1))
|
||||
strategy = B.safeStrategy firstChunkSize B.defaultChunkSize
|
||||
|
||||
-- | Encode text to a ByteString 'B.Builder' using UTF-8 encoding.
|
||||
--
|
||||
-- @since 1.1.0.0
|
||||
encodeUtf8Builder :: Text -> B.Builder
|
||||
encodeUtf8Builder =
|
||||
foldrChunks (\c b -> TE.encodeUtf8Builder c `mappend` b) Data.Monoid.mempty
|
||||
|
||||
-- | Encode text using UTF-8 encoding and escape the ASCII characters using
|
||||
-- a 'BP.BoundedPrim'.
|
||||
--
|
||||
-- Use this function is to implement efficient encoders for text-based formats
|
||||
-- like JSON or HTML.
|
||||
--
|
||||
-- @since 1.1.0.0
|
||||
{-# INLINE encodeUtf8BuilderEscaped #-}
|
||||
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
|
||||
encodeUtf8BuilderEscaped prim =
|
||||
foldrChunks (\c b -> TE.encodeUtf8BuilderEscaped prim c `mappend` b) mempty
|
||||
|
||||
-- | Decode text from little endian UTF-16 encoding.
|
||||
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
|
||||
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
|
||||
{-# INLINE decodeUtf16LEWith #-}
|
||||
|
||||
-- | Decode text from little endian UTF-16 encoding.
|
||||
--
|
||||
-- If the input contains any invalid little endian UTF-16 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf16LEWith'.
|
||||
decodeUtf16LE :: B.ByteString -> Text
|
||||
decodeUtf16LE = decodeUtf16LEWith strictDecode
|
||||
{-# INLINE decodeUtf16LE #-}
|
||||
|
||||
-- | Decode text from big endian UTF-16 encoding.
|
||||
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
|
||||
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
|
||||
{-# INLINE decodeUtf16BEWith #-}
|
||||
|
||||
-- | Decode text from big endian UTF-16 encoding.
|
||||
--
|
||||
-- If the input contains any invalid big endian UTF-16 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf16BEWith'.
|
||||
decodeUtf16BE :: B.ByteString -> Text
|
||||
decodeUtf16BE = decodeUtf16BEWith strictDecode
|
||||
{-# INLINE decodeUtf16BE #-}
|
||||
|
||||
-- | Encode text using little endian UTF-16 encoding.
|
||||
encodeUtf16LE :: Text -> B.ByteString
|
||||
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
|
||||
{-# INLINE encodeUtf16LE #-}
|
||||
|
||||
-- | Encode text using big endian UTF-16 encoding.
|
||||
encodeUtf16BE :: Text -> B.ByteString
|
||||
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
|
||||
{-# INLINE encodeUtf16BE #-}
|
||||
|
||||
-- | Decode text from little endian UTF-32 encoding.
|
||||
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
|
||||
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
|
||||
{-# INLINE decodeUtf32LEWith #-}
|
||||
|
||||
-- | Decode text from little endian UTF-32 encoding.
|
||||
--
|
||||
-- If the input contains any invalid little endian UTF-32 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf32LEWith'.
|
||||
decodeUtf32LE :: B.ByteString -> Text
|
||||
decodeUtf32LE = decodeUtf32LEWith strictDecode
|
||||
{-# INLINE decodeUtf32LE #-}
|
||||
|
||||
-- | Decode text from big endian UTF-32 encoding.
|
||||
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
|
||||
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
|
||||
{-# INLINE decodeUtf32BEWith #-}
|
||||
|
||||
-- | Decode text from big endian UTF-32 encoding.
|
||||
--
|
||||
-- If the input contains any invalid big endian UTF-32 data, an
|
||||
-- exception will be thrown. For more control over the handling of
|
||||
-- invalid data, use 'decodeUtf32BEWith'.
|
||||
decodeUtf32BE :: B.ByteString -> Text
|
||||
decodeUtf32BE = decodeUtf32BEWith strictDecode
|
||||
{-# INLINE decodeUtf32BE #-}
|
||||
|
||||
-- | Encode text using little endian UTF-32 encoding.
|
||||
encodeUtf32LE :: Text -> B.ByteString
|
||||
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
|
||||
{-# INLINE encodeUtf32LE #-}
|
||||
|
||||
-- | Encode text using big endian UTF-32 encoding.
|
||||
encodeUtf32BE :: Text -> B.ByteString
|
||||
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
|
||||
{-# INLINE encodeUtf32BE #-}
|
@ -1,195 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, CPP, RecordWildCards #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.IO
|
||||
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan,
|
||||
-- (c) 2009 Simon Marlow
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Efficient locale-sensitive support for lazy text I\/O.
|
||||
--
|
||||
-- Skip past the synopsis for some important notes on performance and
|
||||
-- portability across different versions of GHC.
|
||||
|
||||
module Data.Text.Lazy.IO
|
||||
(
|
||||
-- * Performance
|
||||
-- $performance
|
||||
|
||||
-- * Locale support
|
||||
-- $locale
|
||||
-- * File-at-a-time operations
|
||||
readFile
|
||||
, writeFile
|
||||
, appendFile
|
||||
-- * Operations on handles
|
||||
, hGetContents
|
||||
, hGetLine
|
||||
, hPutStr
|
||||
, hPutStrLn
|
||||
-- * Special cases for standard input and output
|
||||
, interact
|
||||
, getContents
|
||||
, getLine
|
||||
, putStr
|
||||
, putStrLn
|
||||
) where
|
||||
|
||||
import Data.Text.Lazy (Text)
|
||||
import Prelude hiding (appendFile, getContents, getLine, interact,
|
||||
putStr, putStrLn, readFile, writeFile)
|
||||
import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
|
||||
withFile)
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as L
|
||||
import qualified Control.Exception as E
|
||||
import Control.Monad (when)
|
||||
import Data.IORef (readIORef)
|
||||
import Data.Text.Internal.IO (hGetLineWith, readChunk)
|
||||
import Data.Text.Internal.Lazy (chunk, empty)
|
||||
import GHC.IO.Buffer (isEmptyBuffer)
|
||||
import GHC.IO.Exception (IOException(..), IOErrorType(..), ioException)
|
||||
import GHC.IO.Handle.Internals (augmentIOError, hClose_help,
|
||||
wantReadableHandle, withHandle)
|
||||
import GHC.IO.Handle.Types (Handle__(..), HandleType(..))
|
||||
import System.IO (BufferMode(..), hGetBuffering, hSetBuffering)
|
||||
import System.IO.Error (isEOFError)
|
||||
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||
|
||||
-- $performance
|
||||
--
|
||||
-- The functions in this module obey the runtime system's locale,
|
||||
-- character set encoding, and line ending conversion settings.
|
||||
--
|
||||
-- If you know in advance that you will be working with data that has
|
||||
-- a specific encoding (e.g. UTF-8), and your application is highly
|
||||
-- performance sensitive, you may find that it is faster to perform
|
||||
-- I\/O with bytestrings and to encode and decode yourself than to use
|
||||
-- the functions in this module.
|
||||
--
|
||||
-- Whether this will hold depends on the version of GHC you are using,
|
||||
-- the platform you are working on, the data you are working with, and
|
||||
-- the encodings you are using, so be sure to test for yourself.
|
||||
|
||||
-- | Read a file and return its contents as a string. The file is
|
||||
-- read lazily, as with 'getContents'.
|
||||
readFile :: FilePath -> IO Text
|
||||
readFile name = openFile name ReadMode >>= hGetContents
|
||||
|
||||
-- | Write a string to a file. The file is truncated to zero length
|
||||
-- before writing begins.
|
||||
writeFile :: FilePath -> Text -> IO ()
|
||||
writeFile p = withFile p WriteMode . flip hPutStr
|
||||
|
||||
-- | Write a string the end of a file.
|
||||
appendFile :: FilePath -> Text -> IO ()
|
||||
appendFile p = withFile p AppendMode . flip hPutStr
|
||||
|
||||
-- | Lazily read the remaining contents of a 'Handle'. The 'Handle'
|
||||
-- will be closed after the read completes, or on error.
|
||||
hGetContents :: Handle -> IO Text
|
||||
hGetContents h = do
|
||||
chooseGoodBuffering h
|
||||
wantReadableHandle "hGetContents" h $ \hh -> do
|
||||
ts <- lazyRead h
|
||||
return (hh{haType=SemiClosedHandle}, ts)
|
||||
|
||||
-- | Use a more efficient buffer size if we're reading in
|
||||
-- block-buffered mode with the default buffer size.
|
||||
chooseGoodBuffering :: Handle -> IO ()
|
||||
chooseGoodBuffering h = do
|
||||
bufMode <- hGetBuffering h
|
||||
when (bufMode == BlockBuffering Nothing) $
|
||||
hSetBuffering h (BlockBuffering (Just 16384))
|
||||
|
||||
lazyRead :: Handle -> IO Text
|
||||
lazyRead h = unsafeInterleaveIO $
|
||||
withHandle "hGetContents" h $ \hh -> do
|
||||
case haType hh of
|
||||
ClosedHandle -> return (hh, L.empty)
|
||||
SemiClosedHandle -> lazyReadBuffered h hh
|
||||
_ -> ioException
|
||||
(IOError (Just h) IllegalOperation "hGetContents"
|
||||
"illegal handle type" Nothing Nothing)
|
||||
|
||||
lazyReadBuffered :: Handle -> Handle__ -> IO (Handle__, Text)
|
||||
lazyReadBuffered h hh@Handle__{..} = do
|
||||
buf <- readIORef haCharBuffer
|
||||
(do t <- readChunk hh buf
|
||||
ts <- lazyRead h
|
||||
return (hh, chunk t ts)) `E.catch` \e -> do
|
||||
(hh', _) <- hClose_help hh
|
||||
if isEOFError e
|
||||
then return $ if isEmptyBuffer buf
|
||||
then (hh', empty)
|
||||
else (hh', L.singleton '\r')
|
||||
else E.throwIO (augmentIOError e "hGetContents" h)
|
||||
|
||||
-- | Read a single line from a handle.
|
||||
hGetLine :: Handle -> IO Text
|
||||
hGetLine = hGetLineWith L.fromChunks
|
||||
|
||||
-- | Write a string to a handle.
|
||||
hPutStr :: Handle -> Text -> IO ()
|
||||
hPutStr h = mapM_ (T.hPutStr h) . L.toChunks
|
||||
|
||||
-- | Write a string to a handle, followed by a newline.
|
||||
hPutStrLn :: Handle -> Text -> IO ()
|
||||
hPutStrLn h t = hPutStr h t >> hPutChar h '\n'
|
||||
|
||||
-- | The 'interact' function takes a function of type @Text -> Text@
|
||||
-- as its argument. The entire input from the standard input device is
|
||||
-- passed (lazily) to this function as its argument, and the resulting
|
||||
-- string is output on the standard output device.
|
||||
interact :: (Text -> Text) -> IO ()
|
||||
interact f = putStr . f =<< getContents
|
||||
|
||||
-- | Lazily read all user input on 'stdin' as a single string.
|
||||
getContents :: IO Text
|
||||
getContents = hGetContents stdin
|
||||
|
||||
-- | Read a single line of user input from 'stdin'.
|
||||
getLine :: IO Text
|
||||
getLine = hGetLine stdin
|
||||
|
||||
-- | Write a string to 'stdout'.
|
||||
putStr :: Text -> IO ()
|
||||
putStr = hPutStr stdout
|
||||
|
||||
-- | Write a string to 'stdout', followed by a newline.
|
||||
putStrLn :: Text -> IO ()
|
||||
putStrLn = hPutStrLn stdout
|
||||
|
||||
-- $locale
|
||||
--
|
||||
-- /Note/: The behaviour of functions in this module depends on the
|
||||
-- version of GHC you are using.
|
||||
--
|
||||
-- Beginning with GHC 6.12, text I\/O is performed using the system or
|
||||
-- handle's current locale and line ending conventions.
|
||||
--
|
||||
-- Under GHC 6.10 and earlier, the system I\/O libraries /do not
|
||||
-- support/ locale-sensitive I\/O or line ending conversion. On these
|
||||
-- versions of GHC, functions in this library all use UTF-8. What
|
||||
-- does this mean in practice?
|
||||
--
|
||||
-- * All data that is read will be decoded as UTF-8.
|
||||
--
|
||||
-- * Before data is written, it is first encoded as UTF-8.
|
||||
--
|
||||
-- * On both reading and writing, the platform's native newline
|
||||
-- conversion is performed.
|
||||
--
|
||||
-- If you must use a non-UTF-8 locale on an older version of GHC, you
|
||||
-- will have to perform the transcoding yourself, e.g. as follows:
|
||||
--
|
||||
-- > import qualified Data.ByteString.Lazy as B
|
||||
-- > import Data.Text.Lazy (Text)
|
||||
-- > import Data.Text.Lazy.Encoding (encodeUtf16)
|
||||
-- >
|
||||
-- > putStr_Utf16LE :: Text -> IO ()
|
||||
-- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)
|
@ -1,20 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, DeriveDataTypeable #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Internal
|
||||
-- Copyright : (c) 2013 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- This module has been renamed to 'Data.Text.Internal.Lazy'. This
|
||||
-- name for the module will be removed in the next major release.
|
||||
|
||||
module Data.Text.Lazy.Internal
|
||||
{-# DEPRECATED "Use Data.Text.Internal.Lazy instead" #-}
|
||||
(
|
||||
module Data.Text.Internal.Lazy
|
||||
) where
|
||||
|
||||
import Data.Text.Internal.Lazy
|
@ -1,192 +0,0 @@
|
||||
{-# LANGUAGE OverloadedStrings, CPP #-}
|
||||
#if __GLASGOW_HASKELL__ >= 704
|
||||
{-# LANGUAGE Safe #-}
|
||||
#elif __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Lazy.Read
|
||||
-- Copyright : (c) 2010, 2011 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Functions used frequently when reading textual data.
|
||||
module Data.Text.Lazy.Read
|
||||
(
|
||||
Reader
|
||||
, decimal
|
||||
, hexadecimal
|
||||
, signed
|
||||
, rational
|
||||
, double
|
||||
) where
|
||||
|
||||
import Control.Monad (liftM)
|
||||
import Data.Char (isDigit, isHexDigit)
|
||||
import Data.Int (Int8, Int16, Int32, Int64)
|
||||
import Data.Ratio ((%))
|
||||
import Data.Text.Internal.Read
|
||||
import Data.Text.Lazy as T
|
||||
import Data.Word (Word, Word8, Word16, Word32, Word64)
|
||||
|
||||
-- | Read some text. If the read succeeds, return its value and the
|
||||
-- remaining text, otherwise an error message.
|
||||
type Reader a = IReader Text a
|
||||
type Parser = IParser Text
|
||||
|
||||
-- | Read a decimal integer. The input must begin with at least one
|
||||
-- decimal digit, and is consumed until a non-digit or end of string
|
||||
-- is reached.
|
||||
--
|
||||
-- This function does not handle leading sign characters. If you need
|
||||
-- to handle signed input, use @'signed' 'decimal'@.
|
||||
--
|
||||
-- /Note/: For fixed-width integer types, this function does not
|
||||
-- attempt to detect overflow, so a sufficiently long input may give
|
||||
-- incorrect results. If you are worried about overflow, use
|
||||
-- 'Integer' for your result type.
|
||||
decimal :: Integral a => Reader a
|
||||
{-# SPECIALIZE decimal :: Reader Int #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int8 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int16 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int32 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int64 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Integer #-}
|
||||
{-# SPECIALIZE decimal :: Reader Data.Word.Word #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word8 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word16 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word32 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word64 #-}
|
||||
decimal txt
|
||||
| T.null h = Left "input does not start with a digit"
|
||||
| otherwise = Right (T.foldl' go 0 h, t)
|
||||
where (h,t) = T.span isDigit txt
|
||||
go n d = (n * 10 + fromIntegral (digitToInt d))
|
||||
|
||||
-- | Read a hexadecimal integer, consisting of an optional leading
|
||||
-- @\"0x\"@ followed by at least one hexadecimal digit. Input is
|
||||
-- consumed until a non-hex-digit or end of string is reached.
|
||||
-- This function is case insensitive.
|
||||
--
|
||||
-- This function does not handle leading sign characters. If you need
|
||||
-- to handle signed input, use @'signed' 'hexadecimal'@.
|
||||
--
|
||||
-- /Note/: For fixed-width integer types, this function does not
|
||||
-- attempt to detect overflow, so a sufficiently long input may give
|
||||
-- incorrect results. If you are worried about overflow, use
|
||||
-- 'Integer' for your result type.
|
||||
hexadecimal :: Integral a => Reader a
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
|
||||
hexadecimal txt
|
||||
| h == "0x" || h == "0X" = hex t
|
||||
| otherwise = hex txt
|
||||
where (h,t) = T.splitAt 2 txt
|
||||
|
||||
hex :: Integral a => Reader a
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int8 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int16 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int32 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int64 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word8 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word16 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word32 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word64 #-}
|
||||
hex txt
|
||||
| T.null h = Left "input does not start with a hexadecimal digit"
|
||||
| otherwise = Right (T.foldl' go 0 h, t)
|
||||
where (h,t) = T.span isHexDigit txt
|
||||
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
|
||||
|
||||
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
|
||||
-- apply it to the result of applying the given reader.
|
||||
signed :: Num a => Reader a -> Reader a
|
||||
{-# INLINE signed #-}
|
||||
signed f = runP (signa (P f))
|
||||
|
||||
-- | Read a rational number.
|
||||
--
|
||||
-- This function accepts an optional leading sign character, followed
|
||||
-- by at least one decimal digit. The syntax similar to that accepted
|
||||
-- by the 'read' function, with the exception that a trailing @\'.\'@
|
||||
-- or @\'e\'@ /not/ followed by a number is not consumed.
|
||||
--
|
||||
-- Examples:
|
||||
--
|
||||
-- >rational "3" == Right (3.0, "")
|
||||
-- >rational "3.1" == Right (3.1, "")
|
||||
-- >rational "3e4" == Right (30000.0, "")
|
||||
-- >rational "3.1e4" == Right (31000.0, "")
|
||||
-- >rational ".3" == Left "input does not start with a digit"
|
||||
-- >rational "e3" == Left "input does not start with a digit"
|
||||
--
|
||||
-- Examples of differences from 'read':
|
||||
--
|
||||
-- >rational "3.foo" == Right (3.0, ".foo")
|
||||
-- >rational "3e" == Right (3.0, "e")
|
||||
rational :: Fractional a => Reader a
|
||||
{-# SPECIALIZE rational :: Reader Double #-}
|
||||
rational = floaty $ \real frac fracDenom -> fromRational $
|
||||
real % 1 + frac % fracDenom
|
||||
|
||||
-- | Read a rational number.
|
||||
--
|
||||
-- The syntax accepted by this function is the same as for 'rational'.
|
||||
--
|
||||
-- /Note/: This function is almost ten times faster than 'rational',
|
||||
-- but is slightly less accurate.
|
||||
--
|
||||
-- The 'Double' type supports about 16 decimal places of accuracy.
|
||||
-- For 94.2% of numbers, this function and 'rational' give identical
|
||||
-- results, but for the remaining 5.8%, this function loses precision
|
||||
-- around the 15th decimal place. For 0.001% of numbers, this
|
||||
-- function will lose precision at the 13th or 14th decimal place.
|
||||
double :: Reader Double
|
||||
double = floaty $ \real frac fracDenom ->
|
||||
fromIntegral real +
|
||||
fromIntegral frac / fromIntegral fracDenom
|
||||
|
||||
signa :: Num a => Parser a -> Parser a
|
||||
{-# SPECIALIZE signa :: Parser Int -> Parser Int #-}
|
||||
{-# SPECIALIZE signa :: Parser Int8 -> Parser Int8 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int16 -> Parser Int16 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int32 -> Parser Int32 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
|
||||
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
|
||||
signa p = do
|
||||
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
|
||||
if sign == '+' then p else negate `liftM` p
|
||||
|
||||
char :: (Char -> Bool) -> Parser Char
|
||||
char p = P $ \t -> case T.uncons t of
|
||||
Just (c,t') | p c -> Right (c,t')
|
||||
_ -> Left "character does not match"
|
||||
|
||||
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
|
||||
{-# INLINE floaty #-}
|
||||
floaty f = runP $ do
|
||||
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
|
||||
real <- P decimal
|
||||
T fraction fracDigits <- perhaps (T 0 0) $ do
|
||||
_ <- char (=='.')
|
||||
digits <- P $ \t -> Right (fromIntegral . T.length $ T.takeWhile isDigit t, t)
|
||||
n <- P decimal
|
||||
return $ T n digits
|
||||
let e c = c == 'e' || c == 'E'
|
||||
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
|
||||
let n = if fracDigits == 0
|
||||
then if power == 0
|
||||
then fromIntegral real
|
||||
else fromIntegral real * (10 ^^ power)
|
||||
else if power == 0
|
||||
then f real fraction (10 ^ fracDigits)
|
||||
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
|
||||
return $! if sign == '+'
|
||||
then n
|
||||
else -n
|
@ -1,200 +0,0 @@
|
||||
{-# LANGUAGE OverloadedStrings, UnboxedTuples, CPP #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Read
|
||||
-- Copyright : (c) 2010, 2011 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : GHC
|
||||
--
|
||||
-- Functions used frequently when reading textual data.
|
||||
module Data.Text.Read
|
||||
(
|
||||
Reader
|
||||
, decimal
|
||||
, hexadecimal
|
||||
, signed
|
||||
, rational
|
||||
, double
|
||||
) where
|
||||
|
||||
import Control.Monad (liftM)
|
||||
import Data.Char (isDigit, isHexDigit)
|
||||
import Data.Int (Int8, Int16, Int32, Int64)
|
||||
import Data.Ratio ((%))
|
||||
import Data.Text as T
|
||||
import Data.Text.Internal.Private (span_)
|
||||
import Data.Text.Internal.Read
|
||||
import Data.Word (Word, Word8, Word16, Word32, Word64)
|
||||
|
||||
-- | Read some text. If the read succeeds, return its value and the
|
||||
-- remaining text, otherwise an error message.
|
||||
type Reader a = IReader Text a
|
||||
type Parser a = IParser Text a
|
||||
|
||||
-- | Read a decimal integer. The input must begin with at least one
|
||||
-- decimal digit, and is consumed until a non-digit or end of string
|
||||
-- is reached.
|
||||
--
|
||||
-- This function does not handle leading sign characters. If you need
|
||||
-- to handle signed input, use @'signed' 'decimal'@.
|
||||
--
|
||||
-- /Note/: For fixed-width integer types, this function does not
|
||||
-- attempt to detect overflow, so a sufficiently long input may give
|
||||
-- incorrect results. If you are worried about overflow, use
|
||||
-- 'Integer' for your result type.
|
||||
decimal :: Integral a => Reader a
|
||||
{-# SPECIALIZE decimal :: Reader Int #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int8 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int16 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int32 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Int64 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Integer #-}
|
||||
{-# SPECIALIZE decimal :: Reader Data.Word.Word #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word8 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word16 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word32 #-}
|
||||
{-# SPECIALIZE decimal :: Reader Word64 #-}
|
||||
decimal txt
|
||||
| T.null h = Left "input does not start with a digit"
|
||||
| otherwise = Right (T.foldl' go 0 h, t)
|
||||
where (# h,t #) = span_ isDigit txt
|
||||
go n d = (n * 10 + fromIntegral (digitToInt d))
|
||||
|
||||
-- | Read a hexadecimal integer, consisting of an optional leading
|
||||
-- @\"0x\"@ followed by at least one hexadecimal digit. Input is
|
||||
-- consumed until a non-hex-digit or end of string is reached.
|
||||
-- This function is case insensitive.
|
||||
--
|
||||
-- This function does not handle leading sign characters. If you need
|
||||
-- to handle signed input, use @'signed' 'hexadecimal'@.
|
||||
--
|
||||
-- /Note/: For fixed-width integer types, this function does not
|
||||
-- attempt to detect overflow, so a sufficiently long input may give
|
||||
-- incorrect results. If you are worried about overflow, use
|
||||
-- 'Integer' for your result type.
|
||||
hexadecimal :: Integral a => Reader a
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int8 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int16 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int32 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Int64 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word8 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word16 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word32 #-}
|
||||
{-# SPECIALIZE hexadecimal :: Reader Word64 #-}
|
||||
hexadecimal txt
|
||||
| h == "0x" || h == "0X" = hex t
|
||||
| otherwise = hex txt
|
||||
where (h,t) = T.splitAt 2 txt
|
||||
|
||||
hex :: Integral a => Reader a
|
||||
{-# SPECIALIZE hex :: Reader Int #-}
|
||||
{-# SPECIALIZE hex :: Reader Int8 #-}
|
||||
{-# SPECIALIZE hex :: Reader Int16 #-}
|
||||
{-# SPECIALIZE hex :: Reader Int32 #-}
|
||||
{-# SPECIALIZE hex :: Reader Int64 #-}
|
||||
{-# SPECIALIZE hex :: Reader Integer #-}
|
||||
{-# SPECIALIZE hex :: Reader Word #-}
|
||||
{-# SPECIALIZE hex :: Reader Word8 #-}
|
||||
{-# SPECIALIZE hex :: Reader Word16 #-}
|
||||
{-# SPECIALIZE hex :: Reader Word32 #-}
|
||||
{-# SPECIALIZE hex :: Reader Word64 #-}
|
||||
hex txt
|
||||
| T.null h = Left "input does not start with a hexadecimal digit"
|
||||
| otherwise = Right (T.foldl' go 0 h, t)
|
||||
where (# h,t #) = span_ isHexDigit txt
|
||||
go n d = (n * 16 + fromIntegral (hexDigitToInt d))
|
||||
|
||||
-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
|
||||
-- apply it to the result of applying the given reader.
|
||||
signed :: Num a => Reader a -> Reader a
|
||||
{-# INLINE signed #-}
|
||||
signed f = runP (signa (P f))
|
||||
|
||||
-- | Read a rational number.
|
||||
--
|
||||
-- This function accepts an optional leading sign character, followed
|
||||
-- by at least one decimal digit. The syntax similar to that accepted
|
||||
-- by the 'read' function, with the exception that a trailing @\'.\'@
|
||||
-- or @\'e\'@ /not/ followed by a number is not consumed.
|
||||
--
|
||||
-- Examples (with behaviour identical to 'read'):
|
||||
--
|
||||
-- >rational "3" == Right (3.0, "")
|
||||
-- >rational "3.1" == Right (3.1, "")
|
||||
-- >rational "3e4" == Right (30000.0, "")
|
||||
-- >rational "3.1e4" == Right (31000.0, "")
|
||||
-- >rational ".3" == Left "input does not start with a digit"
|
||||
-- >rational "e3" == Left "input does not start with a digit"
|
||||
--
|
||||
-- Examples of differences from 'read':
|
||||
--
|
||||
-- >rational "3.foo" == Right (3.0, ".foo")
|
||||
-- >rational "3e" == Right (3.0, "e")
|
||||
rational :: Fractional a => Reader a
|
||||
{-# SPECIALIZE rational :: Reader Double #-}
|
||||
rational = floaty $ \real frac fracDenom -> fromRational $
|
||||
real % 1 + frac % fracDenom
|
||||
|
||||
-- | Read a rational number.
|
||||
--
|
||||
-- The syntax accepted by this function is the same as for 'rational'.
|
||||
--
|
||||
-- /Note/: This function is almost ten times faster than 'rational',
|
||||
-- but is slightly less accurate.
|
||||
--
|
||||
-- The 'Double' type supports about 16 decimal places of accuracy.
|
||||
-- For 94.2% of numbers, this function and 'rational' give identical
|
||||
-- results, but for the remaining 5.8%, this function loses precision
|
||||
-- around the 15th decimal place. For 0.001% of numbers, this
|
||||
-- function will lose precision at the 13th or 14th decimal place.
|
||||
double :: Reader Double
|
||||
double = floaty $ \real frac fracDenom ->
|
||||
fromIntegral real +
|
||||
fromIntegral frac / fromIntegral fracDenom
|
||||
|
||||
signa :: Num a => Parser a -> Parser a
|
||||
{-# SPECIALIZE signa :: Parser Int -> Parser Int #-}
|
||||
{-# SPECIALIZE signa :: Parser Int8 -> Parser Int8 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int16 -> Parser Int16 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int32 -> Parser Int32 #-}
|
||||
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
|
||||
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
|
||||
signa p = do
|
||||
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
|
||||
if sign == '+' then p else negate `liftM` p
|
||||
|
||||
char :: (Char -> Bool) -> Parser Char
|
||||
char p = P $ \t -> case T.uncons t of
|
||||
Just (c,t') | p c -> Right (c,t')
|
||||
_ -> Left "character does not match"
|
||||
|
||||
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
|
||||
{-# INLINE floaty #-}
|
||||
floaty f = runP $ do
|
||||
sign <- perhaps '+' $ char (\c -> c == '-' || c == '+')
|
||||
real <- P decimal
|
||||
T fraction fracDigits <- perhaps (T 0 0) $ do
|
||||
_ <- char (=='.')
|
||||
digits <- P $ \t -> Right (T.length $ T.takeWhile isDigit t, t)
|
||||
n <- P decimal
|
||||
return $ T n digits
|
||||
let e c = c == 'e' || c == 'E'
|
||||
power <- perhaps 0 (char e >> signa (P decimal) :: Parser Int)
|
||||
let n = if fracDigits == 0
|
||||
then if power == 0
|
||||
then fromIntegral real
|
||||
else fromIntegral real * (10 ^^ power)
|
||||
else if power == 0
|
||||
then f real fraction (10 ^ fracDigits)
|
||||
else f real fraction (10 ^ fracDigits) * (10 ^^ power)
|
||||
return $! if sign == '+'
|
||||
then n
|
||||
else -n
|
@ -1,90 +0,0 @@
|
||||
{-# LANGUAGE CPP, MagicHash #-}
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
{-# LANGUAGE Trustworthy #-}
|
||||
#endif
|
||||
|
||||
-- |
|
||||
-- Module : Data.Text.Show
|
||||
-- Copyright : (c) 2009-2015 Bryan O'Sullivan
|
||||
--
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Stability : experimental
|
||||
-- Portability : GHC
|
||||
|
||||
module Data.Text.Show
|
||||
(
|
||||
singleton
|
||||
, unpack
|
||||
, unpackCString#
|
||||
) where
|
||||
|
||||
import Control.Monad.ST (ST)
|
||||
import Data.Text.Internal (Text(..), empty_, safe)
|
||||
import Data.Text.Internal.Fusion (stream, unstream)
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
|
||||
import GHC.Prim (Addr#)
|
||||
import qualified Data.Text.Array as A
|
||||
import qualified Data.Text.Internal.Fusion.Common as S
|
||||
|
||||
#if __GLASGOW_HASKELL__ >= 702
|
||||
import qualified GHC.CString as GHC
|
||||
#else
|
||||
import qualified GHC.Base as GHC
|
||||
#endif
|
||||
|
||||
instance Show Text where
|
||||
showsPrec p ps r = showsPrec p (unpack ps) r
|
||||
|
||||
-- | /O(n)/ Convert a 'Text' into a 'String'. Subject to fusion.
|
||||
unpack :: Text -> String
|
||||
unpack = S.unstreamList . stream
|
||||
{-# INLINE [1] unpack #-}
|
||||
|
||||
-- | /O(n)/ Convert a literal string into a 'Text'.
|
||||
--
|
||||
-- This is exposed solely for people writing GHC rewrite rules.
|
||||
--
|
||||
-- @since 1.2.1.1
|
||||
unpackCString# :: Addr# -> Text
|
||||
unpackCString# addr# = unstream (S.streamCString# addr#)
|
||||
{-# NOINLINE unpackCString# #-}
|
||||
|
||||
{-# RULES "TEXT literal" [1] forall a.
|
||||
unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
|
||||
= unpackCString# a #-}
|
||||
|
||||
{-# RULES "TEXT literal UTF8" [1] forall a.
|
||||
unstream (S.map safe (S.streamList (GHC.unpackCStringUtf8# a)))
|
||||
= unpackCString# a #-}
|
||||
|
||||
{-# RULES "TEXT empty literal" [1]
|
||||
unstream (S.map safe (S.streamList []))
|
||||
= empty_ #-}
|
||||
|
||||
{-# RULES "TEXT singleton literal" [1] forall a.
|
||||
unstream (S.map safe (S.streamList [a]))
|
||||
= singleton_ a #-}
|
||||
|
||||
-- | /O(1)/ Convert a character into a Text. Subject to fusion.
|
||||
-- Performs replacement on invalid scalar values.
|
||||
singleton :: Char -> Text
|
||||
singleton = unstream . S.singleton . safe
|
||||
{-# INLINE [1] singleton #-}
|
||||
|
||||
{-# RULES "TEXT singleton" forall a.
|
||||
unstream (S.singleton (safe a))
|
||||
= singleton_ a #-}
|
||||
|
||||
-- This is intended to reduce inlining bloat.
|
||||
singleton_ :: Char -> Text
|
||||
singleton_ c = Text (A.run x) 0 len
|
||||
where x :: ST s (A.MArray s)
|
||||
x = do arr <- A.new len
|
||||
_ <- unsafeWrite arr 0 d
|
||||
return arr
|
||||
len | d < '\x10000' = 1
|
||||
| otherwise = 2
|
||||
d = safe c
|
||||
{-# NOINLINE singleton_ #-}
|
@ -1,124 +0,0 @@
|
||||
{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
|
||||
-- |
|
||||
-- Module : Data.Text.Unsafe
|
||||
-- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan
|
||||
-- License : BSD-style
|
||||
-- Maintainer : bos@serpentine.com
|
||||
-- Portability : portable
|
||||
--
|
||||
-- A module containing unsafe 'Text' operations, for very very careful
|
||||
-- use in heavily tested code.
|
||||
module Data.Text.Unsafe
|
||||
(
|
||||
inlineInterleaveST
|
||||
, inlinePerformIO
|
||||
, unsafeDupablePerformIO
|
||||
, Iter(..)
|
||||
, iter
|
||||
, iter_
|
||||
, reverseIter
|
||||
, reverseIter_
|
||||
, unsafeHead
|
||||
, unsafeTail
|
||||
, lengthWord16
|
||||
, takeWord16
|
||||
, dropWord16
|
||||
) where
|
||||
|
||||
#if defined(ASSERTS)
|
||||
import Control.Exception (assert)
|
||||
#endif
|
||||
import Data.Text.Internal.Encoding.Utf16 (chr2)
|
||||
import Data.Text.Internal (Text(..))
|
||||
import Data.Text.Internal.Unsafe (inlineInterleaveST, inlinePerformIO)
|
||||
import Data.Text.Internal.Unsafe.Char (unsafeChr)
|
||||
import qualified Data.Text.Array as A
|
||||
import GHC.IO (unsafeDupablePerformIO)
|
||||
|
||||
-- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
|
||||
-- omits the check for the empty case, so there is an obligation on
|
||||
-- the programmer to provide a proof that the 'Text' is non-empty.
|
||||
unsafeHead :: Text -> Char
|
||||
unsafeHead (Text arr off _len)
|
||||
| m < 0xD800 || m > 0xDBFF = unsafeChr m
|
||||
| otherwise = chr2 m n
|
||||
where m = A.unsafeIndex arr off
|
||||
n = A.unsafeIndex arr (off+1)
|
||||
{-# INLINE unsafeHead #-}
|
||||
|
||||
-- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeTail'
|
||||
-- omits the check for the empty case, so there is an obligation on
|
||||
-- the programmer to provide a proof that the 'Text' is non-empty.
|
||||
unsafeTail :: Text -> Text
|
||||
unsafeTail t@(Text arr off len) =
|
||||
#if defined(ASSERTS)
|
||||
assert (d <= len) $
|
||||
#endif
|
||||
Text arr (off+d) (len-d)
|
||||
where d = iter_ t 0
|
||||
{-# INLINE unsafeTail #-}
|
||||
|
||||
data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int
|
||||
|
||||
-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
|
||||
-- array, returning the current character and the delta to add to give
|
||||
-- the next offset to iterate at.
|
||||
iter :: Text -> Int -> Iter
|
||||
iter (Text arr off _len) i
|
||||
| m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
|
||||
| otherwise = Iter (chr2 m n) 2
|
||||
where m = A.unsafeIndex arr j
|
||||
n = A.unsafeIndex arr k
|
||||
j = off + i
|
||||
k = j + 1
|
||||
{-# INLINE iter #-}
|
||||
|
||||
-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
|
||||
-- delta to add to give the next offset to iterate at.
|
||||
iter_ :: Text -> Int -> Int
|
||||
iter_ (Text arr off _len) i | m < 0xD800 || m > 0xDBFF = 1
|
||||
| otherwise = 2
|
||||
where m = A.unsafeIndex arr (off+i)
|
||||
{-# INLINE iter_ #-}
|
||||
|
||||
-- | /O(1)/ Iterate one step backwards through a UTF-16 array,
|
||||
-- returning the current character and the delta to add (i.e. a
|
||||
-- negative number) to give the next offset to iterate at.
|
||||
reverseIter :: Text -> Int -> (Char,Int)
|
||||
reverseIter (Text arr off _len) i
|
||||
| m < 0xDC00 || m > 0xDFFF = (unsafeChr m, -1)
|
||||
| otherwise = (chr2 n m, -2)
|
||||
where m = A.unsafeIndex arr j
|
||||
n = A.unsafeIndex arr k
|
||||
j = off + i
|
||||
k = j - 1
|
||||
{-# INLINE reverseIter #-}
|
||||
|
||||
-- | /O(1)/ Iterate one step backwards through a UTF-16 array,
|
||||
-- returning the delta to add (i.e. a negative number) to give the
|
||||
-- next offset to iterate at.
|
||||
--
|
||||
-- @since 1.1.1.0
|
||||
reverseIter_ :: Text -> Int -> Int
|
||||
reverseIter_ (Text arr off _len) i
|
||||
| m < 0xDC00 || m > 0xDFFF = -1
|
||||
| otherwise = -2
|
||||
where m = A.unsafeIndex arr (off+i)
|
||||
{-# INLINE reverseIter_ #-}
|
||||
|
||||
-- | /O(1)/ Return the length of a 'Text' in units of 'Word16'. This
|
||||
-- is useful for sizing a target array appropriately before using
|
||||
-- 'unsafeCopyToPtr'.
|
||||
lengthWord16 :: Text -> Int
|
||||
lengthWord16 (Text _arr _off len) = len
|
||||
{-# INLINE lengthWord16 #-}
|
||||
|
||||
-- | /O(1)/ Unchecked take of 'k' 'Word16's from the front of a 'Text'.
|
||||
takeWord16 :: Int -> Text -> Text
|
||||
takeWord16 k (Text arr off _len) = Text arr off k
|
||||
{-# INLINE takeWord16 #-}
|
||||
|
||||
-- | /O(1)/ Unchecked drop of 'k' 'Word16's from the front of a 'Text'.
|
||||
dropWord16 :: Int -> Text -> Text
|
||||
dropWord16 k (Text arr off len) = Text arr (off+k) (len-k)
|
||||
{-# INLINE dropWord16 #-}
|
@ -1,26 +0,0 @@
|
||||
Copyright (c) 2008-2009, Tom Harper
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,31 +0,0 @@
|
||||
# `text`: Fast, packed Unicode strings, using stream fusion
|
||||
|
||||
This package provides the Data.Text library, a library for the space-
|
||||
and time-efficient manipulation of Unicode text in Haskell.
|
||||
|
||||
**Please refer to the [package description on Hackage](https://hackage.haskell.org/package/text#description) for more information.**
|
||||
|
||||
# Get involved!
|
||||
|
||||
Please report bugs via the
|
||||
[github issue tracker](https://github.com/haskell/text/issues).
|
||||
|
||||
Master [git repository](https://github.com/haskell/text):
|
||||
|
||||
* `git clone git://github.com/haskell/text.git`
|
||||
|
||||
There's also a [Mercurial mirror](https://bitbucket.org/bos/text):
|
||||
|
||||
* `hg clone https://bitbucket.org/bos/text`
|
||||
|
||||
(You can create and contribute changes using either Mercurial or git.)
|
||||
|
||||
|
||||
# Authors
|
||||
|
||||
The base code for this library was originally written by Tom Harper,
|
||||
based on the stream fusion framework developed by Roman Leshchinskiy,
|
||||
Duncan Coutts, and Don Stewart.
|
||||
|
||||
The core library was fleshed out, debugged, and tested by Bryan
|
||||
O'Sullivan <bos@serpentine.com>, and he is the current maintainer.
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env runhaskell
|
||||
> import Distribution.Simple
|
||||
> main = defaultMain
|
@ -1,4 +0,0 @@
|
||||
/dist/
|
||||
/dist-newstyle/
|
||||
/.ghc.environment.*
|
||||
/cabal.project.local
|
@ -1,2 +0,0 @@
|
||||
import Distribution.Simple
|
||||
main = defaultMain
|
@ -1,2 +0,0 @@
|
||||
-- NB: we use a separate project
|
||||
packages: .
|
@ -1,35 +0,0 @@
|
||||
#include <iconv.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
int time_iconv(char *srcbuf, size_t srcbufsize)
|
||||
{
|
||||
uint16_t *destbuf = NULL;
|
||||
size_t destbufsize;
|
||||
static uint16_t *origdestbuf;
|
||||
static size_t origdestbufsize;
|
||||
iconv_t ic = (iconv_t) -1;
|
||||
int ret = 0;
|
||||
|
||||
if (ic == (iconv_t) -1) {
|
||||
ic = iconv_open("UTF-16LE", "UTF-8");
|
||||
if (ic == (iconv_t) -1) {
|
||||
ret = -1;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
destbufsize = srcbufsize * sizeof(uint16_t);
|
||||
if (destbufsize > origdestbufsize) {
|
||||
free(origdestbuf);
|
||||
origdestbuf = destbuf = malloc(origdestbufsize = destbufsize);
|
||||
} else {
|
||||
destbuf = origdestbuf;
|
||||
}
|
||||
|
||||
iconv(ic, &srcbuf, &srcbufsize, (char**) &destbuf, &destbufsize);
|
||||
|
||||
done:
|
||||
return ret;
|
||||
}
|
@ -1,73 +0,0 @@
|
||||
-- | Main module to run the micro benchmarks
|
||||
--
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Main
|
||||
( main
|
||||
) where
|
||||
|
||||
import Criterion.Main (defaultMain, bgroup, env)
|
||||
import System.FilePath ((</>))
|
||||
import System.IO (IOMode (WriteMode), openFile, hSetEncoding, utf8)
|
||||
|
||||
import qualified Benchmarks.Builder as Builder
|
||||
import qualified Benchmarks.Concat as Concat
|
||||
import qualified Benchmarks.DecodeUtf8 as DecodeUtf8
|
||||
import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
|
||||
import qualified Benchmarks.Equality as Equality
|
||||
import qualified Benchmarks.FileRead as FileRead
|
||||
import qualified Benchmarks.FoldLines as FoldLines
|
||||
import qualified Benchmarks.Mul as Mul
|
||||
import qualified Benchmarks.Pure as Pure
|
||||
import qualified Benchmarks.ReadNumbers as ReadNumbers
|
||||
import qualified Benchmarks.Replace as Replace
|
||||
import qualified Benchmarks.Search as Search
|
||||
import qualified Benchmarks.Stream as Stream
|
||||
import qualified Benchmarks.WordFrequencies as WordFrequencies
|
||||
|
||||
import qualified Benchmarks.Programs.BigTable as Programs.BigTable
|
||||
import qualified Benchmarks.Programs.Cut as Programs.Cut
|
||||
import qualified Benchmarks.Programs.Fold as Programs.Fold
|
||||
import qualified Benchmarks.Programs.Sort as Programs.Sort
|
||||
import qualified Benchmarks.Programs.StripTags as Programs.StripTags
|
||||
import qualified Benchmarks.Programs.Throughput as Programs.Throughput
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
sink <- openFile "/dev/null" WriteMode
|
||||
hSetEncoding sink utf8
|
||||
defaultMain
|
||||
[ Builder.benchmark
|
||||
, Concat.benchmark
|
||||
, env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html")
|
||||
, env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml")
|
||||
, env (DecodeUtf8.initEnv (tf "ascii.txt")) (DecodeUtf8.benchmark "ascii")
|
||||
, env (DecodeUtf8.initEnv (tf "russian.txt")) (DecodeUtf8.benchmark "russian")
|
||||
, env (DecodeUtf8.initEnv (tf "japanese.txt")) (DecodeUtf8.benchmark "japanese")
|
||||
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
|
||||
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
|
||||
, FileRead.benchmark (tf "russian.txt")
|
||||
, FoldLines.benchmark (tf "russian.txt")
|
||||
, env Mul.initEnv Mul.benchmark
|
||||
, env (Pure.initEnv (tf "tiny.txt")) (Pure.benchmark "tiny")
|
||||
, env (Pure.initEnv (tf "ascii-small.txt")) (Pure.benchmark "ascii-small")
|
||||
, env (Pure.initEnv (tf "ascii.txt")) (Pure.benchmark "ascii")
|
||||
, env (Pure.initEnv (tf "english.txt")) (Pure.benchmark "english")
|
||||
, env (Pure.initEnv (tf "russian-small.txt")) (Pure.benchmark "russian")
|
||||
, env (Pure.initEnv (tf "japanese.txt")) (Pure.benchmark "japanese")
|
||||
, env (ReadNumbers.initEnv (tf "numbers.txt")) ReadNumbers.benchmark
|
||||
, env (Replace.initEnv (tf "russian.txt")) (Replace.benchmark "принимая" "своем")
|
||||
, env (Search.initEnv (tf "russian.txt")) (Search.benchmark "принимая")
|
||||
, env (Stream.initEnv (tf "russian.txt")) Stream.benchmark
|
||||
, env (WordFrequencies.initEnv (tf "russian.txt")) WordFrequencies.benchmark
|
||||
, bgroup "Programs"
|
||||
[ Programs.BigTable.benchmark sink
|
||||
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
|
||||
, Programs.Fold.benchmark (tf "russian.txt") sink
|
||||
, Programs.Sort.benchmark (tf "russian.txt") sink
|
||||
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
|
||||
, Programs.Throughput.benchmark (tf "russian.txt") sink
|
||||
]
|
||||
]
|
||||
where
|
||||
-- Location of a test file
|
||||
tf = ("../tests/text-test-data" </>)
|
@ -1,75 +0,0 @@
|
||||
-- | Testing the internal builder monoid
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Concatenating many small strings using a builder
|
||||
--
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Benchmarks.Builder
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, nf)
|
||||
import Data.Binary.Builder as B
|
||||
import Data.ByteString.Char8 ()
|
||||
import Data.Monoid (mconcat, mempty)
|
||||
import qualified Blaze.ByteString.Builder as Blaze
|
||||
import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
|
||||
import qualified Data.ByteString as SB
|
||||
import qualified Data.ByteString.Lazy as LB
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as LT
|
||||
import qualified Data.Text.Lazy.Builder as LTB
|
||||
import qualified Data.Text.Lazy.Builder.Int as Int
|
||||
import Data.Int (Int64)
|
||||
|
||||
benchmark :: Benchmark
|
||||
benchmark = bgroup "Builder"
|
||||
[ bgroup "Comparison"
|
||||
[ bench "LazyText" $ nf
|
||||
(LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
|
||||
, bench "Binary" $ nf
|
||||
(LB.length . B.toLazyByteString . mconcat . map B.fromByteString)
|
||||
byteStrings
|
||||
, bench "Blaze" $ nf
|
||||
(LB.length . Blaze.toLazyByteString . mconcat . map Blaze.fromString)
|
||||
strings
|
||||
]
|
||||
, bgroup "Int"
|
||||
[ bgroup "Decimal"
|
||||
[ bgroup "Positive" .
|
||||
flip map numbers $ \n ->
|
||||
(bench (show (length (show n))) $ nf (LTB.toLazyText . Int.decimal) n)
|
||||
, bgroup "Negative" .
|
||||
flip map numbers $ \m ->
|
||||
let n = negate m in
|
||||
(bench (show (length (show n))) $ nf (LTB.toLazyText . Int.decimal) n)
|
||||
, bench "Empty" $ nf LTB.toLazyText mempty
|
||||
, bgroup "Show" .
|
||||
flip map numbers $ \n ->
|
||||
(bench (show (length (show n))) $ nf show n)
|
||||
]
|
||||
]
|
||||
]
|
||||
where
|
||||
numbers :: [Int64]
|
||||
numbers = [
|
||||
6, 14, 500, 9688, 10654, 620735, 5608880, 37010612,
|
||||
731223504, 5061580596, 24596952933, 711732309084, 2845910093839,
|
||||
54601756118340, 735159434806159, 3619097625502435, 95777227510267124,
|
||||
414944309510675693, 8986407456998704019
|
||||
]
|
||||
|
||||
texts :: [T.Text]
|
||||
texts = take 200000 $ cycle ["foo", "λx", "由の"]
|
||||
{-# NOINLINE texts #-}
|
||||
|
||||
-- Note that the non-ascii characters will be chopped
|
||||
byteStrings :: [SB.ByteString]
|
||||
byteStrings = take 200000 $ cycle ["foo", "λx", "由の"]
|
||||
{-# NOINLINE byteStrings #-}
|
||||
|
||||
-- Note that the non-ascii characters will be chopped
|
||||
strings :: [String]
|
||||
strings = take 200000 $ cycle ["foo", "λx", "由の"]
|
||||
{-# NOINLINE strings #-}
|
@ -1,25 +0,0 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Benchmarks.Concat (benchmark) where
|
||||
|
||||
import Control.Monad.Trans.Writer
|
||||
import Criterion (Benchmark, bgroup, bench, whnf)
|
||||
import Data.Text as T
|
||||
|
||||
benchmark :: Benchmark
|
||||
benchmark = bgroup "Concat"
|
||||
[ bench "append" $ whnf (append4 "Text 1" "Text 2" "Text 3") "Text 4"
|
||||
, bench "concat" $ whnf (concat4 "Text 1" "Text 2" "Text 3") "Text 4"
|
||||
, bench "write" $ whnf (write4 "Text 1" "Text 2" "Text 3") "Text 4"
|
||||
]
|
||||
|
||||
append4, concat4, write4 :: Text -> Text -> Text -> Text -> Text
|
||||
|
||||
{-# NOINLINE append4 #-}
|
||||
append4 x1 x2 x3 x4 = x1 `append` x2 `append` x3 `append` x4
|
||||
|
||||
{-# NOINLINE concat4 #-}
|
||||
concat4 x1 x2 x3 x4 = T.concat [x1, x2, x3, x4]
|
||||
|
||||
{-# NOINLINE write4 #-}
|
||||
write4 x1 x2 x3 x4 = execWriter $ tell x1 >> tell x2 >> tell x3 >> tell x4
|
@ -1,74 +0,0 @@
|
||||
{-# LANGUAGE ForeignFunctionInterface #-}
|
||||
|
||||
-- | Test decoding of UTF-8
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Decoding bytes using UTF-8
|
||||
--
|
||||
-- In some tests:
|
||||
--
|
||||
-- * Taking the length of the result
|
||||
--
|
||||
-- * Taking the init of the result
|
||||
--
|
||||
-- The latter are used for testing stream fusion.
|
||||
--
|
||||
module Benchmarks.DecodeUtf8
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Foreign.C.Types
|
||||
import Data.ByteString.Internal (ByteString(..))
|
||||
import Data.ByteString.Lazy.Internal (ByteString(..))
|
||||
import Foreign.Ptr (Ptr, plusPtr)
|
||||
import Foreign.ForeignPtr (withForeignPtr)
|
||||
import Data.Word (Word8)
|
||||
import qualified Criterion as C
|
||||
import Criterion (Benchmark, bgroup, nf, whnfIO)
|
||||
import qualified Codec.Binary.UTF8.Generic as U8
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
|
||||
type Env = (B.ByteString, BL.ByteString)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
bs <- B.readFile fp
|
||||
lbs <- BL.readFile fp
|
||||
return (bs, lbs)
|
||||
|
||||
benchmark :: String -> Env -> Benchmark
|
||||
benchmark kind ~(bs, lbs) =
|
||||
let bench name = C.bench (name ++ "+" ++ kind)
|
||||
decodeStream (Chunk b0 bs0) = case T.streamDecodeUtf8 b0 of
|
||||
T.Some t0 _ f0 -> t0 : go f0 bs0
|
||||
where go f (Chunk b bs1) = case f b of
|
||||
T.Some t1 _ f1 -> t1 : go f1 bs1
|
||||
go _ _ = []
|
||||
decodeStream _ = []
|
||||
in bgroup "DecodeUtf8"
|
||||
[ bench "Strict" $ nf T.decodeUtf8 bs
|
||||
, bench "Stream" $ nf decodeStream lbs
|
||||
, bench "IConv" $ whnfIO $ iconv bs
|
||||
, bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs
|
||||
, bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs
|
||||
, bench "Lazy" $ nf TL.decodeUtf8 lbs
|
||||
, bench "LazyLength" $ nf (TL.length . TL.decodeUtf8) lbs
|
||||
, bench "LazyInitLength" $ nf (TL.length . TL.init . TL.decodeUtf8) lbs
|
||||
, bench "StrictStringUtf8" $ nf U8.toString bs
|
||||
, bench "StrictStringUtf8Length" $ nf (length . U8.toString) bs
|
||||
, bench "LazyStringUtf8" $ nf U8.toString lbs
|
||||
, bench "LazyStringUtf8Length" $ nf (length . U8.toString) lbs
|
||||
]
|
||||
|
||||
iconv :: B.ByteString -> IO CInt
|
||||
iconv (PS fp off len) = withForeignPtr fp $ \ptr ->
|
||||
time_iconv (ptr `plusPtr` off) (fromIntegral len)
|
||||
|
||||
foreign import ccall unsafe time_iconv :: Ptr Word8 -> CSize -> IO CInt
|
@ -1,33 +0,0 @@
|
||||
-- | UTF-8 encode a text
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Replicating a string a number of times
|
||||
--
|
||||
-- * UTF-8 encoding it
|
||||
--
|
||||
module Benchmarks.EncodeUtf8
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnf)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
|
||||
benchmark :: String -> Benchmark
|
||||
benchmark string =
|
||||
bgroup "EncodeUtf8"
|
||||
[ bench "Text" $ whnf (B.length . T.encodeUtf8) text
|
||||
, bench "LazyText" $ whnf (BL.length . TL.encodeUtf8) lazyText
|
||||
]
|
||||
where
|
||||
-- The string in different formats
|
||||
text = T.replicate k $ T.pack string
|
||||
lazyText = TL.replicate (fromIntegral k) $ TL.pack string
|
||||
|
||||
-- Amount
|
||||
k = 100000
|
@ -1,43 +0,0 @@
|
||||
-- | Compare a string with a copy of itself that is identical except
|
||||
-- for the last character.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Comparison of strings (Eq instance)
|
||||
--
|
||||
module Benchmarks.Equality
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnf)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.ByteString.Lazy.Char8 as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
|
||||
type Env = (T.Text, TL.Text, B.ByteString, BL.ByteString, BL.ByteString, String)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
b <- B.readFile fp
|
||||
bl1 <- BL.readFile fp
|
||||
-- A lazy bytestring is a list of chunks. When we do not explicitly create two
|
||||
-- different lazy bytestrings at a different address, the bytestring library
|
||||
-- will compare the chunk addresses instead of the chunk contents. This is why
|
||||
-- we read the lazy bytestring twice here.
|
||||
bl2 <- BL.readFile fp
|
||||
l <- readFile fp
|
||||
return (T.decodeUtf8 b, TL.decodeUtf8 bl1, b, bl1, bl2, l)
|
||||
|
||||
benchmark :: Env -> Benchmark
|
||||
benchmark ~(t, tl, b, bl1, bl2, l) =
|
||||
bgroup "Equality"
|
||||
[ bench "Text" $ whnf (== T.init t `T.snoc` '\xfffd') t
|
||||
, bench "LazyText" $ whnf (== TL.init tl `TL.snoc` '\xfffd') tl
|
||||
, bench "ByteString" $ whnf (== B.init b `B.snoc` '\xfffd') b
|
||||
, bench "LazyByteString" $ whnf (== BL.init bl2 `BL.snoc` '\xfffd') bl1
|
||||
, bench "String" $ whnf (== init l ++ "\xfffd") l
|
||||
]
|
@ -1,33 +0,0 @@
|
||||
-- | Benchmarks simple file reading
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading a file from the disk
|
||||
--
|
||||
module Benchmarks.FileRead
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Control.Applicative ((<$>))
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import qualified Data.ByteString as SB
|
||||
import qualified Data.ByteString.Lazy as LB
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as LT
|
||||
import qualified Data.Text.Lazy.Encoding as LT
|
||||
import qualified Data.Text.Lazy.IO as LT
|
||||
|
||||
benchmark :: FilePath -> Benchmark
|
||||
benchmark p = bgroup "FileRead"
|
||||
[ bench "String" $ whnfIO $ length <$> readFile p
|
||||
, bench "ByteString" $ whnfIO $ SB.length <$> SB.readFile p
|
||||
, bench "LazyByteString" $ whnfIO $ LB.length <$> LB.readFile p
|
||||
, bench "Text" $ whnfIO $ T.length <$> T.readFile p
|
||||
, bench "LazyText" $ whnfIO $ LT.length <$> LT.readFile p
|
||||
, bench "TextByteString" $ whnfIO $
|
||||
(T.length . T.decodeUtf8) <$> SB.readFile p
|
||||
, bench "LazyTextByteString" $ whnfIO $
|
||||
(LT.length . LT.decodeUtf8) <$> LB.readFile p
|
||||
]
|
@ -1,58 +0,0 @@
|
||||
-- | Read a file line-by-line using handles, and perform a fold over the lines.
|
||||
-- The fold is used here to calculate the number of lines in the file.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Buffered, line-based IO
|
||||
--
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
module Benchmarks.FoldLines
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import System.IO
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as T
|
||||
|
||||
benchmark :: FilePath -> Benchmark
|
||||
benchmark fp = bgroup "ReadLines"
|
||||
[ bench "Text" $ withHandle $ foldLinesT (\n _ -> n + 1) (0 :: Int)
|
||||
, bench "ByteString" $ withHandle $ foldLinesB (\n _ -> n + 1) (0 :: Int)
|
||||
]
|
||||
where
|
||||
withHandle f = whnfIO $ do
|
||||
h <- openFile fp ReadMode
|
||||
hSetBuffering h (BlockBuffering (Just 16384))
|
||||
x <- f h
|
||||
hClose h
|
||||
return x
|
||||
|
||||
-- | Text line fold
|
||||
--
|
||||
foldLinesT :: (a -> T.Text -> a) -> a -> Handle -> IO a
|
||||
foldLinesT f z0 h = go z0
|
||||
where
|
||||
go !z = do
|
||||
eof <- hIsEOF h
|
||||
if eof
|
||||
then return z
|
||||
else do
|
||||
l <- T.hGetLine h
|
||||
let z' = f z l in go z'
|
||||
{-# INLINE foldLinesT #-}
|
||||
|
||||
-- | ByteString line fold
|
||||
--
|
||||
foldLinesB :: (a -> B.ByteString -> a) -> a -> Handle -> IO a
|
||||
foldLinesB f z0 h = go z0
|
||||
where
|
||||
go !z = do
|
||||
eof <- hIsEOF h
|
||||
if eof
|
||||
then return z
|
||||
else do
|
||||
l <- B.hGetLine h
|
||||
let z' = f z l in go z'
|
||||
{-# INLINE foldLinesB #-}
|
@ -1,146 +0,0 @@
|
||||
module Benchmarks.Mul
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Control.Exception (evaluate)
|
||||
import Criterion.Main
|
||||
import Data.Int (Int32, Int64)
|
||||
import Data.Text.Internal (mul32, mul64)
|
||||
import qualified Data.Vector.Unboxed as U
|
||||
|
||||
oldMul :: Int64 -> Int64 -> Int64
|
||||
oldMul m n
|
||||
| n == 0 = 0
|
||||
| m <= maxBound `quot` n = m * n
|
||||
| otherwise = error "overflow"
|
||||
|
||||
type Env = (U.Vector (Int32,Int32), U.Vector (Int64,Int64))
|
||||
|
||||
initEnv :: IO Env
|
||||
initEnv = do
|
||||
x <- evaluate testVector32
|
||||
y <- evaluate testVector64
|
||||
return (x, y)
|
||||
|
||||
benchmark :: Env -> Benchmark
|
||||
benchmark ~(tv32, tv64) = bgroup "Mul"
|
||||
[ bench "oldMul" $ whnf (U.map (uncurry oldMul)) tv64
|
||||
, bench "mul64" $ whnf (U.map (uncurry mul64)) tv64
|
||||
, bench "*64" $ whnf (U.map (uncurry (*))) tv64
|
||||
, bench "mul32" $ whnf (U.map (uncurry mul32)) tv32
|
||||
, bench "*32" $ whnf (U.map (uncurry (*))) tv32
|
||||
]
|
||||
|
||||
testVector64 :: U.Vector (Int64,Int64)
|
||||
testVector64 = U.fromList [
|
||||
(0,1248868987182846646),(169004623633872,24458),(482549039517835,7614),
|
||||
(372,8157063115504364),(27,107095594861148252),(3,63249878517962420),
|
||||
(4363,255694473572912),(86678474,1732634806),(1572453024,1800489338),
|
||||
(9384523143,77053781),(49024709555,75095046),(7,43457620410239131),
|
||||
(8,8201563008844571),(387719037,1520696708),(189869238220197,1423),
|
||||
(46788016849611,23063),(503077742109974359,0),(104,1502010908706487),
|
||||
(30478140346,207525518),(80961140129236192,14),(4283,368012829143675),
|
||||
(1028719181728108146,6),(318904,5874863049591),(56724427166898,110794),
|
||||
(234539368,31369110449),(2,251729663598178612),(103291548194451219,5),
|
||||
(76013,5345328755566),(1769631,2980846129318),(40898,60598477385754),
|
||||
(0,98931348893227155),(573555872156917492,3),(318821187115,4476566),
|
||||
(11152874213584,243582),(40274276,16636653248),(127,4249988676030597),
|
||||
(103543712111871836,5),(71,16954462148248238),(3963027173504,216570),
|
||||
(13000,503523808916753),(17038308,20018685905),(0,510350226577891549),
|
||||
(175898,3875698895405),(425299191292676,5651),(17223451323664536,50),
|
||||
(61755131,14247665326),(0,1018195131697569303),(36433751497238985,20),
|
||||
(3473607861601050,1837),(1392342328,1733971838),(225770297367,3249655),
|
||||
(14,127545244155254102),(1751488975299136,2634),(3949208,504190668767),
|
||||
(153329,831454434345),(1066212122928663658,2),(351224,2663633539556),
|
||||
(344565,53388869217),(35825609350446863,54),(276011553660081475,10),
|
||||
(1969754174790470349,3),(35,68088438338633),(506710,3247689556438),
|
||||
(11099382291,327739909),(105787303549,32824363),(210366111,14759049409),
|
||||
(688893241579,3102676),(8490,70047474429581),(152085,29923000251880),
|
||||
(5046974599257095,400),(4183167795,263434071),(10089728,502781960687),
|
||||
(44831977765,4725378),(91,8978094664238578),(30990165721,44053350),
|
||||
(1772377,149651820860),(243420621763408572,4),(32,5790357453815138),
|
||||
(27980806337993771,5),(47696295759774,20848),(1745874142313778,1098),
|
||||
(46869334770121,1203),(886995283,1564424789),(40679396544,76002479),
|
||||
(1,672849481568486995),(337656187205,3157069),(816980552858963,6003),
|
||||
(2271434085804831543,1),(0,1934521023868747186),(6266220038281,15825),
|
||||
(4160,107115946987394),(524,246808621791561),(0,1952519482439636339),
|
||||
(128,2865935904539691),(1044,3211982069426297),(16000511542473,88922),
|
||||
(1253596745404082,2226),(27041,56836278958002),(23201,49247489754471),
|
||||
(175906590497,21252392),(185163584757182295,24),(34742225226802197,150),
|
||||
(2363228,250824838408),(216327527109550,45),(24,81574076994520675),
|
||||
(28559899906542,15356),(10890139774837133,511),(2293,707179303654492),
|
||||
(2749366833,40703233),(0,4498229704622845986),(439,4962056468281937),
|
||||
(662,1453820621089921),(16336770612459631,220),(24282989393,74239137),
|
||||
(2724564648490195,3),(743672760,124992589),(4528103,704330948891),
|
||||
(6050483122491561,250),(13322953,13594265152),(181794,22268101450214),
|
||||
(25957941712,75384092),(43352,7322262295009),(32838,52609059549923),
|
||||
(33003585202001564,2),(103019,68430142267402),(129918230800,8742978),
|
||||
(0,2114347379589080688),(2548,905723041545274),(222745067962838382,0),
|
||||
(1671683850790425181,1),(455,4836932776795684),(794227702827214,6620),
|
||||
(212534135175874,1365),(96432431858,29784975),(466626763743380,3484),
|
||||
(29793949,53041519613),(8359,309952753409844),(3908960585331901,26),
|
||||
(45185288970365760,114),(10131829775,68110174),(58039242399640479,83),
|
||||
(628092278238719399,6),(1,196469106875361889),(302336625,16347502444),
|
||||
(148,3748088684181047),(1,1649096568849015456),(1019866864,2349753026),
|
||||
(8211344830,569363306),(65647579546873,34753),(2340190,1692053129069),
|
||||
(64263301,30758930355),(48681618072372209,110),(7074794736,47640197),
|
||||
(249634721521,7991792),(1162917363807215,232),(7446433349,420634045),
|
||||
(63398619383,60709817),(51359004508011,14200),(131788797028647,7072),
|
||||
(52079887791430043,7),(7,136277667582599838),(28582879735696,50327),
|
||||
(1404582800566278,833),(469164435,15017166943),(99567079957578263,49),
|
||||
(1015285971,3625801566),(321504843,4104079293),(5196954,464515406632),
|
||||
(114246832260876,7468),(8149664437,487119673),(12265299,378168974869),
|
||||
(37711995764,30766513),(3971137243,710996152),(483120070302,603162),
|
||||
(103009942,61645547145),(8476344625340,6987),(547948761229739,1446),
|
||||
(42234,18624767306301),(13486714173011,58948),(4,198309153268019840),
|
||||
(9913176974,325539248),(28246225540203,116822),(2882463945582154,18),
|
||||
(959,25504987505398),(3,1504372236378217710),(13505229956793,374987),
|
||||
(751661959,457611342),(27375926,36219151769),(482168869,5301952074),
|
||||
(1,1577425863241520640),(714116235611821,1164),(904492524250310488,0),
|
||||
(5983514941763398,68),(10759472423,23540686),(72539568471529,34919),
|
||||
(4,176090672310337473),(938702842110356453,1),(673652445,3335287382),
|
||||
(3111998893666122,917),(1568013,3168419765469)]
|
||||
|
||||
testVector32 :: U.Vector (Int32,Int32)
|
||||
testVector32 = U.fromList [
|
||||
(39242,410),(0,100077553),(2206,9538),(509400240,1),(38048,6368),
|
||||
(1789,651480),(2399,157032),(701,170017),(5241456,14),(11212,70449),
|
||||
(1,227804876),(749687254,1),(74559,2954),(1158,147957),(410604456,1),
|
||||
(170851,1561),(92643422,1),(6192,180509),(7,24202210),(3440,241481),
|
||||
(5753677,5),(294327,1622),(252,4454673),(127684121,11),(28315800,30),
|
||||
(340370905,0),(1,667887987),(592782090,1),(49023,27641),(750,290387),
|
||||
(72886,3847),(0,301047933),(3050276,473),(1,788366142),(59457,15813),
|
||||
(637726933,1),(1135,344317),(853616,264),(696816,493),(7038,12046),
|
||||
(125219574,4),(803694088,1),(107081726,1),(39294,21699),(16361,38191),
|
||||
(132561123,12),(1760,23499),(847543,484),(175687349,1),(2963,252678),
|
||||
(6248,224553),(27596,4606),(5422922,121),(1542,485890),(131,583035),
|
||||
(59096,4925),(3637115,132),(0,947225435),(86854,6794),(2984745,339),
|
||||
(760129569,1),(1,68260595),(380835652,2),(430575,2579),(54514,7211),
|
||||
(15550606,3),(9,27367402),(3007053,207),(7060988,60),(28560,27130),
|
||||
(1355,21087),(10880,53059),(14563646,4),(461886361,1),(2,169260724),
|
||||
(241454126,2),(406797,1),(61631630,16),(44473,5943),(63869104,12),
|
||||
(950300,1528),(2113,62333),(120817,9358),(100261456,1),(426764723,1),
|
||||
(119,12723684),(3,53358711),(4448071,18),(1,230278091),(238,232102),
|
||||
(8,57316440),(42437979,10),(6769,19555),(48590,22006),(11500585,79),
|
||||
(2808,97638),(42,26952545),(11,32104194),(23954638,1),(785427272,0),
|
||||
(513,81379),(31333960,37),(897772,1009),(4,25679692),(103027993,12),
|
||||
(104972702,11),(546,443401),(7,65137092),(88574269,3),(872139069,0),
|
||||
(2,97417121),(378802603,0),(141071401,4),(22613,10575),(2191743,118),
|
||||
(470,116119),(7062,38166),(231056,1847),(43901963,9),(2400,70640),
|
||||
(63553,1555),(34,11249573),(815174,1820),(997894011,0),(98881794,2),
|
||||
(5448,43132),(27956,9),(904926,1357),(112608626,3),(124,613021),
|
||||
(282086,1966),(99,10656881),(113799,1501),(433318,2085),(442,948171),
|
||||
(165380,1043),(28,14372905),(14880,50462),(2386,219918),(229,1797565),
|
||||
(1174961,298),(3925,41833),(3903515,299),(15690452,111),(360860521,3),
|
||||
(7440846,81),(2541026,507),(0,492448477),(6869,82469),(245,8322939),
|
||||
(3503496,253),(123495298,0),(150963,2299),(33,4408482),(1,200911107),
|
||||
(305,252121),(13,123369189),(215846,8181),(2440,65387),(776764401,1),
|
||||
(1241172,434),(8,15493155),(81953961,6),(17884993,5),(26,6893822),
|
||||
(0,502035190),(1,582451018),(2,514870139),(227,3625619),(49,12720258),
|
||||
(1456769,207),(94797661,10),(234407,893),(26843,5783),(15688,24547),
|
||||
(4091,86268),(4339448,151),(21360,6294),(397046497,2),(1227,205936),
|
||||
(9966,21959),(160046791,1),(0,159992224),(27,24974797),(19177,29334),
|
||||
(4136148,42),(21179785,53),(61256583,31),(385,344176),(7,11934915),
|
||||
(1,18992566),(3488065,5),(768021,224),(36288474,7),(8624,117561),
|
||||
(8,20341439),(5903,261475),(561,1007618),(1738,392327),(633049,1708)]
|
@ -1,42 +0,0 @@
|
||||
-- | Create a large HTML table and dump it to a handle
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Creating a large HTML document using a builder
|
||||
--
|
||||
-- * Writing to a handle
|
||||
--
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Benchmarks.Programs.BigTable
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bench, whnfIO)
|
||||
import Data.Monoid (mappend, mconcat)
|
||||
import Data.Text.Lazy.Builder (Builder, fromText, toLazyText)
|
||||
import Data.Text.Lazy.IO (hPutStr)
|
||||
import System.IO (Handle)
|
||||
import qualified Data.Text as T
|
||||
|
||||
benchmark :: Handle -> Benchmark
|
||||
benchmark sink = bench "BigTable" $ whnfIO $ do
|
||||
hPutStr sink "Content-Type: text/html\n\n<table>"
|
||||
hPutStr sink . toLazyText . makeTable =<< rows
|
||||
hPutStr sink "</table>"
|
||||
where
|
||||
-- We provide the number of rows in IO so the builder value isn't shared
|
||||
-- between the benchmark samples.
|
||||
rows :: IO Int
|
||||
rows = return 20000
|
||||
{-# NOINLINE rows #-}
|
||||
|
||||
makeTable :: Int -> Builder
|
||||
makeTable n = mconcat $ replicate n $ mconcat $ map makeCol [1 .. 50]
|
||||
|
||||
makeCol :: Int -> Builder
|
||||
makeCol 1 = fromText "<tr><td>1</td>"
|
||||
makeCol 50 = fromText "<td>50</td></tr>"
|
||||
makeCol i = fromText "<td>" `mappend` (fromInt i `mappend` fromText "</td>")
|
||||
|
||||
fromInt :: Int -> Builder
|
||||
fromInt = fromText . T.pack . show
|
@ -1,98 +0,0 @@
|
||||
-- | Cut into a file, selecting certain columns (e.g. columns 10 to 40)
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading the file
|
||||
--
|
||||
-- * Splitting into lines
|
||||
--
|
||||
-- * Taking a number of characters from the lines
|
||||
--
|
||||
-- * Joining the lines
|
||||
--
|
||||
-- * Writing back to a handle
|
||||
--
|
||||
module Benchmarks.Programs.Cut
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import System.IO (Handle, hPutStr)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.ByteString.Lazy.Char8 as BLC
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
benchmark :: FilePath -> Handle -> Int -> Int -> Benchmark
|
||||
benchmark p sink from to = bgroup "Cut"
|
||||
[ bench' "String" string
|
||||
, bench' "ByteString" byteString
|
||||
, bench' "LazyByteString" lazyByteString
|
||||
, bench' "Text" text
|
||||
, bench' "LazyText" lazyText
|
||||
, bench' "TextByteString" textByteString
|
||||
, bench' "LazyTextByteString" lazyTextByteString
|
||||
]
|
||||
where
|
||||
bench' n s = bench n $ whnfIO (s p sink from to)
|
||||
|
||||
string :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
string fp sink from to = do
|
||||
s <- readFile fp
|
||||
hPutStr sink $ cut s
|
||||
where
|
||||
cut = unlines . map (take (to - from) . drop from) . lines
|
||||
|
||||
byteString :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
byteString fp sink from to = do
|
||||
bs <- B.readFile fp
|
||||
B.hPutStr sink $ cut bs
|
||||
where
|
||||
cut = BC.unlines . map (B.take (to - from) . B.drop from) . BC.lines
|
||||
|
||||
lazyByteString :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
lazyByteString fp sink from to = do
|
||||
bs <- BL.readFile fp
|
||||
BL.hPutStr sink $ cut bs
|
||||
where
|
||||
cut = BLC.unlines . map (BL.take (to' - from') . BL.drop from') . BLC.lines
|
||||
from' = fromIntegral from
|
||||
to' = fromIntegral to
|
||||
|
||||
text :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
text fp sink from to = do
|
||||
t <- T.readFile fp
|
||||
T.hPutStr sink $ cut t
|
||||
where
|
||||
cut = T.unlines . map (T.take (to - from) . T.drop from) . T.lines
|
||||
|
||||
lazyText :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
lazyText fp sink from to = do
|
||||
t <- TL.readFile fp
|
||||
TL.hPutStr sink $ cut t
|
||||
where
|
||||
cut = TL.unlines . map (TL.take (to' - from') . TL.drop from') . TL.lines
|
||||
from' = fromIntegral from
|
||||
to' = fromIntegral to
|
||||
|
||||
textByteString :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
textByteString fp sink from to = do
|
||||
t <- T.decodeUtf8 `fmap` B.readFile fp
|
||||
B.hPutStr sink $ T.encodeUtf8 $ cut t
|
||||
where
|
||||
cut = T.unlines . map (T.take (to - from) . T.drop from) . T.lines
|
||||
|
||||
lazyTextByteString :: FilePath -> Handle -> Int -> Int -> IO ()
|
||||
lazyTextByteString fp sink from to = do
|
||||
t <- TL.decodeUtf8 `fmap` BL.readFile fp
|
||||
BL.hPutStr sink $ TL.encodeUtf8 $ cut t
|
||||
where
|
||||
cut = TL.unlines . map (TL.take (to' - from') . TL.drop from') . TL.lines
|
||||
from' = fromIntegral from
|
||||
to' = fromIntegral to
|
@ -1,68 +0,0 @@
|
||||
-- | Benchmark which formats paragraph, like the @sort@ unix utility.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading the file
|
||||
--
|
||||
-- * Splitting into paragraphs
|
||||
--
|
||||
-- * Reformatting the paragraphs to a certain line width
|
||||
--
|
||||
-- * Concatenating the results using the text builder
|
||||
--
|
||||
-- * Writing back to a handle
|
||||
--
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Benchmarks.Programs.Fold
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Data.List (foldl')
|
||||
import Data.List (intersperse)
|
||||
import Data.Monoid (mempty, mappend, mconcat)
|
||||
import System.IO (Handle)
|
||||
import Criterion (Benchmark, bench, whnfIO)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy.Builder as TLB
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
benchmark :: FilePath -> Handle -> Benchmark
|
||||
benchmark i o =
|
||||
bench "Fold" $ whnfIO $ T.readFile i >>= TL.hPutStr o . fold 80
|
||||
|
||||
-- | We represent a paragraph by a word list
|
||||
--
|
||||
type Paragraph = [T.Text]
|
||||
|
||||
-- | Fold a text
|
||||
--
|
||||
fold :: Int -> T.Text -> TL.Text
|
||||
fold maxWidth = TLB.toLazyText . mconcat .
|
||||
intersperse "\n\n" . map (foldParagraph maxWidth) . paragraphs
|
||||
|
||||
-- | Fold a paragraph
|
||||
--
|
||||
foldParagraph :: Int -> Paragraph -> TLB.Builder
|
||||
foldParagraph _ [] = mempty
|
||||
foldParagraph max' (w : ws) = fst $ foldl' go (TLB.fromText w, T.length w) ws
|
||||
where
|
||||
go (builder, width) word
|
||||
| width + len + 1 <= max' =
|
||||
(builder `mappend` " " `mappend` word', width + len + 1)
|
||||
| otherwise =
|
||||
(builder `mappend` "\n" `mappend` word', len)
|
||||
where
|
||||
word' = TLB.fromText word
|
||||
len = T.length word
|
||||
|
||||
-- | Divide a text into paragraphs
|
||||
--
|
||||
paragraphs :: T.Text -> [Paragraph]
|
||||
paragraphs = splitParagraphs . map T.words . T.lines
|
||||
where
|
||||
splitParagraphs ls = case break null ls of
|
||||
([], []) -> []
|
||||
(p, []) -> [concat p]
|
||||
(p, lr) -> concat p : splitParagraphs (dropWhile null lr)
|
@ -1,71 +0,0 @@
|
||||
-- | This benchmark sorts the lines of a file, like the @sort@ unix utility.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading the file
|
||||
--
|
||||
-- * Splitting into lines
|
||||
--
|
||||
-- * Sorting the lines
|
||||
--
|
||||
-- * Joining the lines
|
||||
--
|
||||
-- * Writing back to a handle
|
||||
--
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Benchmarks.Programs.Sort
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import Data.Monoid (mconcat)
|
||||
import System.IO (Handle, hPutStr)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.ByteString.Lazy.Char8 as BLC
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Builder as TLB
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
benchmark :: FilePath -> Handle -> Benchmark
|
||||
benchmark i o = bgroup "Sort"
|
||||
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
|
||||
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
|
||||
, bench "LazyByteString" $ whnfIO $
|
||||
BL.readFile i >>= BL.hPutStr o . lazyByteString
|
||||
, bench "Text" $ whnfIO $ T.readFile i >>= T.hPutStr o . text
|
||||
, bench "LazyText" $ whnfIO $ TL.readFile i >>= TL.hPutStr o . lazyText
|
||||
, bench "TextByteString" $ whnfIO $ B.readFile i >>=
|
||||
B.hPutStr o . T.encodeUtf8 . text . T.decodeUtf8
|
||||
, bench "LazyTextByteString" $ whnfIO $ BL.readFile i >>=
|
||||
BL.hPutStr o . TL.encodeUtf8 . lazyText . TL.decodeUtf8
|
||||
, bench "TextBuilder" $ whnfIO $ B.readFile i >>=
|
||||
BL.hPutStr o . TL.encodeUtf8 . textBuilder . T.decodeUtf8
|
||||
]
|
||||
|
||||
string :: String -> String
|
||||
string = unlines . L.sort . lines
|
||||
|
||||
byteString :: B.ByteString -> B.ByteString
|
||||
byteString = BC.unlines . L.sort . BC.lines
|
||||
|
||||
lazyByteString :: BL.ByteString -> BL.ByteString
|
||||
lazyByteString = BLC.unlines . L.sort . BLC.lines
|
||||
|
||||
text :: T.Text -> T.Text
|
||||
text = T.unlines . L.sort . T.lines
|
||||
|
||||
lazyText :: TL.Text -> TL.Text
|
||||
lazyText = TL.unlines . L.sort . TL.lines
|
||||
|
||||
-- | Text variant using a builder monoid for the final concatenation
|
||||
--
|
||||
textBuilder :: T.Text -> TL.Text
|
||||
textBuilder = TLB.toLazyText . mconcat . L.intersperse (TLB.singleton '\n') .
|
||||
map TLB.fromText . L.sort . T.lines
|
@ -1,53 +0,0 @@
|
||||
-- | Program to replace HTML tags by whitespace
|
||||
--
|
||||
-- This program was originally contributed by Petr Prokhorenkov.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading the file
|
||||
--
|
||||
-- * Replacing text between HTML tags (<>) with whitespace
|
||||
--
|
||||
-- * Writing back to a handle
|
||||
--
|
||||
{-# OPTIONS_GHC -fspec-constr-count=5 #-}
|
||||
module Benchmarks.Programs.StripTags
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import Data.List (mapAccumL)
|
||||
import System.IO (Handle, hPutStr)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as BC
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
|
||||
benchmark :: FilePath -> Handle -> Benchmark
|
||||
benchmark i o = bgroup "StripTags"
|
||||
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
|
||||
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
|
||||
, bench "Text" $ whnfIO $ T.readFile i >>= T.hPutStr o . text
|
||||
, bench "TextByteString" $ whnfIO $
|
||||
B.readFile i >>= B.hPutStr o . T.encodeUtf8 . text . T.decodeUtf8
|
||||
]
|
||||
|
||||
string :: String -> String
|
||||
string = snd . mapAccumL step 0
|
||||
|
||||
text :: T.Text -> T.Text
|
||||
text = snd . T.mapAccumL step 0
|
||||
|
||||
byteString :: B.ByteString -> B.ByteString
|
||||
byteString = snd . BC.mapAccumL step 0
|
||||
|
||||
step :: Int -> Char -> (Int, Char)
|
||||
step d c
|
||||
| d > 0 || d' > 0 = (d', ' ')
|
||||
| otherwise = (d', c)
|
||||
where
|
||||
d' = d + depth c
|
||||
depth '>' = 1
|
||||
depth '<' = -1
|
||||
depth _ = 0
|
@ -1,41 +0,0 @@
|
||||
-- | This benchmark simply reads and writes a file using the various string
|
||||
-- libraries. The point of it is that we can make better estimations on how
|
||||
-- much time the other benchmarks spend doing IO.
|
||||
--
|
||||
-- Note that we expect ByteStrings to be a whole lot faster, since they do not
|
||||
-- do any actual encoding/decoding here, while String and Text do have UTF-8
|
||||
-- encoding/decoding.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Reading the file
|
||||
--
|
||||
-- * Replacing text between HTML tags (<>) with whitespace
|
||||
--
|
||||
-- * Writing back to a handle
|
||||
--
|
||||
module Benchmarks.Programs.Throughput
|
||||
( benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnfIO)
|
||||
import System.IO (Handle, hPutStr)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
benchmark :: FilePath -> Handle -> Benchmark
|
||||
benchmark fp sink = bgroup "Throughput"
|
||||
[ bench "String" $ whnfIO $ readFile fp >>= hPutStr sink
|
||||
, bench "ByteString" $ whnfIO $ B.readFile fp >>= B.hPutStr sink
|
||||
, bench "LazyByteString" $ whnfIO $ BL.readFile fp >>= BL.hPutStr sink
|
||||
, bench "Text" $ whnfIO $ T.readFile fp >>= T.hPutStr sink
|
||||
, bench "LazyText" $ whnfIO $ TL.readFile fp >>= TL.hPutStr sink
|
||||
, bench "TextByteString" $ whnfIO $
|
||||
B.readFile fp >>= B.hPutStr sink . T.encodeUtf8 . T.decodeUtf8
|
||||
, bench "LazyTextByteString" $ whnfIO $
|
||||
BL.readFile fp >>= BL.hPutStr sink . TL.encodeUtf8 . TL.decodeUtf8
|
||||
]
|
@ -1,518 +0,0 @@
|
||||
-- | Benchmarks various pure functions from the Text library
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Most pure functions defined the string types
|
||||
--
|
||||
{-# LANGUAGE BangPatterns, CPP, GADTs, MagicHash #-}
|
||||
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, RecordWildCards #-}
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
module Benchmarks.Pure
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Control.DeepSeq (NFData (..))
|
||||
import Control.Exception (evaluate)
|
||||
import Criterion (Benchmark, bgroup, bench, nf)
|
||||
import Data.Char (toLower, toUpper)
|
||||
import Data.Monoid (mappend, mempty)
|
||||
import GHC.Base (Char (..), Int (..), chr#, ord#, (+#))
|
||||
import GHC.Generics (Generic)
|
||||
import GHC.Int (Int64)
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import qualified Data.ByteString.Lazy.Char8 as BL
|
||||
import qualified Data.ByteString.UTF8 as UTF8
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Builder as TB
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
|
||||
data Env = Env
|
||||
{ bsa :: !BS.ByteString
|
||||
, ta :: !T.Text
|
||||
, tb :: !T.Text
|
||||
, tla :: !TL.Text
|
||||
, tlb :: !TL.Text
|
||||
, bsb :: !BS.ByteString
|
||||
, bla :: !BL.ByteString
|
||||
, blb :: !BL.ByteString
|
||||
, sa :: !String
|
||||
, sb :: !String
|
||||
, bsa_len :: !Int
|
||||
, ta_len :: !Int
|
||||
, bla_len :: !Int64
|
||||
, tla_len :: !Int64
|
||||
, sa_len :: !Int
|
||||
, bsl :: [BS.ByteString]
|
||||
, bll :: [BL.ByteString]
|
||||
, tl :: [T.Text]
|
||||
, tll :: [TL.Text]
|
||||
, sl :: [String]
|
||||
} deriving (Generic, NFData)
|
||||
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
-- Evaluate stuff before actually running the benchmark, we don't want to
|
||||
-- count it here.
|
||||
|
||||
-- ByteString A
|
||||
bsa <- BS.readFile fp
|
||||
|
||||
-- Text A/B, LazyText A/B
|
||||
ta <- evaluate $ T.decodeUtf8 bsa
|
||||
tb <- evaluate $ T.toUpper ta
|
||||
tla <- evaluate $ TL.fromChunks (T.chunksOf 16376 ta)
|
||||
tlb <- evaluate $ TL.fromChunks (T.chunksOf 16376 tb)
|
||||
|
||||
-- ByteString B, LazyByteString A/B
|
||||
bsb <- evaluate $ T.encodeUtf8 tb
|
||||
bla <- evaluate $ BL.fromChunks (chunksOf 16376 bsa)
|
||||
blb <- evaluate $ BL.fromChunks (chunksOf 16376 bsb)
|
||||
|
||||
-- String A/B
|
||||
sa <- evaluate $ UTF8.toString bsa
|
||||
sb <- evaluate $ T.unpack tb
|
||||
|
||||
-- Lengths
|
||||
bsa_len <- evaluate $ BS.length bsa
|
||||
ta_len <- evaluate $ T.length ta
|
||||
bla_len <- evaluate $ BL.length bla
|
||||
tla_len <- evaluate $ TL.length tla
|
||||
sa_len <- evaluate $ L.length sa
|
||||
|
||||
-- Lines
|
||||
bsl <- evaluate $ BS.lines bsa
|
||||
bll <- evaluate $ BL.lines bla
|
||||
tl <- evaluate $ T.lines ta
|
||||
tll <- evaluate $ TL.lines tla
|
||||
sl <- evaluate $ L.lines sa
|
||||
|
||||
return Env{..}
|
||||
|
||||
benchmark :: String -> Env -> Benchmark
|
||||
benchmark kind ~Env{..} =
|
||||
bgroup "Pure"
|
||||
[ bgroup "append"
|
||||
[ benchT $ nf (T.append tb) ta
|
||||
, benchTL $ nf (TL.append tlb) tla
|
||||
, benchBS $ nf (BS.append bsb) bsa
|
||||
, benchBSL $ nf (BL.append blb) bla
|
||||
, benchS $ nf ((++) sb) sa
|
||||
]
|
||||
, bgroup "concat"
|
||||
[ benchT $ nf T.concat tl
|
||||
, benchTL $ nf TL.concat tll
|
||||
, benchBS $ nf BS.concat bsl
|
||||
, benchBSL $ nf BL.concat bll
|
||||
, benchS $ nf L.concat sl
|
||||
]
|
||||
, bgroup "cons"
|
||||
[ benchT $ nf (T.cons c) ta
|
||||
, benchTL $ nf (TL.cons c) tla
|
||||
, benchBS $ nf (BS.cons c) bsa
|
||||
, benchBSL $ nf (BL.cons c) bla
|
||||
, benchS $ nf (c:) sa
|
||||
]
|
||||
, bgroup "concatMap"
|
||||
[ benchT $ nf (T.concatMap (T.replicate 3 . T.singleton)) ta
|
||||
, benchTL $ nf (TL.concatMap (TL.replicate 3 . TL.singleton)) tla
|
||||
, benchBS $ nf (BS.concatMap (BS.replicate 3)) bsa
|
||||
, benchBSL $ nf (BL.concatMap (BL.replicate 3)) bla
|
||||
, benchS $ nf (L.concatMap (L.replicate 3 . (:[]))) sa
|
||||
]
|
||||
, bgroup "decode"
|
||||
[ benchT $ nf T.decodeUtf8 bsa
|
||||
, benchTL $ nf TL.decodeUtf8 bla
|
||||
, benchBS $ nf BS.unpack bsa
|
||||
, benchBSL $ nf BL.unpack bla
|
||||
, benchS $ nf UTF8.toString bsa
|
||||
]
|
||||
, bgroup "decode'"
|
||||
[ benchT $ nf T.decodeUtf8' bsa
|
||||
, benchTL $ nf TL.decodeUtf8' bla
|
||||
]
|
||||
, bgroup "drop"
|
||||
[ benchT $ nf (T.drop (ta_len `div` 3)) ta
|
||||
, benchTL $ nf (TL.drop (tla_len `div` 3)) tla
|
||||
, benchBS $ nf (BS.drop (bsa_len `div` 3)) bsa
|
||||
, benchBSL $ nf (BL.drop (bla_len `div` 3)) bla
|
||||
, benchS $ nf (L.drop (sa_len `div` 3)) sa
|
||||
]
|
||||
, bgroup "encode"
|
||||
[ benchT $ nf T.encodeUtf8 ta
|
||||
, benchTL $ nf TL.encodeUtf8 tla
|
||||
, benchBS $ nf BS.pack sa
|
||||
, benchBSL $ nf BL.pack sa
|
||||
, benchS $ nf UTF8.fromString sa
|
||||
]
|
||||
, bgroup "filter"
|
||||
[ benchT $ nf (T.filter p0) ta
|
||||
, benchTL $ nf (TL.filter p0) tla
|
||||
, benchBS $ nf (BS.filter p0) bsa
|
||||
, benchBSL $ nf (BL.filter p0) bla
|
||||
, benchS $ nf (L.filter p0) sa
|
||||
]
|
||||
, bgroup "filter.filter"
|
||||
[ benchT $ nf (T.filter p1 . T.filter p0) ta
|
||||
, benchTL $ nf (TL.filter p1 . TL.filter p0) tla
|
||||
, benchBS $ nf (BS.filter p1 . BS.filter p0) bsa
|
||||
, benchBSL $ nf (BL.filter p1 . BL.filter p0) bla
|
||||
, benchS $ nf (L.filter p1 . L.filter p0) sa
|
||||
]
|
||||
, bgroup "foldl'"
|
||||
[ benchT $ nf (T.foldl' len 0) ta
|
||||
, benchTL $ nf (TL.foldl' len 0) tla
|
||||
, benchBS $ nf (BS.foldl' len 0) bsa
|
||||
, benchBSL $ nf (BL.foldl' len 0) bla
|
||||
, benchS $ nf (L.foldl' len 0) sa
|
||||
]
|
||||
, bgroup "foldr"
|
||||
[ benchT $ nf (L.length . T.foldr (:) []) ta
|
||||
, benchTL $ nf (L.length . TL.foldr (:) []) tla
|
||||
, benchBS $ nf (L.length . BS.foldr (:) []) bsa
|
||||
, benchBSL $ nf (L.length . BL.foldr (:) []) bla
|
||||
, benchS $ nf (L.length . L.foldr (:) []) sa
|
||||
]
|
||||
, bgroup "head"
|
||||
[ benchT $ nf T.head ta
|
||||
, benchTL $ nf TL.head tla
|
||||
, benchBS $ nf BS.head bsa
|
||||
, benchBSL $ nf BL.head bla
|
||||
, benchS $ nf L.head sa
|
||||
]
|
||||
, bgroup "init"
|
||||
[ benchT $ nf T.init ta
|
||||
, benchTL $ nf TL.init tla
|
||||
, benchBS $ nf BS.init bsa
|
||||
, benchBSL $ nf BL.init bla
|
||||
, benchS $ nf L.init sa
|
||||
]
|
||||
, bgroup "intercalate"
|
||||
[ benchT $ nf (T.intercalate tsw) tl
|
||||
, benchTL $ nf (TL.intercalate tlw) tll
|
||||
, benchBS $ nf (BS.intercalate bsw) bsl
|
||||
, benchBSL $ nf (BL.intercalate blw) bll
|
||||
, benchS $ nf (L.intercalate lw) sl
|
||||
]
|
||||
, bgroup "intersperse"
|
||||
[ benchT $ nf (T.intersperse c) ta
|
||||
, benchTL $ nf (TL.intersperse c) tla
|
||||
, benchBS $ nf (BS.intersperse c) bsa
|
||||
, benchBSL $ nf (BL.intersperse c) bla
|
||||
, benchS $ nf (L.intersperse c) sa
|
||||
]
|
||||
, bgroup "isInfixOf"
|
||||
[ benchT $ nf (T.isInfixOf tsw) ta
|
||||
, benchTL $ nf (TL.isInfixOf tlw) tla
|
||||
, benchBS $ nf (BS.isInfixOf bsw) bsa
|
||||
-- no isInfixOf for lazy bytestrings
|
||||
, benchS $ nf (L.isInfixOf lw) sa
|
||||
]
|
||||
, bgroup "last"
|
||||
[ benchT $ nf T.last ta
|
||||
, benchTL $ nf TL.last tla
|
||||
, benchBS $ nf BS.last bsa
|
||||
, benchBSL $ nf BL.last bla
|
||||
, benchS $ nf L.last sa
|
||||
]
|
||||
, bgroup "map"
|
||||
[ benchT $ nf (T.map f) ta
|
||||
, benchTL $ nf (TL.map f) tla
|
||||
, benchBS $ nf (BS.map f) bsa
|
||||
, benchBSL $ nf (BL.map f) bla
|
||||
, benchS $ nf (L.map f) sa
|
||||
]
|
||||
, bgroup "mapAccumL"
|
||||
[ benchT $ nf (T.mapAccumL g 0) ta
|
||||
, benchTL $ nf (TL.mapAccumL g 0) tla
|
||||
, benchBS $ nf (BS.mapAccumL g 0) bsa
|
||||
, benchBSL $ nf (BL.mapAccumL g 0) bla
|
||||
, benchS $ nf (L.mapAccumL g 0) sa
|
||||
]
|
||||
, bgroup "mapAccumR"
|
||||
[ benchT $ nf (T.mapAccumR g 0) ta
|
||||
, benchTL $ nf (TL.mapAccumR g 0) tla
|
||||
, benchBS $ nf (BS.mapAccumR g 0) bsa
|
||||
, benchBSL $ nf (BL.mapAccumR g 0) bla
|
||||
, benchS $ nf (L.mapAccumR g 0) sa
|
||||
]
|
||||
, bgroup "map.map"
|
||||
[ benchT $ nf (T.map f . T.map f) ta
|
||||
, benchTL $ nf (TL.map f . TL.map f) tla
|
||||
, benchBS $ nf (BS.map f . BS.map f) bsa
|
||||
, benchBSL $ nf (BL.map f . BL.map f) bla
|
||||
, benchS $ nf (L.map f . L.map f) sa
|
||||
]
|
||||
, bgroup "replicate char"
|
||||
[ benchT $ nf (T.replicate bsa_len) (T.singleton c)
|
||||
, benchTL $ nf (TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
|
||||
, benchBS $ nf (BS.replicate bsa_len) c
|
||||
, benchBSL $ nf (BL.replicate (fromIntegral bsa_len)) c
|
||||
, benchS $ nf (L.replicate bsa_len) c
|
||||
]
|
||||
, bgroup "replicate string"
|
||||
[ benchT $ nf (T.replicate (bsa_len `div` T.length tsw)) tsw
|
||||
, benchTL $ nf (TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
|
||||
, benchS $ nf (replicat (bsa_len `div` T.length tsw)) lw
|
||||
]
|
||||
, bgroup "reverse"
|
||||
[ benchT $ nf T.reverse ta
|
||||
, benchTL $ nf TL.reverse tla
|
||||
, benchBS $ nf BS.reverse bsa
|
||||
, benchBSL $ nf BL.reverse bla
|
||||
, benchS $ nf L.reverse sa
|
||||
]
|
||||
, bgroup "take"
|
||||
[ benchT $ nf (T.take (ta_len `div` 3)) ta
|
||||
, benchTL $ nf (TL.take (tla_len `div` 3)) tla
|
||||
, benchBS $ nf (BS.take (bsa_len `div` 3)) bsa
|
||||
, benchBSL $ nf (BL.take (bla_len `div` 3)) bla
|
||||
, benchS $ nf (L.take (sa_len `div` 3)) sa
|
||||
]
|
||||
, bgroup "tail"
|
||||
[ benchT $ nf T.tail ta
|
||||
, benchTL $ nf TL.tail tla
|
||||
, benchBS $ nf BS.tail bsa
|
||||
, benchBSL $ nf BL.tail bla
|
||||
, benchS $ nf L.tail sa
|
||||
]
|
||||
, bgroup "toLower"
|
||||
[ benchT $ nf T.toLower ta
|
||||
, benchTL $ nf TL.toLower tla
|
||||
, benchBS $ nf (BS.map toLower) bsa
|
||||
, benchBSL $ nf (BL.map toLower) bla
|
||||
, benchS $ nf (L.map toLower) sa
|
||||
]
|
||||
, bgroup "toUpper"
|
||||
[ benchT $ nf T.toUpper ta
|
||||
, benchTL $ nf TL.toUpper tla
|
||||
, benchBS $ nf (BS.map toUpper) bsa
|
||||
, benchBSL $ nf (BL.map toUpper) bla
|
||||
, benchS $ nf (L.map toUpper) sa
|
||||
]
|
||||
, bgroup "uncons"
|
||||
[ benchT $ nf T.uncons ta
|
||||
, benchTL $ nf TL.uncons tla
|
||||
, benchBS $ nf BS.uncons bsa
|
||||
, benchBSL $ nf BL.uncons bla
|
||||
, benchS $ nf L.uncons sa
|
||||
]
|
||||
, bgroup "words"
|
||||
[ benchT $ nf T.words ta
|
||||
, benchTL $ nf TL.words tla
|
||||
, benchBS $ nf BS.words bsa
|
||||
, benchBSL $ nf BL.words bla
|
||||
, benchS $ nf L.words sa
|
||||
]
|
||||
, bgroup "zipWith"
|
||||
[ benchT $ nf (T.zipWith min tb) ta
|
||||
, benchTL $ nf (TL.zipWith min tlb) tla
|
||||
, benchBS $ nf (BS.zipWith min bsb) bsa
|
||||
, benchBSL $ nf (BL.zipWith min blb) bla
|
||||
, benchS $ nf (L.zipWith min sb) sa
|
||||
]
|
||||
, bgroup "length"
|
||||
[ bgroup "cons"
|
||||
[ benchT $ nf (T.length . T.cons c) ta
|
||||
, benchTL $ nf (TL.length . TL.cons c) tla
|
||||
, benchBS $ nf (BS.length . BS.cons c) bsa
|
||||
, benchBSL $ nf (BL.length . BL.cons c) bla
|
||||
, benchS $ nf (L.length . (:) c) sa
|
||||
]
|
||||
, bgroup "decode"
|
||||
[ benchT $ nf (T.length . T.decodeUtf8) bsa
|
||||
, benchTL $ nf (TL.length . TL.decodeUtf8) bla
|
||||
, benchBS $ nf (L.length . BS.unpack) bsa
|
||||
, benchBSL $ nf (L.length . BL.unpack) bla
|
||||
, bench "StringUTF8" $ nf (L.length . UTF8.toString) bsa
|
||||
]
|
||||
, bgroup "drop"
|
||||
[ benchT $ nf (T.length . T.drop (ta_len `div` 3)) ta
|
||||
, benchTL $ nf (TL.length . TL.drop (tla_len `div` 3)) tla
|
||||
, benchBS $ nf (BS.length . BS.drop (bsa_len `div` 3)) bsa
|
||||
, benchBSL $ nf (BL.length . BL.drop (bla_len `div` 3)) bla
|
||||
, benchS $ nf (L.length . L.drop (sa_len `div` 3)) sa
|
||||
]
|
||||
, bgroup "filter"
|
||||
[ benchT $ nf (T.length . T.filter p0) ta
|
||||
, benchTL $ nf (TL.length . TL.filter p0) tla
|
||||
, benchBS $ nf (BS.length . BS.filter p0) bsa
|
||||
, benchBSL $ nf (BL.length . BL.filter p0) bla
|
||||
, benchS $ nf (L.length . L.filter p0) sa
|
||||
]
|
||||
, bgroup "filter.filter"
|
||||
[ benchT $ nf (T.length . T.filter p1 . T.filter p0) ta
|
||||
, benchTL $ nf (TL.length . TL.filter p1 . TL.filter p0) tla
|
||||
, benchBS $ nf (BS.length . BS.filter p1 . BS.filter p0) bsa
|
||||
, benchBSL $ nf (BL.length . BL.filter p1 . BL.filter p0) bla
|
||||
, benchS $ nf (L.length . L.filter p1 . L.filter p0) sa
|
||||
]
|
||||
, bgroup "init"
|
||||
[ benchT $ nf (T.length . T.init) ta
|
||||
, benchTL $ nf (TL.length . TL.init) tla
|
||||
, benchBS $ nf (BS.length . BS.init) bsa
|
||||
, benchBSL $ nf (BL.length . BL.init) bla
|
||||
, benchS $ nf (L.length . L.init) sa
|
||||
]
|
||||
, bgroup "intercalate"
|
||||
[ benchT $ nf (T.length . T.intercalate tsw) tl
|
||||
, benchTL $ nf (TL.length . TL.intercalate tlw) tll
|
||||
, benchBS $ nf (BS.length . BS.intercalate bsw) bsl
|
||||
, benchBSL $ nf (BL.length . BL.intercalate blw) bll
|
||||
, benchS $ nf (L.length . L.intercalate lw) sl
|
||||
]
|
||||
, bgroup "intersperse"
|
||||
[ benchT $ nf (T.length . T.intersperse c) ta
|
||||
, benchTL $ nf (TL.length . TL.intersperse c) tla
|
||||
, benchBS $ nf (BS.length . BS.intersperse c) bsa
|
||||
, benchBSL $ nf (BL.length . BL.intersperse c) bla
|
||||
, benchS $ nf (L.length . L.intersperse c) sa
|
||||
]
|
||||
, bgroup "map"
|
||||
[ benchT $ nf (T.length . T.map f) ta
|
||||
, benchTL $ nf (TL.length . TL.map f) tla
|
||||
, benchBS $ nf (BS.length . BS.map f) bsa
|
||||
, benchBSL $ nf (BL.length . BL.map f) bla
|
||||
, benchS $ nf (L.length . L.map f) sa
|
||||
]
|
||||
, bgroup "map.map"
|
||||
[ benchT $ nf (T.length . T.map f . T.map f) ta
|
||||
, benchTL $ nf (TL.length . TL.map f . TL.map f) tla
|
||||
, benchBS $ nf (BS.length . BS.map f . BS.map f) bsa
|
||||
, benchS $ nf (L.length . L.map f . L.map f) sa
|
||||
]
|
||||
, bgroup "replicate char"
|
||||
[ benchT $ nf (T.length . T.replicate bsa_len) (T.singleton c)
|
||||
, benchTL $ nf (TL.length . TL.replicate (fromIntegral bsa_len)) (TL.singleton c)
|
||||
, benchBS $ nf (BS.length . BS.replicate bsa_len) c
|
||||
, benchBSL $ nf (BL.length . BL.replicate (fromIntegral bsa_len)) c
|
||||
, benchS $ nf (L.length . L.replicate bsa_len) c
|
||||
]
|
||||
, bgroup "replicate string"
|
||||
[ benchT $ nf (T.length . T.replicate (bsa_len `div` T.length tsw)) tsw
|
||||
, benchTL $ nf (TL.length . TL.replicate (fromIntegral bsa_len `div` TL.length tlw)) tlw
|
||||
, benchS $ nf (L.length . replicat (bsa_len `div` T.length tsw)) lw
|
||||
]
|
||||
, bgroup "take"
|
||||
[ benchT $ nf (T.length . T.take (ta_len `div` 3)) ta
|
||||
, benchTL $ nf (TL.length . TL.take (tla_len `div` 3)) tla
|
||||
, benchBS $ nf (BS.length . BS.take (bsa_len `div` 3)) bsa
|
||||
, benchBSL $ nf (BL.length . BL.take (bla_len `div` 3)) bla
|
||||
, benchS $ nf (L.length . L.take (sa_len `div` 3)) sa
|
||||
]
|
||||
, bgroup "tail"
|
||||
[ benchT $ nf (T.length . T.tail) ta
|
||||
, benchTL $ nf (TL.length . TL.tail) tla
|
||||
, benchBS $ nf (BS.length . BS.tail) bsa
|
||||
, benchBSL $ nf (BL.length . BL.tail) bla
|
||||
, benchS $ nf (L.length . L.tail) sa
|
||||
]
|
||||
, bgroup "toLower"
|
||||
[ benchT $ nf (T.length . T.toLower) ta
|
||||
, benchTL $ nf (TL.length . TL.toLower) tla
|
||||
, benchBS $ nf (BS.length . BS.map toLower) bsa
|
||||
, benchBSL $ nf (BL.length . BL.map toLower) bla
|
||||
, benchS $ nf (L.length . L.map toLower) sa
|
||||
]
|
||||
, bgroup "toUpper"
|
||||
[ benchT $ nf (T.length . T.toUpper) ta
|
||||
, benchTL $ nf (TL.length . TL.toUpper) tla
|
||||
, benchBS $ nf (BS.length . BS.map toUpper) bsa
|
||||
, benchBSL $ nf (BL.length . BL.map toUpper) bla
|
||||
, benchS $ nf (L.length . L.map toUpper) sa
|
||||
]
|
||||
, bgroup "words"
|
||||
[ benchT $ nf (L.length . T.words) ta
|
||||
, benchTL $ nf (L.length . TL.words) tla
|
||||
, benchBS $ nf (L.length . BS.words) bsa
|
||||
, benchBSL $ nf (L.length . BL.words) bla
|
||||
, benchS $ nf (L.length . L.words) sa
|
||||
]
|
||||
, bgroup "zipWith"
|
||||
[ benchT $ nf (T.length . T.zipWith min tb) ta
|
||||
, benchTL $ nf (TL.length . TL.zipWith min tlb) tla
|
||||
, benchBS $ nf (L.length . BS.zipWith min bsb) bsa
|
||||
, benchBSL $ nf (L.length . BL.zipWith min blb) bla
|
||||
, benchS $ nf (L.length . L.zipWith min sb) sa
|
||||
]
|
||||
]
|
||||
, bgroup "Builder"
|
||||
[ bench "mappend char" $ nf (TL.length . TB.toLazyText . mappendNChar 'a') 10000
|
||||
, bench "mappend 8 char" $ nf (TL.length . TB.toLazyText . mappend8Char) 'a'
|
||||
, bench "mappend text" $ nf (TL.length . TB.toLazyText . mappendNText short) 10000
|
||||
]
|
||||
]
|
||||
where
|
||||
benchS = bench ("String+" ++ kind)
|
||||
benchT = bench ("Text+" ++ kind)
|
||||
benchTL = bench ("LazyText+" ++ kind)
|
||||
benchBS = bench ("ByteString+" ++ kind)
|
||||
benchBSL = bench ("LazyByteString+" ++ kind)
|
||||
|
||||
c = 'й'
|
||||
p0 = (== c)
|
||||
p1 = (/= 'д')
|
||||
lw = "право"
|
||||
bsw = UTF8.fromString lw
|
||||
blw = BL.fromChunks [bsw]
|
||||
tsw = T.pack lw
|
||||
tlw = TL.fromChunks [tsw]
|
||||
f (C# c#) = C# (chr# (ord# c# +# 1#))
|
||||
g (I# i#) (C# c#) = (I# (i# +# 1#), C# (chr# (ord# c# +# i#)))
|
||||
len l _ = l + (1::Int)
|
||||
replicat n = concat . L.replicate n
|
||||
short = T.pack "short"
|
||||
|
||||
#if !MIN_VERSION_bytestring(0,10,0)
|
||||
instance NFData BS.ByteString
|
||||
|
||||
instance NFData BL.ByteString where
|
||||
rnf BL.Empty = ()
|
||||
rnf (BL.Chunk _ ts) = rnf ts
|
||||
#endif
|
||||
|
||||
data B where
|
||||
B :: NFData a => a -> B
|
||||
|
||||
instance NFData B where
|
||||
rnf (B b) = rnf b
|
||||
|
||||
-- | Split a bytestring in chunks
|
||||
--
|
||||
chunksOf :: Int -> BS.ByteString -> [BS.ByteString]
|
||||
chunksOf k = go
|
||||
where
|
||||
go t = case BS.splitAt k t of
|
||||
(a,b) | BS.null a -> []
|
||||
| otherwise -> a : go b
|
||||
|
||||
-- | Append a character n times
|
||||
--
|
||||
mappendNChar :: Char -> Int -> TB.Builder
|
||||
mappendNChar c n = go 0
|
||||
where
|
||||
go i
|
||||
| i < n = TB.singleton c `mappend` go (i+1)
|
||||
| otherwise = mempty
|
||||
|
||||
-- | Gives more opportunity for inlining and elimination of unnecesary
|
||||
-- bounds checks.
|
||||
--
|
||||
mappend8Char :: Char -> TB.Builder
|
||||
mappend8Char c = TB.singleton c `mappend` TB.singleton c `mappend`
|
||||
TB.singleton c `mappend` TB.singleton c `mappend`
|
||||
TB.singleton c `mappend` TB.singleton c `mappend`
|
||||
TB.singleton c `mappend` TB.singleton c
|
||||
|
||||
-- | Append a text N times
|
||||
--
|
||||
mappendNText :: T.Text -> Int -> TB.Builder
|
||||
mappendNText t n = go 0
|
||||
where
|
||||
go i
|
||||
| i < n = TB.fromText t `mappend` go (i+1)
|
||||
| otherwise = mempty
|
@ -1,100 +0,0 @@
|
||||
-- | Read numbers from a file with a just a number on each line, find the
|
||||
-- minimum of those numbers. The file contains different kinds of numbers:
|
||||
--
|
||||
-- * Decimals
|
||||
--
|
||||
-- * Hexadecimals
|
||||
--
|
||||
-- * Floating point numbers
|
||||
--
|
||||
-- * Floating point numbers in scientific notation
|
||||
--
|
||||
-- The different benchmarks will only take into account the values they can
|
||||
-- parse.
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Lexing/parsing of different numerical types
|
||||
--
|
||||
module Benchmarks.ReadNumbers
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, whnf)
|
||||
import Data.List (foldl')
|
||||
import Numeric (readDec, readFloat, readHex)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.ByteString.Lazy.Char8 as BL
|
||||
import qualified Data.ByteString.Lex.Fractional as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
import qualified Data.Text.Lazy.Read as TL
|
||||
import qualified Data.Text.Read as T
|
||||
|
||||
type Env = ([String], [T.Text], [TL.Text], [B.ByteString], [BL.ByteString])
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
-- Read all files into lines: string, text, lazy text, bytestring, lazy
|
||||
-- bytestring
|
||||
s <- lines `fmap` readFile fp
|
||||
t <- T.lines `fmap` T.readFile fp
|
||||
tl <- TL.lines `fmap` TL.readFile fp
|
||||
b <- B.lines `fmap` B.readFile fp
|
||||
bl <- BL.lines `fmap` BL.readFile fp
|
||||
return (s, t, tl, b, bl)
|
||||
|
||||
benchmark :: Env -> Benchmark
|
||||
benchmark ~(s, t, tl, b, bl) =
|
||||
bgroup "ReadNumbers"
|
||||
[ bench "DecimalString" $ whnf (int . string readDec) s
|
||||
, bench "HexadecimalString" $ whnf (int . string readHex) s
|
||||
, bench "DoubleString" $ whnf (double . string readFloat) s
|
||||
|
||||
, bench "DecimalText" $ whnf (int . text (T.signed T.decimal)) t
|
||||
, bench "HexadecimalText" $ whnf (int . text (T.signed T.hexadecimal)) t
|
||||
, bench "DoubleText" $ whnf (double . text T.double) t
|
||||
, bench "RationalText" $ whnf (double . text T.rational) t
|
||||
|
||||
, bench "DecimalLazyText" $
|
||||
whnf (int . text (TL.signed TL.decimal)) tl
|
||||
, bench "HexadecimalLazyText" $
|
||||
whnf (int . text (TL.signed TL.hexadecimal)) tl
|
||||
, bench "DoubleLazyText" $
|
||||
whnf (double . text TL.double) tl
|
||||
, bench "RationalLazyText" $
|
||||
whnf (double . text TL.rational) tl
|
||||
|
||||
, bench "DecimalByteString" $ whnf (int . byteString B.readInt) b
|
||||
, bench "DoubleByteString" $ whnf (double . byteString B.readDecimal) b
|
||||
|
||||
, bench "DecimalLazyByteString" $
|
||||
whnf (int . byteString BL.readInt) bl
|
||||
]
|
||||
where
|
||||
-- Used for fixing types
|
||||
int :: Int -> Int
|
||||
int = id
|
||||
double :: Double -> Double
|
||||
double = id
|
||||
|
||||
string :: (Ord a, Num a) => (t -> [(a, t)]) -> [t] -> a
|
||||
string reader = foldl' go 1000000
|
||||
where
|
||||
go z t = case reader t of [(n, _)] -> min n z
|
||||
_ -> z
|
||||
|
||||
text :: (Ord a, Num a) => (t -> Either String (a,t)) -> [t] -> a
|
||||
text reader = foldl' go 1000000
|
||||
where
|
||||
go z t = case reader t of Left _ -> z
|
||||
Right (n, _) -> min n z
|
||||
|
||||
byteString :: (Ord a, Num a) => (t -> Maybe (a,t)) -> [t] -> a
|
||||
byteString reader = foldl' go 1000000
|
||||
where
|
||||
go z t = case reader t of Nothing -> z
|
||||
Just (n, _) -> min n z
|
@ -1,50 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
-- | Replace a string by another string
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Search and replace of a pattern in a text
|
||||
--
|
||||
module Benchmarks.Replace
|
||||
( benchmark
|
||||
, initEnv
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bgroup, bench, nf)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.ByteString.Lazy.Search as BL
|
||||
import qualified Data.ByteString.Search as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
type Env = (T.Text, B.ByteString, TL.Text, BL.ByteString)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
tl <- TL.readFile fp
|
||||
bl <- BL.readFile fp
|
||||
let !t = TL.toStrict tl
|
||||
!b = T.encodeUtf8 t
|
||||
return (t, b, tl, bl)
|
||||
|
||||
benchmark :: String -> String -> Env -> Benchmark
|
||||
benchmark pat sub ~(t, b, tl, bl) =
|
||||
bgroup "Replace" [
|
||||
bench "Text" $ nf (T.length . T.replace tpat tsub) t
|
||||
, bench "ByteString" $ nf (BL.length . B.replace bpat bsub) b
|
||||
, bench "LazyText" $ nf (TL.length . TL.replace tlpat tlsub) tl
|
||||
, bench "LazyByteString" $ nf (BL.length . BL.replace blpat blsub) bl
|
||||
]
|
||||
where
|
||||
tpat = T.pack pat
|
||||
tsub = T.pack sub
|
||||
tlpat = TL.pack pat
|
||||
tlsub = TL.pack sub
|
||||
bpat = T.encodeUtf8 tpat
|
||||
bsub = T.encodeUtf8 tsub
|
||||
blpat = B.concat $ BL.toChunks $ TL.encodeUtf8 tlpat
|
||||
blsub = B.concat $ BL.toChunks $ TL.encodeUtf8 tlsub
|
@ -1,55 +0,0 @@
|
||||
-- | Search for a pattern in a file, find the number of occurences
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Searching all occurences of a pattern using library routines
|
||||
--
|
||||
module Benchmarks.Search
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bench, bgroup, whnf)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.ByteString.Lazy.Search as BL
|
||||
import qualified Data.ByteString.Search as B
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
|
||||
type Env = (B.ByteString, BL.ByteString, T.Text, TL.Text)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
b <- B.readFile fp
|
||||
bl <- BL.readFile fp
|
||||
t <- T.readFile fp
|
||||
tl <- TL.readFile fp
|
||||
return (b, bl, t, tl)
|
||||
|
||||
benchmark :: T.Text -> Env -> Benchmark
|
||||
benchmark needleT ~(b, bl, t, tl) =
|
||||
bgroup "FileIndices"
|
||||
[ bench "ByteString" $ whnf (byteString needleB) b
|
||||
, bench "LazyByteString" $ whnf (lazyByteString needleB) bl
|
||||
, bench "Text" $ whnf (text needleT) t
|
||||
, bench "LazyText" $ whnf (lazyText needleTL) tl
|
||||
]
|
||||
where
|
||||
needleB = T.encodeUtf8 needleT
|
||||
needleTL = TL.fromChunks [needleT]
|
||||
|
||||
byteString :: B.ByteString -> B.ByteString -> Int
|
||||
byteString needle = length . B.indices needle
|
||||
|
||||
lazyByteString :: B.ByteString -> BL.ByteString -> Int
|
||||
lazyByteString needle = length . BL.indices needle
|
||||
|
||||
text :: T.Text -> T.Text -> Int
|
||||
text = T.count
|
||||
|
||||
lazyText :: TL.Text -> TL.Text -> Int
|
||||
lazyText needle = fromIntegral . TL.count needle
|
@ -1,118 +0,0 @@
|
||||
-- | This module contains a number of benchmarks for the different streaming
|
||||
-- functions
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Most streaming functions
|
||||
--
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, RecordWildCards #-}
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
module Benchmarks.Stream
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Control.DeepSeq (NFData (..))
|
||||
import Criterion (Benchmark, bgroup, bench, nf)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import Data.Text.Internal.Fusion.Types (Step (..), Stream (..))
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Encoding.Error as E
|
||||
import qualified Data.Text.Internal.Encoding.Fusion as T
|
||||
import qualified Data.Text.Internal.Encoding.Fusion.Common as F
|
||||
import qualified Data.Text.Internal.Fusion as T
|
||||
import qualified Data.Text.IO as T
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as TL
|
||||
import qualified Data.Text.Internal.Lazy.Fusion as TL
|
||||
import qualified Data.Text.Lazy.IO as TL
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
instance NFData a => NFData (Stream a) where
|
||||
-- Currently, this implementation does not force evaluation of the size hint
|
||||
rnf (Stream next s0 _) = go s0
|
||||
where
|
||||
go !s = case next s of
|
||||
Done -> ()
|
||||
Skip s' -> go s'
|
||||
Yield x s' -> rnf x `seq` go s'
|
||||
|
||||
data Env = Env
|
||||
{ t :: !T.Text
|
||||
, utf8 :: !B.ByteString
|
||||
, utf16le :: !B.ByteString
|
||||
, utf16be :: !B.ByteString
|
||||
, utf32le :: !B.ByteString
|
||||
, utf32be :: !B.ByteString
|
||||
, tl :: !TL.Text
|
||||
, utf8L :: !BL.ByteString
|
||||
, utf16leL :: !BL.ByteString
|
||||
, utf16beL :: !BL.ByteString
|
||||
, utf32leL :: !BL.ByteString
|
||||
, utf32beL :: !BL.ByteString
|
||||
, s :: T.Stream Char
|
||||
} deriving (Generic, NFData)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
-- Different formats
|
||||
t <- T.readFile fp
|
||||
let !utf8 = T.encodeUtf8 t
|
||||
!utf16le = T.encodeUtf16LE t
|
||||
!utf16be = T.encodeUtf16BE t
|
||||
!utf32le = T.encodeUtf32LE t
|
||||
!utf32be = T.encodeUtf32BE t
|
||||
|
||||
-- Once again for the lazy variants
|
||||
tl <- TL.readFile fp
|
||||
let !utf8L = TL.encodeUtf8 tl
|
||||
!utf16leL = TL.encodeUtf16LE tl
|
||||
!utf16beL = TL.encodeUtf16BE tl
|
||||
!utf32leL = TL.encodeUtf32LE tl
|
||||
!utf32beL = TL.encodeUtf32BE tl
|
||||
|
||||
-- For the functions which operate on streams
|
||||
let !s = T.stream t
|
||||
return Env{..}
|
||||
|
||||
benchmark :: Env -> Benchmark
|
||||
benchmark ~Env{..} =
|
||||
bgroup "Stream"
|
||||
-- Fusion
|
||||
[ bgroup "stream" $
|
||||
[ bench "Text" $ nf T.stream t
|
||||
, bench "LazyText" $ nf TL.stream tl
|
||||
]
|
||||
|
||||
-- Encoding.Fusion
|
||||
, bgroup "streamUtf8"
|
||||
[ bench "Text" $ nf (T.streamUtf8 E.lenientDecode) utf8
|
||||
, bench "LazyText" $ nf (TL.streamUtf8 E.lenientDecode) utf8L
|
||||
]
|
||||
, bgroup "streamUtf16LE"
|
||||
[ bench "Text" $ nf (T.streamUtf16LE E.lenientDecode) utf16le
|
||||
, bench "LazyText" $ nf (TL.streamUtf16LE E.lenientDecode) utf16leL
|
||||
]
|
||||
, bgroup "streamUtf16BE"
|
||||
[ bench "Text" $ nf (T.streamUtf16BE E.lenientDecode) utf16be
|
||||
, bench "LazyText" $ nf (TL.streamUtf16BE E.lenientDecode) utf16beL
|
||||
]
|
||||
, bgroup "streamUtf32LE"
|
||||
[ bench "Text" $ nf (T.streamUtf32LE E.lenientDecode) utf32le
|
||||
, bench "LazyText" $ nf (TL.streamUtf32LE E.lenientDecode) utf32leL
|
||||
]
|
||||
, bgroup "streamUtf32BE"
|
||||
[ bench "Text" $ nf (T.streamUtf32BE E.lenientDecode) utf32be
|
||||
, bench "LazyText" $ nf (TL.streamUtf32BE E.lenientDecode) utf32beL
|
||||
]
|
||||
|
||||
-- Encoding.Fusion.Common
|
||||
, bench "restreamUtf16LE" $ nf F.restreamUtf16LE s
|
||||
, bench "restreamUtf16BE" $ nf F.restreamUtf16BE s
|
||||
, bench "restreamUtf32LE" $ nf F.restreamUtf32LE s
|
||||
, bench "restreamUtf32BE" $ nf F.restreamUtf32BE s
|
||||
]
|
@ -1,43 +0,0 @@
|
||||
-- | A word frequency count using the different string types
|
||||
--
|
||||
-- Tested in this benchmark:
|
||||
--
|
||||
-- * Splitting into words
|
||||
--
|
||||
-- * Converting to lowercase
|
||||
--
|
||||
-- * Comparing: Eq/Ord instances
|
||||
--
|
||||
module Benchmarks.WordFrequencies
|
||||
( initEnv
|
||||
, benchmark
|
||||
) where
|
||||
|
||||
import Criterion (Benchmark, bench, bgroup, whnf)
|
||||
import Data.Char (toLower)
|
||||
import Data.List (foldl')
|
||||
import Data.Map (Map)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as T
|
||||
|
||||
type Env = (String, B.ByteString, T.Text)
|
||||
|
||||
initEnv :: FilePath -> IO Env
|
||||
initEnv fp = do
|
||||
s <- readFile fp
|
||||
b <- B.readFile fp
|
||||
t <- T.readFile fp
|
||||
return (s, b, t)
|
||||
|
||||
benchmark :: Env -> Benchmark
|
||||
benchmark ~(s, b, t) =
|
||||
bgroup "WordFrequencies"
|
||||
[ bench "String" $ whnf (frequencies . words . map toLower) s
|
||||
, bench "ByteString" $ whnf (frequencies . B.words . B.map toLower) b
|
||||
, bench "Text" $ whnf (frequencies . T.words . T.toLower) t
|
||||
]
|
||||
|
||||
frequencies :: Ord a => [a] -> Map a Int
|
||||
frequencies = foldl' (\m k -> M.insertWith (+) k 1 m) M.empty
|
@ -1,32 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns, OverloadedStrings, RankNTypes #-}
|
||||
|
||||
module Main (
|
||||
main
|
||||
) where
|
||||
|
||||
import Control.Monad (forM_)
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Text as Text
|
||||
import Data.Text.Encoding (decodeUtf8)
|
||||
import Data.Text (Text)
|
||||
import System.IO (hFlush, stdout)
|
||||
import Timer (timer)
|
||||
|
||||
type BM = Text -> ()
|
||||
|
||||
bm :: forall a. (Text -> a) -> BM
|
||||
bm f t = f t `seq` ()
|
||||
|
||||
benchmarks :: [(String, Text.Text -> ())]
|
||||
benchmarks = [
|
||||
("find_first", bm $ Text.isInfixOf "en:Benin")
|
||||
, ("find_index", bm $ Text.findIndex (=='c'))
|
||||
]
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
!contents <- decodeUtf8 `fmap` B.readFile "../tests/text-test-data/yiwiki.xml"
|
||||
forM_ benchmarks $ \(name, bmark) -> do
|
||||
putStr $ name ++ " "
|
||||
hFlush stdout
|
||||
putStrLn =<< (timer 100 contents bmark)
|
@ -1,30 +0,0 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
|
||||
module Timer (timer) where
|
||||
|
||||
import Control.Exception (evaluate)
|
||||
import Data.Time.Clock.POSIX (getPOSIXTime)
|
||||
import GHC.Float (FFFormat(..), formatRealFloat)
|
||||
|
||||
ickyRound :: Int -> Double -> String
|
||||
ickyRound k = formatRealFloat FFFixed (Just k)
|
||||
|
||||
timer :: Int -> a -> (a -> b) -> IO String
|
||||
timer count a0 f = do
|
||||
let loop !k !fastest
|
||||
| k <= 0 = return fastest
|
||||
| otherwise = do
|
||||
start <- getPOSIXTime
|
||||
let inner a i
|
||||
| i <= 0 = return ()
|
||||
| otherwise = evaluate (f a) >> inner a (i-1)
|
||||
inner a0 count
|
||||
end <- getPOSIXTime
|
||||
let elapsed = end - start
|
||||
loop (k-1) (min fastest (elapsed / fromIntegral count))
|
||||
t <- loop (3::Int) 1e300
|
||||
let log10 x = log x / log 10
|
||||
ft = realToFrac t
|
||||
prec = round (log10 (fromIntegral count) - log10 ft)
|
||||
return $! ickyRound prec ft
|
||||
{-# NOINLINE timer #-}
|
@ -1,2 +0,0 @@
|
||||
__pycache__
|
||||
*.pyc
|
@ -1,12 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import utils, sys, codecs
|
||||
|
||||
def cut(filename, l, r):
|
||||
content = open(filename, encoding='utf-8')
|
||||
for line in content:
|
||||
print(line[l:r])
|
||||
|
||||
for f in sys.argv[1:]:
|
||||
t = utils.benchmark(lambda: cut(f, 20, 40))
|
||||
sys.stderr.write('{0}: {1}\n'.format(f, t))
|
@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
|
||||
def find_first():
|
||||
cf = contents.find
|
||||
return timer(lambda: cf("en:Benin"))
|
||||
|
||||
def timer(f, count=100):
|
||||
a = 1e300
|
||||
def g():
|
||||
return
|
||||
for i in xrange(3):
|
||||
start = time.time()
|
||||
for j in xrange(count):
|
||||
g()
|
||||
a = min(a, (time.time() - start) / count)
|
||||
|
||||
b = 1e300
|
||||
for i in xrange(3):
|
||||
start = time.time()
|
||||
for j in xrange(count):
|
||||
f()
|
||||
b = min(b, (time.time() - start) / count)
|
||||
|
||||
return round(b - a, int(round(math.log(count, 10) - math.log(b - a, 10))))
|
||||
|
||||
contents = open('../../tests/text-test-data/yiwiki.xml', 'r').read()
|
||||
contents = contents.decode('utf-8')
|
||||
|
||||
benchmarks = (
|
||||
find_first,
|
||||
)
|
||||
|
||||
to_run = sys.argv[1:]
|
||||
bms = []
|
||||
if to_run:
|
||||
for r in to_run:
|
||||
for b in benchmarks:
|
||||
if b.__name__.startswith(r):
|
||||
bms.append(b)
|
||||
else:
|
||||
bms = benchmarks
|
||||
|
||||
for b in bms:
|
||||
sys.stdout.write(b.__name__ + ' ')
|
||||
sys.stdout.flush()
|
||||
print b()
|
@ -1,13 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import utils, sys, codecs
|
||||
|
||||
def sort(filename):
|
||||
content = open(filename, encoding='utf-8').read()
|
||||
lines = content.splitlines()
|
||||
lines.sort()
|
||||
print('\n'.join(lines))
|
||||
|
||||
for f in sys.argv[1:]:
|
||||
t = utils.benchmark(lambda: sort(f))
|
||||
sys.stderr.write('{0}: {1}\n'.format(f, t))
|
@ -1,25 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import utils, sys
|
||||
|
||||
def strip_tags(filename):
|
||||
string = open(filename, encoding='utf-8').read()
|
||||
|
||||
d = 0
|
||||
out = []
|
||||
|
||||
for c in string:
|
||||
if c == '<': d += 1
|
||||
|
||||
if d > 0:
|
||||
out += ' '
|
||||
else:
|
||||
out += c
|
||||
|
||||
if c == '>': d -= 1
|
||||
|
||||
print(''.join(out))
|
||||
|
||||
for f in sys.argv[1:]:
|
||||
t = utils.benchmark(lambda: strip_tags(f))
|
||||
sys.stderr.write('{0}: {1}\n'.format(f, t))
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, time
|
||||
|
||||
def benchmark_once(f):
|
||||
start = time.time()
|
||||
f()
|
||||
end = time.time()
|
||||
return end - start
|
||||
|
||||
def benchmark(f):
|
||||
runs = 100
|
||||
total = 0.0
|
||||
for i in range(runs):
|
||||
result = benchmark_once(f)
|
||||
sys.stderr.write('Run {0}: {1}\n'.format(i, result))
|
||||
total += result
|
||||
return total / runs
|
@ -1,16 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require './utils.rb'
|
||||
|
||||
def cut(filename, l, r)
|
||||
File.open(filename, 'r:utf-8') do |file|
|
||||
file.each_line do |line|
|
||||
puts line[l, r - l]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
ARGV.each do |f|
|
||||
t = benchmark { cut(f, 20, 40) }
|
||||
STDERR.puts "#{f}: #{t}"
|
||||
end
|
@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require './utils.rb'
|
||||
|
||||
def fold(filename, max_width)
|
||||
File.open(filename, 'r:utf-8') do |file|
|
||||
# Words in this paragraph
|
||||
paragraph = []
|
||||
|
||||
file.each_line do |line|
|
||||
# If we encounter an empty line, we reformat and dump the current
|
||||
# paragraph
|
||||
if line.strip.empty?
|
||||
puts fold_paragraph(paragraph, max_width)
|
||||
puts
|
||||
paragraph = []
|
||||
# Otherwise, we append the words found in the line to the paragraph
|
||||
else
|
||||
paragraph.concat line.split
|
||||
end
|
||||
end
|
||||
|
||||
# Last paragraph
|
||||
puts fold_paragraph(paragraph, max_width) unless paragraph.empty?
|
||||
end
|
||||
end
|
||||
|
||||
# Fold a single paragraph to the desired width
|
||||
def fold_paragraph(paragraph, max_width)
|
||||
# Gradually build our output
|
||||
str, *rest = paragraph
|
||||
width = str.length
|
||||
|
||||
rest.each do |word|
|
||||
if width + word.length + 1 <= max_width
|
||||
str << ' ' << word
|
||||
width += word.length + 1
|
||||
else
|
||||
str << "\n" << word
|
||||
width = word.length
|
||||
end
|
||||
end
|
||||
|
||||
str
|
||||
end
|
||||
|
||||
ARGV.each do |f|
|
||||
t = benchmark { fold(f, 80) }
|
||||
STDERR.puts "#{f}: #{t}"
|
||||
end
|
@ -1,15 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require './utils.rb'
|
||||
|
||||
def sort(filename)
|
||||
File.open(filename, 'r:utf-8') do |file|
|
||||
content = file.read
|
||||
puts content.lines.sort.join
|
||||
end
|
||||
end
|
||||
|
||||
ARGV.each do |f|
|
||||
t = benchmark { sort(f) }
|
||||
STDERR.puts "#{f}: #{t}"
|
||||
end
|
@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require './utils.rb'
|
||||
|
||||
def strip_tags(filename)
|
||||
File.open(filename, 'r:utf-8') do |file|
|
||||
str = file.read
|
||||
|
||||
d = 0
|
||||
|
||||
str.each_char do |c|
|
||||
d += 1 if c == '<'
|
||||
putc(if d > 0 then ' ' else c end)
|
||||
d -= 1 if c == '>'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
ARGV.each do |f|
|
||||
t = benchmark { strip_tags(f) }
|
||||
STDERR.puts "#{f}: #{t}"
|
||||
end
|
@ -1,14 +0,0 @@
|
||||
require 'benchmark'
|
||||
|
||||
def benchmark(&block)
|
||||
runs = 100
|
||||
total = 0
|
||||
|
||||
runs.times do |i|
|
||||
result = Benchmark.measure(&block).total
|
||||
$stderr.puts "Run #{i}: #{result}"
|
||||
total += result
|
||||
end
|
||||
|
||||
total / runs
|
||||
end
|
@ -1,148 +0,0 @@
|
||||
cabal-version: 1.12
|
||||
name: text-benchmarks
|
||||
version: 0.0.0.0
|
||||
synopsis: Benchmarks for the text package
|
||||
description: Benchmarks for the text package
|
||||
homepage: https://bitbucket.org/bos/text
|
||||
license: BSD2
|
||||
license-file: ../LICENSE
|
||||
author: Jasper Van der Jeugt <jaspervdj@gmail.com>,
|
||||
Bryan O'Sullivan <bos@serpentine.com>,
|
||||
Tom Harper <rtomharper@googlemail.com>,
|
||||
Duncan Coutts <duncan@haskell.org>
|
||||
maintainer: jaspervdj@gmail.com
|
||||
category: Text
|
||||
build-type: Simple
|
||||
|
||||
flag bytestring-builder
|
||||
description: Depend on the bytestring-builder package for backwards compatibility.
|
||||
default: False
|
||||
manual: False
|
||||
|
||||
flag llvm
|
||||
description: use LLVM
|
||||
default: False
|
||||
manual: True
|
||||
|
||||
executable text-benchmarks
|
||||
ghc-options: -Wall -O2 -rtsopts
|
||||
if flag(llvm)
|
||||
ghc-options: -fllvm
|
||||
cpp-options: -DINTEGER_GMP
|
||||
build-depends: array,
|
||||
base == 4.*,
|
||||
binary,
|
||||
blaze-builder,
|
||||
bytestring-lexing >= 0.5.0,
|
||||
containers,
|
||||
criterion >= 0.10.0.0,
|
||||
deepseq,
|
||||
directory,
|
||||
filepath,
|
||||
ghc-prim,
|
||||
integer-gmp,
|
||||
stringsearch,
|
||||
template-haskell,
|
||||
transformers,
|
||||
utf8-string,
|
||||
vector
|
||||
|
||||
if flag(bytestring-builder)
|
||||
build-depends: bytestring >= 0.9 && < 0.10.4,
|
||||
bytestring-builder >= 0.10.4
|
||||
else
|
||||
build-depends: bytestring >= 0.10.4
|
||||
|
||||
-- modules for benchmark proper
|
||||
c-sources: cbits/time_iconv.c
|
||||
hs-source-dirs: haskell
|
||||
main-is: Benchmarks.hs
|
||||
other-modules:
|
||||
Benchmarks.Builder
|
||||
Benchmarks.Concat
|
||||
Benchmarks.DecodeUtf8
|
||||
Benchmarks.EncodeUtf8
|
||||
Benchmarks.Equality
|
||||
Benchmarks.FileRead
|
||||
Benchmarks.FoldLines
|
||||
Benchmarks.Mul
|
||||
Benchmarks.Programs.BigTable
|
||||
Benchmarks.Programs.Cut
|
||||
Benchmarks.Programs.Fold
|
||||
Benchmarks.Programs.Sort
|
||||
Benchmarks.Programs.StripTags
|
||||
Benchmarks.Programs.Throughput
|
||||
Benchmarks.Pure
|
||||
Benchmarks.ReadNumbers
|
||||
Benchmarks.Replace
|
||||
Benchmarks.Search
|
||||
Benchmarks.Stream
|
||||
Benchmarks.WordFrequencies
|
||||
|
||||
-- Source code for IUT (implementation under test)
|
||||
-- "borrowed" from parent folder
|
||||
include-dirs: ../include
|
||||
c-sources: ../cbits/cbits.c
|
||||
hs-source-dirs: ..
|
||||
other-modules:
|
||||
Data.Text
|
||||
Data.Text.Array
|
||||
Data.Text.Encoding
|
||||
Data.Text.Encoding.Error
|
||||
Data.Text.Foreign
|
||||
Data.Text.IO
|
||||
Data.Text.Internal
|
||||
Data.Text.Internal.Builder
|
||||
Data.Text.Internal.Builder.Functions
|
||||
Data.Text.Internal.Builder.Int.Digits
|
||||
Data.Text.Internal.Builder.RealFloat.Functions
|
||||
Data.Text.Internal.Encoding.Fusion
|
||||
Data.Text.Internal.Encoding.Fusion.Common
|
||||
Data.Text.Internal.Encoding.Utf16
|
||||
Data.Text.Internal.Encoding.Utf32
|
||||
Data.Text.Internal.Encoding.Utf8
|
||||
Data.Text.Internal.Functions
|
||||
Data.Text.Internal.Fusion
|
||||
Data.Text.Internal.Fusion.CaseMapping
|
||||
Data.Text.Internal.Fusion.Common
|
||||
Data.Text.Internal.Fusion.Size
|
||||
Data.Text.Internal.Fusion.Types
|
||||
Data.Text.Internal.IO
|
||||
Data.Text.Internal.Lazy
|
||||
Data.Text.Internal.Lazy.Encoding.Fusion
|
||||
Data.Text.Internal.Lazy.Fusion
|
||||
Data.Text.Internal.Lazy.Search
|
||||
Data.Text.Internal.Private
|
||||
Data.Text.Internal.Read
|
||||
Data.Text.Internal.Search
|
||||
Data.Text.Internal.Unsafe
|
||||
Data.Text.Internal.Unsafe.Char
|
||||
Data.Text.Internal.Unsafe.Shift
|
||||
Data.Text.Lazy
|
||||
Data.Text.Lazy.Builder
|
||||
Data.Text.Lazy.Builder.Int
|
||||
Data.Text.Lazy.Builder.RealFloat
|
||||
Data.Text.Lazy.Encoding
|
||||
Data.Text.Lazy.IO
|
||||
Data.Text.Lazy.Internal
|
||||
Data.Text.Lazy.Read
|
||||
Data.Text.Read
|
||||
Data.Text.Unsafe
|
||||
Data.Text.Show
|
||||
|
||||
default-language: Haskell2010
|
||||
default-extensions: NondecreasingIndentation
|
||||
|
||||
|
||||
executable text-multilang
|
||||
hs-source-dirs: haskell
|
||||
main-is: Multilang.hs
|
||||
other-modules: Timer
|
||||
ghc-options: -Wall -O2
|
||||
build-depends: base == 4.*,
|
||||
bytestring,
|
||||
text,
|
||||
time
|
||||
|
||||
default-language: Haskell2010
|
||||
default-extensions: NondecreasingIndentation
|
@ -1,3 +0,0 @@
|
||||
-- See http://cabal.readthedocs.io/en/latest/nix-local-build-overview.html
|
||||
packages: ., benchmarks, th-tests
|
||||
tests: True
|
@ -1,226 +0,0 @@
|
||||
### 1.2.4.0
|
||||
|
||||
* Add TH `Lift` instances for `Data.Text.Text` and `Data.Text.Lazy.Text` (gh-232)
|
||||
|
||||
* Update Haddock documentation to better reflect fusion eligibility; improve fusion
|
||||
rules for `takeWhileEnd` and `length` (gh-241, ghc-202)
|
||||
|
||||
* Optimise `Data.Text.replicate` from `O(n)` to `O(log n)` (gh-209)
|
||||
|
||||
* Support `base-4.13.0.0`
|
||||
|
||||
### 1.2.3.1
|
||||
|
||||
* Make `decodeUtf8With` fail explicitly for unsupported non-BMP
|
||||
replacement characters instead silent undefined behaviour (gh-213)
|
||||
|
||||
* Fix termination condition for file reads via `Data.Text.IO`
|
||||
operations (gh-223)
|
||||
|
||||
* A serious correctness issue affecting uses of `take` and `drop` with
|
||||
negative counts has been fixed (gh-227)
|
||||
|
||||
* A bug in the case-mapping functions resulting in unreasonably large
|
||||
allocations with large arguments has been fixed (gh-221)
|
||||
|
||||
### 1.2.3.0
|
||||
|
||||
* Spec compliance: `toCaseFold` now follows the Unicode 9.0 spec
|
||||
(updated from 8.0).
|
||||
|
||||
* Bug fix: the lazy `takeWhileEnd` function violated the
|
||||
[lazy text invariant](https://github.com/bos/text/blob/1.2.3.0/Data/Text/Internal/Lazy.hs#L51)
|
||||
(gh-184).
|
||||
|
||||
* Bug fix: Fixed usage of size hints causing incorrect behavior (gh-197).
|
||||
|
||||
* New function: `unsnoc` (gh-173).
|
||||
|
||||
* Reduce memory overhead in `encodeUTF8` (gh-194).
|
||||
|
||||
* Improve UTF-8 decoder error-recovery (gh-182).
|
||||
|
||||
* Minor documentation improvements (`@since` annotations, more
|
||||
examples, clarifications).
|
||||
|
||||
#### 1.2.2.2
|
||||
|
||||
* The `toTitle` function now correctly handles letters that
|
||||
immediately follow punctuation. Before, `"there's"` would turn into
|
||||
`"There'S"`. Now, it becomes `"There's"`.
|
||||
|
||||
* The implementation of unstreaming is faster, resulting in operations
|
||||
such as `map` and `intersperse` speeding up by up to 30%, with
|
||||
smaller code generated.
|
||||
|
||||
* The optimised length comparison function is now more likely to be
|
||||
used after some rewrite rule tweaking.
|
||||
|
||||
* Bug fix: an off-by-one bug in `takeEnd` is fixed.
|
||||
|
||||
* Bug fix: a logic error in `takeWord16` is fixed.
|
||||
|
||||
#### 1.2.2.1
|
||||
|
||||
* The switch to `integer-pure` in 1.2.2.0 was apparently mistaken.
|
||||
The build flag has been renamed accordingly. Your army of diligent
|
||||
maintainers apologizes for the churn.
|
||||
|
||||
* Spec compliance: `toCaseFold` now follows the Unicode 8.0 spec
|
||||
(updated from 7.0)
|
||||
|
||||
* An STG lint error has been fixed
|
||||
|
||||
### 1.2.2.0
|
||||
|
||||
* The `integer-simple` package, upon which this package optionally
|
||||
depended, has been replaced with `integer-pure`. The build flag has
|
||||
been renamed accordingly.
|
||||
|
||||
* Bug fix: For the `Binary` instance, If UTF-8 decoding fails during a
|
||||
`get`, the error is propagated via `fail` instead of an uncatchable
|
||||
crash.
|
||||
|
||||
* New function: `takeWhileEnd`
|
||||
|
||||
* New instances for the `Text` types:
|
||||
* if `base` >= 4.7: `PrintfArg`
|
||||
* if `base` >= 4.9: `Semigroup`
|
||||
|
||||
#### 1.2.1.3
|
||||
|
||||
* Bug fix: As it turns out, moving the literal rewrite rules to simplifier
|
||||
phase 2 does not prevent competition with the `unpack` rule, which is
|
||||
also active in this phase. Unfortunately this was hidden due to a silly
|
||||
test environment mistake. Moving literal rules back to phase 1 finally
|
||||
fixes GHC Trac #10528 correctly.
|
||||
|
||||
#### 1.2.1.2
|
||||
|
||||
* Bug fix: Run literal rewrite rules in simplifier phase 2.
|
||||
The behavior of the simplifier changed in GHC 7.10.2,
|
||||
causing these rules to fail to fire, leading to poor code generation
|
||||
and long compilation times. See
|
||||
[GHC Trac #10528](https://ghc.haskell.org/trac/ghc/ticket/10528).
|
||||
|
||||
#### 1.2.1.1
|
||||
|
||||
* Expose unpackCString#, which you should never use.
|
||||
|
||||
### 1.2.1.0
|
||||
|
||||
* Added Binary instances for both Text types. (If you have previously
|
||||
been using the text-binary package to get a Binary instance, it is
|
||||
now obsolete.)
|
||||
|
||||
#### 1.2.0.6
|
||||
|
||||
* Fixed a space leak in UTF-8 decoding
|
||||
|
||||
#### 1.2.0.5
|
||||
|
||||
* Feature parity: repeat, cycle, iterate are now implemented for lazy
|
||||
Text, and the Data instance is more complete
|
||||
|
||||
* Build speed: an inliner space explosion has been fixed with toCaseFold
|
||||
|
||||
* Bug fix: encoding Int to a Builder would infinite-loop if the
|
||||
integer-simple package was used
|
||||
|
||||
* Deprecation: OnEncodeError and EncodeError are deprecated, as they
|
||||
are never used
|
||||
|
||||
* Internals: some types that are used internally in fusion-related
|
||||
functions have moved around, been renamed, or been deleted (we don't
|
||||
bump the major version if .Internal modules change)
|
||||
|
||||
* Spec compliance: toCaseFold now follows the Unicode 7.0 spec
|
||||
(updated from 6.3)
|
||||
|
||||
#### 1.2.0.4
|
||||
|
||||
* Fixed an incompatibility with base < 4.5
|
||||
|
||||
#### 1.2.0.3
|
||||
|
||||
* Update formatRealFloat to correspond to the definition in versions
|
||||
of base newer than 4.5 (https://github.com/bos/text/issues/105)
|
||||
|
||||
#### 1.2.0.2
|
||||
|
||||
* Bumped lower bound on deepseq to 1.4 for compatibility with the
|
||||
upcoming GHC 7.10
|
||||
|
||||
#### 1.2.0.1
|
||||
|
||||
* Fixed a buffer overflow in rendering of large Integers
|
||||
(https://github.com/bos/text/issues/99)
|
||||
|
||||
## 1.2.0.0
|
||||
|
||||
* Fixed an integer overflow in the replace function
|
||||
(https://github.com/bos/text/issues/81)
|
||||
|
||||
* Fixed a hang in lazy decodeUtf8With
|
||||
(https://github.com/bos/text/issues/87)
|
||||
|
||||
* Reduced codegen bloat caused by use of empty and single-character
|
||||
literals
|
||||
|
||||
* Added an instance of IsList for GHC 7.8 and above
|
||||
|
||||
### 1.1.1.0
|
||||
|
||||
* The Data.Data instance now allows gunfold to work, via a virtual
|
||||
pack constructor
|
||||
|
||||
* dropEnd, takeEnd: new functions
|
||||
|
||||
* Comparing the length of a Text against a number can now
|
||||
short-circuit in more cases
|
||||
|
||||
#### 1.1.0.1
|
||||
|
||||
* streamDecodeUtf8: fixed gh-70, did not return all unconsumed bytes
|
||||
in single-byte chunks
|
||||
|
||||
## 1.1.0.0
|
||||
|
||||
* encodeUtf8: Performance is improved by up to 4x.
|
||||
|
||||
* encodeUtf8Builder, encodeUtf8BuilderEscaped: new functions,
|
||||
available only if bytestring >= 0.10.4.0 is installed, that allow
|
||||
very fast and flexible encoding of a Text value to a bytestring
|
||||
Builder.
|
||||
|
||||
As an example of the performance gain to be had, the
|
||||
encodeUtf8BuilderEscaped function helps to double the speed of JSON
|
||||
encoding in the latest version of aeson! (Note: if all you need is a
|
||||
plain ByteString, encodeUtf8 is still the faster way to go.)
|
||||
|
||||
* All of the internal module hierarchy is now publicly exposed. If a
|
||||
module is in the .Internal hierarchy, or is documented as internal,
|
||||
use at your own risk - there are no API stability guarantees for
|
||||
internal modules!
|
||||
|
||||
#### 1.0.0.1
|
||||
|
||||
* decodeUtf8: Fixed a regression that caused us to incorrectly
|
||||
identify truncated UTF-8 as valid (gh-61)
|
||||
|
||||
# 1.0.0.0
|
||||
|
||||
* Added support for Unicode 6.3.0 to case conversion functions
|
||||
|
||||
* New function toTitle converts words in a string to title case
|
||||
|
||||
* New functions peekCStringLen and withCStringLen simplify
|
||||
interoperability with C functionns
|
||||
|
||||
* Added support for decoding UTF-8 in stream-friendly fashion
|
||||
|
||||
* Fixed a bug in mapAccumL
|
||||
|
||||
* Added trusted Haskell support
|
||||
|
||||
* Removed support for GHC 6.10 (released in 2008) and older
|
@ -1,28 +0,0 @@
|
||||
-- This script compares the strict and lazy Text APIs to ensure that
|
||||
-- they're reasonably in sync.
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Text as T
|
||||
import System.Process
|
||||
|
||||
main = do
|
||||
let tidy pkg = (S.fromList . filter (T.isInfixOf "::") . T.lines .
|
||||
T.replace "GHC.Int.Int64" "Int" .
|
||||
T.replace "\n " "" .
|
||||
T.replace (T.append (T.pack pkg) ".") "" . T.pack) `fmap`
|
||||
readProcess "ghci" [] (":browse " ++ pkg)
|
||||
let diff a b = mapM_ (putStrLn . (" "++) . T.unpack) . S.toList $
|
||||
S.difference a b
|
||||
text <- tidy "Data.Text"
|
||||
lazy <- tidy "Data.Text.Lazy"
|
||||
list <- tidy "Data.List"
|
||||
putStrLn "Text \\ List:"
|
||||
diff text list
|
||||
putStrLn ""
|
||||
putStrLn "Text \\ Lazy:"
|
||||
diff text lazy
|
||||
putStrLn ""
|
||||
putStrLn "Lazy \\ Text:"
|
||||
diff lazy text
|
@ -1,44 +0,0 @@
|
||||
module Arsec
|
||||
(
|
||||
Comment
|
||||
, comment
|
||||
, semi
|
||||
, showC
|
||||
, unichar
|
||||
, unichars
|
||||
, module Control.Applicative
|
||||
, module Control.Monad
|
||||
, module Data.Char
|
||||
, module Text.ParserCombinators.Parsec.Char
|
||||
, module Text.ParserCombinators.Parsec.Combinator
|
||||
, module Text.ParserCombinators.Parsec.Error
|
||||
, module Text.ParserCombinators.Parsec.Prim
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
import Numeric
|
||||
import Text.ParserCombinators.Parsec.Char hiding (lower, upper)
|
||||
import Text.ParserCombinators.Parsec.Combinator hiding (optional)
|
||||
import Text.ParserCombinators.Parsec.Error
|
||||
import Text.ParserCombinators.Parsec.Prim hiding ((<|>), many)
|
||||
|
||||
type Comment = String
|
||||
|
||||
unichar :: Parser Char
|
||||
unichar = chr . fst . head . readHex <$> many1 hexDigit
|
||||
|
||||
unichars :: Parser [Char]
|
||||
unichars = manyTill (unichar <* spaces) semi
|
||||
|
||||
semi :: Parser ()
|
||||
semi = char ';' *> spaces *> pure ()
|
||||
|
||||
comment :: Parser Comment
|
||||
comment = (char '#' *> manyTill anyToken (char '\n')) <|> string "\n"
|
||||
|
||||
showC :: Char -> String
|
||||
showC c = "'\\x" ++ d ++ "'"
|
||||
where h = showHex (ord c) ""
|
||||
d = replicate (4 - length h) '0' ++ h
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user