2020-09-19 15:22:54 +03:00
|
|
|
module Data.String.Iterator
|
|
|
|
|
2020-09-19 23:46:46 +03:00
|
|
|
import public Data.List.Lazy
|
|
|
|
|
2020-09-19 22:54:34 +03:00
|
|
|
%default total
|
2020-09-19 15:22:54 +03:00
|
|
|
|
2020-09-20 11:35:53 +03:00
|
|
|
-- Backend-dependent string iteration type,
|
|
|
|
-- parameterised by the string that it iterates over.
|
2020-09-20 18:42:50 +03:00
|
|
|
--
|
|
|
|
-- Beware: the index is checked only up to definitional equality.
|
|
|
|
-- In theory, you could run `decEq` on two strings
|
|
|
|
-- with the same content but allocated in different memory locations
|
|
|
|
-- and use the obtained Refl to coerce iterators between them.
|
|
|
|
--
|
|
|
|
-- The strictly correct solution is to make the iterators independent
|
|
|
|
-- from the exact memory location of the string given to `uncons`.
|
|
|
|
-- (For example, byte offsets satisfy this requirement.)
|
2020-09-19 22:54:34 +03:00
|
|
|
export
|
2020-09-20 11:35:53 +03:00
|
|
|
data StringIterator : String -> Type where [external]
|
2020-09-19 15:22:54 +03:00
|
|
|
|
2020-09-20 10:56:58 +03:00
|
|
|
-- This function is private
|
|
|
|
-- to avoid subverting the linearity guarantees of withString.
|
2020-09-19 22:54:34 +03:00
|
|
|
%foreign
|
|
|
|
"scheme:blodwen-string-iterator-new"
|
2020-09-20 16:35:29 +03:00
|
|
|
"javascript:stringIterator:new"
|
2020-09-20 10:56:58 +03:00
|
|
|
private
|
2020-09-20 11:35:53 +03:00
|
|
|
fromString : (str : String) -> StringIterator str
|
2020-09-19 15:22:54 +03:00
|
|
|
|
2020-09-20 10:56:58 +03:00
|
|
|
-- This function uses a linear string iterator
|
|
|
|
-- so that backends can use mutating iterators.
|
|
|
|
export
|
2020-12-30 00:25:00 +03:00
|
|
|
withString : (str : String) -> ((it : StringIterator str) -> a) -> a
|
2020-09-20 10:56:58 +03:00
|
|
|
withString str f = f (fromString str)
|
|
|
|
|
|
|
|
-- We use a custom data type instead of Maybe (Char, StringIterator)
|
|
|
|
-- to remove one level of pointer indirection
|
|
|
|
-- in every iteration of something that's likely to be a hot loop,
|
|
|
|
-- and to avoid one allocation per character.
|
|
|
|
--
|
|
|
|
-- The Char field of Character is unrestricted for flexibility.
|
|
|
|
public export
|
2020-09-20 11:35:53 +03:00
|
|
|
data UnconsResult : String -> Type where
|
|
|
|
EOF : UnconsResult str
|
|
|
|
Character : (c : Char) -> (1 it : StringIterator str) -> UnconsResult str
|
2020-09-20 10:56:58 +03:00
|
|
|
|
|
|
|
-- We pass the whole string to the uncons function
|
|
|
|
-- to avoid yet another allocation per character
|
|
|
|
-- because for many backends, StringIterator can be simply an integer
|
|
|
|
-- (e.g. byte offset into an UTF-8 string).
|
2020-09-19 22:54:34 +03:00
|
|
|
%foreign
|
|
|
|
"scheme:blodwen-string-iterator-next"
|
2020-09-20 16:35:29 +03:00
|
|
|
"javascript:stringIterator:next"
|
2020-09-19 15:22:54 +03:00
|
|
|
export
|
2020-12-30 00:25:00 +03:00
|
|
|
uncons : (str : String) -> (it : StringIterator str) -> UnconsResult str
|
2020-09-20 10:56:58 +03:00
|
|
|
|
|
|
|
export
|
|
|
|
foldl : (accTy -> Char -> accTy) -> accTy -> String -> accTy
|
|
|
|
foldl op acc str = withString str (loop acc)
|
|
|
|
where
|
2020-12-30 00:25:00 +03:00
|
|
|
loop : accTy -> (it : StringIterator str) -> accTy
|
2020-09-20 10:56:58 +03:00
|
|
|
loop acc it =
|
|
|
|
case uncons str it of
|
|
|
|
EOF => acc
|
|
|
|
Character c it' => loop (acc `op` c) (assert_smaller it it')
|
2020-09-19 15:22:54 +03:00
|
|
|
|
2020-09-19 23:46:46 +03:00
|
|
|
export
|
|
|
|
unpack : String -> LazyList Char
|
2020-09-20 10:56:58 +03:00
|
|
|
unpack str = withString str unpack'
|
|
|
|
where
|
2020-12-30 00:25:00 +03:00
|
|
|
unpack' : (it : StringIterator str) -> LazyList Char
|
2020-09-20 10:56:58 +03:00
|
|
|
unpack' it =
|
|
|
|
case uncons str it of
|
|
|
|
EOF => []
|
|
|
|
Character c it' => c :: Delay (unpack' $ assert_smaller it it')
|