Idris2/libs/contrib/Data/String/Iterator.idr

95 lines
3.2 KiB
Idris
Raw Normal View History

2020-09-19 15:22:54 +03:00
module Data.String.Iterator
2021-01-21 15:37:30 +03:00
import Control.Monad.Identity
2020-09-19 23:46:46 +03:00
import public Data.List.Lazy
2020-09-19 22:54:34 +03:00
%default total
2020-09-19 15:22:54 +03:00
-- Backend-dependent string iteration type,
-- parameterised by the string that it iterates over.
2020-09-20 18:42:50 +03:00
--
-- Beware: the index is checked only up to definitional equality.
-- In theory, you could run `decEq` on two strings
-- with the same content but allocated in different memory locations
-- and use the obtained Refl to coerce iterators between them.
--
-- The strictly correct solution is to make the iterators independent
-- from the exact memory location of the string given to `uncons`.
-- (For example, byte offsets satisfy this requirement.)
2020-09-19 22:54:34 +03:00
export
data StringIterator : String -> Type where [external]
2020-09-19 15:22:54 +03:00
2020-09-20 10:56:58 +03:00
-- This function is private
-- to avoid subverting the linearity guarantees of withString.
2020-09-19 22:54:34 +03:00
%foreign
"scheme:blodwen-string-iterator-new"
"RefC:stringIteratorNew"
"javascript:stringIterator:new"
2020-09-20 10:56:58 +03:00
private
fromString : (str : String) -> StringIterator str
2020-09-19 15:22:54 +03:00
2020-09-20 10:56:58 +03:00
-- This function uses a linear string iterator
-- so that backends can use mutating iterators.
export
2021-01-21 15:37:30 +03:00
withString : (str : String) -> ((1 it : StringIterator str) -> a) -> a
2020-09-20 10:56:58 +03:00
withString str f = f (fromString str)
2020-12-31 20:36:07 +03:00
||| Runs the action `f` on the slice `res` of the original string `str` represented by the
||| iterator `it`
%foreign
"scheme:blodwen-string-iterator-to-string"
"RefC:stringIteratorToString"
2020-12-31 20:36:07 +03:00
"javascript:stringIterator:toString"
export
withIteratorString : (str : String)
-> (1 it : StringIterator str)
-> (f : (res : String) -> a)
-> a
2020-09-20 10:56:58 +03:00
-- We use a custom data type instead of Maybe (Char, StringIterator)
-- to remove one level of pointer indirection
-- in every iteration of something that's likely to be a hot loop,
-- and to avoid one allocation per character.
--
-- The Char field of Character is unrestricted for flexibility.
public export
data UnconsResult : String -> Type where
EOF : UnconsResult str
Character : (c : Char) -> (1 it : StringIterator str) -> UnconsResult str
2020-09-20 10:56:58 +03:00
-- We pass the whole string to the uncons function
-- to avoid yet another allocation per character
-- because for many backends, StringIterator can be simply an integer
-- (e.g. byte offset into an UTF-8 string).
2020-09-19 22:54:34 +03:00
%foreign
"scheme:blodwen-string-iterator-next"
"RefC:stringIteratorNext"
"javascript:stringIterator:next"
2020-09-19 15:22:54 +03:00
export
2021-01-21 15:37:30 +03:00
uncons : (str : String) -> (1 it : StringIterator str) -> UnconsResult str
2020-09-20 10:56:58 +03:00
export
foldl : (accTy -> Char -> accTy) -> accTy -> String -> accTy
foldl op acc str = withString str (loop acc)
where
2021-01-21 15:37:30 +03:00
loop : accTy -> (1 it : StringIterator str) -> accTy
2020-09-20 10:56:58 +03:00
loop acc it =
case uncons str it of
EOF => acc
Character c it' => loop (acc `op` c) (assert_smaller it it')
2020-09-19 15:22:54 +03:00
2020-09-19 23:46:46 +03:00
export
unpack : String -> LazyList Char
2021-01-21 15:37:30 +03:00
unpack str = runIdentity $ withString str unpack'
2020-09-20 10:56:58 +03:00
where
2021-01-21 15:37:30 +03:00
-- This is a Functor instance of Identity, but linear in second argument
%inline
mapId : forall a, b. (a -> b) -> (1 x : Identity a) -> Identity b
mapId f (Id x) = Id (f x)
unpack' : (1 it : StringIterator str) -> Identity (Lazy (LazyList Char))
2020-09-20 10:56:58 +03:00
unpack' it =
case uncons str it of
2021-01-21 15:37:30 +03:00
EOF => pure []
Character c it' => mapId (c ::) (unpack' $ assert_smaller it it')