1
1
mirror of https://github.com/github/semantic.git synced 2024-12-24 15:35:14 +03:00

Add Set for Category constructors that should not be made into Indexed leaves

This commit is contained in:
Rick Winfrey 2016-08-16 13:22:41 -05:00
parent 43e58dcee3
commit 30b94f822c

View File

@ -8,6 +8,7 @@ import Data.Functor.Both
import Data.Functor.Foldable import Data.Functor.Foldable
import Data.RandomWalkSimilarity import Data.RandomWalkSimilarity
import Data.Record import Data.Record
import qualified Data.Set as Set
import qualified Data.Text.IO as TextIO import qualified Data.Text.IO as TextIO
import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert import qualified Data.Text.ICU.Convert as Convert
@ -98,12 +99,12 @@ breakDownLeavesByWord source = cata replaceIn
where where
replaceIn (info :< syntax) = cofree $ info :< syntax' replaceIn (info :< syntax) = cofree $ info :< syntax'
where syntax' = case (ranges, syntax) of where syntax' = case (ranges, syntax) of
(_:_:_, Leaf _) | category info == Category.Comment -> syntax (_:_:_, Leaf _) | Set.notMember (category info) preserveSyntax -> Indexed (makeLeaf info <$> ranges)
(_:_:_, Leaf _) | category info /= Regex -> Indexed (makeLeaf info <$> ranges)
_ -> syntax _ -> syntax
ranges = rangesAndWordsInSource (characterRange info) ranges = rangesAndWordsInSource (characterRange info)
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source) rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
makeLeaf info (range, substring) = cofree $ setCharacterRange info range :< Leaf (toS substring) makeLeaf info (range, substring) = cofree $ setCharacterRange info range :< Leaf (toS substring)
preserveSyntax = Set.fromList [Regex, Category.Comment]
-- | Transcode a file to a unicode source. -- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char) transcode :: B1.ByteString -> IO (Source Char)