Copy Source in.

2024-12-26 00:12:29 +03:00 · 2019-09-20 15:13:50 -04:00 · 2019-09-20 15:13:50 -04:00 · b20dcf4a19
commit b20dcf4a19
parent f6e4864836
2 changed files with 136 additions and 0 deletions
--- a/semantic-source/semantic-source.cabal
+++ b/semantic-source/semantic-source.cabal
@ -41,16 +41,19 @@ library
  exposed-modules:
    Source.Loc
    Source.Range
    Source.Source
    Source.Span
  -- other-modules:
  -- other-extensions:
  build-depends:
      aeson          ^>= 1.4.2.0
    , base            >= 4.12 && < 5
    , bytestring     ^>= 0.10.8.2
    , deepseq        ^>= 1.4.4.0
    , generic-monoid ^>= 0.1.0.0
    , hashable       ^>= 1.2.7.0
    , semilattices   ^>= 0.0.0.3
    , text           ^>= 1.2.3.1
  hs-source-dirs: src
 test-suite doctest
--- a/semantic-source/src/Source/Source.hs
+++ b/semantic-source/src/Source/Source.hs
@ -0,0 +1,133 @@
 {-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
 module Source.Source
 ( Source
 , sourceBytes
 , fromUTF8
 -- * Measurement
 , sourceLength
 , nullSource
 , totalRange
 , totalSpan
 -- * En/decoding
 , fromText
 , toText
 -- * Slicing
 , slice
 , dropSource
 , takeSource
 -- * Splitting
 , sourceLines
 , sourceLineRanges
 , sourceLineRangesWithin
 , newlineIndices
 ) where
 import           Control.Arrow ((&&&))
 import           Data.Aeson (FromJSON (..), withText)
 import qualified Data.ByteString as B
 import           Data.Char (ord)
 import           Data.Maybe (fromMaybe)
 import           Data.Monoid (Last(..))
 import           Data.Semilattice.Lower
 import           Data.String (IsString (..))
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
 import           GHC.Generics (Generic)
 import           Source.Range
 import           Source.Span hiding (HasSpan (..))
 -- | The contents of a source file. This is represented as a UTF-8
 -- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
 -- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
 newtype Source = Source { sourceBytes :: B.ByteString }
  deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
 fromUTF8 :: B.ByteString -> Source
 fromUTF8 = Source
 instance FromJSON Source where
  parseJSON = withText "Source" (pure . fromText)
 -- Measurement
 sourceLength :: Source -> Int
 sourceLength = B.length . sourceBytes
 nullSource :: Source -> Bool
 nullSource = B.null . sourceBytes
 -- | Return a 'Range' that covers the entire text.
 totalRange :: Source -> Range
 totalRange = Range 0 . B.length . sourceBytes
 -- | Return a 'Span' that covers the entire text.
 totalSpan :: Source -> Span
 totalSpan source = Span lowerBound (Pos (length ranges) (succ (end lastRange - start lastRange))) where
  ranges = sourceLineRanges source
  lastRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) ranges))
 -- En/decoding
 -- | Return a 'Source' from a 'Text'.
 fromText :: T.Text -> Source
 fromText = Source . T.encodeUtf8
 -- | Return the Text contained in the 'Source'.
 toText :: Source -> T.Text
 toText = T.decodeUtf8 . sourceBytes
 -- Slicing
 -- | Return a 'Source' that contains a slice of the given 'Source'.
 slice :: Source -> Range -> Source
 slice source range = take $ drop source where
  drop = dropSource (start range)
  take = takeSource (rangeLength range)
 dropSource :: Int -> Source -> Source
 dropSource i = Source . B.drop i . sourceBytes
 takeSource :: Int -> Source -> Source
 takeSource i = Source . B.take i . sourceBytes
 -- Splitting
 -- | Split the contents of the source after newlines.
 sourceLines :: Source -> [Source]
 sourceLines source = slice source <$> sourceLineRanges source
 -- | Compute the 'Range's of each line in a 'Source'.
 sourceLineRanges :: Source -> [Range]
 sourceLineRanges source = sourceLineRangesWithin (totalRange source) source
 -- | Compute the 'Range's of each line in a 'Range' of a 'Source'.
 sourceLineRangesWithin :: Range -> Source -> [Range]
 sourceLineRangesWithin range
  = uncurry (zipWith Range)
  . ((start range:) &&& (<> [ end range ]))
  . fmap (+ succ (start range))
  . newlineIndices
  . sourceBytes
  . flip slice range
 -- | Return all indices of newlines ('\n', '\r', and '\r\n') in the 'ByteString'.
 newlineIndices :: B.ByteString -> [Int]
 newlineIndices = go 0 where
  go n bs
    | B.null bs = []
    | otherwise = case (searchCR bs, searchLF bs) of
      (Nothing, Nothing)  -> []
      (Just i, Nothing)   -> recur n i bs
      (Nothing, Just i)   -> recur n i bs
      (Just crI, Just lfI)
        | succ crI == lfI -> recur n lfI bs
        | otherwise       -> recur n (min crI lfI) bs
  recur n i bs = let j = n + i in j : go (succ j) (B.drop (succ i) bs)
  searchLF = B.elemIndex (toEnum (ord '\n'))
  searchCR = B.elemIndex (toEnum (ord '\r'))
 {-# INLINE newlineIndices #-}