mirror of
https://github.com/github/semantic.git
synced 2024-12-25 07:55:12 +03:00
Copy Source in.
This commit is contained in:
parent
f6e4864836
commit
b20dcf4a19
@ -41,16 +41,19 @@ library
|
||||
exposed-modules:
|
||||
Source.Loc
|
||||
Source.Range
|
||||
Source.Source
|
||||
Source.Span
|
||||
-- other-modules:
|
||||
-- other-extensions:
|
||||
build-depends:
|
||||
aeson ^>= 1.4.2.0
|
||||
, base >= 4.12 && < 5
|
||||
, bytestring ^>= 0.10.8.2
|
||||
, deepseq ^>= 1.4.4.0
|
||||
, generic-monoid ^>= 0.1.0.0
|
||||
, hashable ^>= 1.2.7.0
|
||||
, semilattices ^>= 0.0.0.3
|
||||
, text ^>= 1.2.3.1
|
||||
hs-source-dirs: src
|
||||
|
||||
test-suite doctest
|
||||
|
133
semantic-source/src/Source/Source.hs
Normal file
133
semantic-source/src/Source/Source.hs
Normal file
@ -0,0 +1,133 @@
|
||||
{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
|
||||
module Source.Source
|
||||
( Source
|
||||
, sourceBytes
|
||||
, fromUTF8
|
||||
-- * Measurement
|
||||
, sourceLength
|
||||
, nullSource
|
||||
, totalRange
|
||||
, totalSpan
|
||||
-- * En/decoding
|
||||
, fromText
|
||||
, toText
|
||||
-- * Slicing
|
||||
, slice
|
||||
, dropSource
|
||||
, takeSource
|
||||
-- * Splitting
|
||||
, sourceLines
|
||||
, sourceLineRanges
|
||||
, sourceLineRangesWithin
|
||||
, newlineIndices
|
||||
) where
|
||||
|
||||
import Control.Arrow ((&&&))
|
||||
import Data.Aeson (FromJSON (..), withText)
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Char (ord)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Monoid (Last(..))
|
||||
import Data.Semilattice.Lower
|
||||
import Data.String (IsString (..))
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import GHC.Generics (Generic)
|
||||
import Source.Range
|
||||
import Source.Span hiding (HasSpan (..))
|
||||
|
||||
|
||||
-- | The contents of a source file. This is represented as a UTF-8
|
||||
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
|
||||
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
|
||||
newtype Source = Source { sourceBytes :: B.ByteString }
|
||||
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
|
||||
|
||||
fromUTF8 :: B.ByteString -> Source
|
||||
fromUTF8 = Source
|
||||
|
||||
instance FromJSON Source where
|
||||
parseJSON = withText "Source" (pure . fromText)
|
||||
|
||||
|
||||
-- Measurement
|
||||
|
||||
sourceLength :: Source -> Int
|
||||
sourceLength = B.length . sourceBytes
|
||||
|
||||
nullSource :: Source -> Bool
|
||||
nullSource = B.null . sourceBytes
|
||||
|
||||
-- | Return a 'Range' that covers the entire text.
|
||||
totalRange :: Source -> Range
|
||||
totalRange = Range 0 . B.length . sourceBytes
|
||||
|
||||
-- | Return a 'Span' that covers the entire text.
|
||||
totalSpan :: Source -> Span
|
||||
totalSpan source = Span lowerBound (Pos (length ranges) (succ (end lastRange - start lastRange))) where
|
||||
ranges = sourceLineRanges source
|
||||
lastRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) ranges))
|
||||
|
||||
|
||||
-- En/decoding
|
||||
|
||||
-- | Return a 'Source' from a 'Text'.
|
||||
fromText :: T.Text -> Source
|
||||
fromText = Source . T.encodeUtf8
|
||||
|
||||
-- | Return the Text contained in the 'Source'.
|
||||
toText :: Source -> T.Text
|
||||
toText = T.decodeUtf8 . sourceBytes
|
||||
|
||||
|
||||
-- Slicing
|
||||
|
||||
-- | Return a 'Source' that contains a slice of the given 'Source'.
|
||||
slice :: Source -> Range -> Source
|
||||
slice source range = take $ drop source where
|
||||
drop = dropSource (start range)
|
||||
take = takeSource (rangeLength range)
|
||||
|
||||
dropSource :: Int -> Source -> Source
|
||||
dropSource i = Source . B.drop i . sourceBytes
|
||||
|
||||
takeSource :: Int -> Source -> Source
|
||||
takeSource i = Source . B.take i . sourceBytes
|
||||
|
||||
|
||||
-- Splitting
|
||||
|
||||
-- | Split the contents of the source after newlines.
|
||||
sourceLines :: Source -> [Source]
|
||||
sourceLines source = slice source <$> sourceLineRanges source
|
||||
|
||||
-- | Compute the 'Range's of each line in a 'Source'.
|
||||
sourceLineRanges :: Source -> [Range]
|
||||
sourceLineRanges source = sourceLineRangesWithin (totalRange source) source
|
||||
|
||||
-- | Compute the 'Range's of each line in a 'Range' of a 'Source'.
|
||||
sourceLineRangesWithin :: Range -> Source -> [Range]
|
||||
sourceLineRangesWithin range
|
||||
= uncurry (zipWith Range)
|
||||
. ((start range:) &&& (<> [ end range ]))
|
||||
. fmap (+ succ (start range))
|
||||
. newlineIndices
|
||||
. sourceBytes
|
||||
. flip slice range
|
||||
|
||||
-- | Return all indices of newlines ('\n', '\r', and '\r\n') in the 'ByteString'.
|
||||
newlineIndices :: B.ByteString -> [Int]
|
||||
newlineIndices = go 0 where
|
||||
go n bs
|
||||
| B.null bs = []
|
||||
| otherwise = case (searchCR bs, searchLF bs) of
|
||||
(Nothing, Nothing) -> []
|
||||
(Just i, Nothing) -> recur n i bs
|
||||
(Nothing, Just i) -> recur n i bs
|
||||
(Just crI, Just lfI)
|
||||
| succ crI == lfI -> recur n lfI bs
|
||||
| otherwise -> recur n (min crI lfI) bs
|
||||
recur n i bs = let j = n + i in j : go (succ j) (B.drop (succ i) bs)
|
||||
searchLF = B.elemIndex (toEnum (ord '\n'))
|
||||
searchCR = B.elemIndex (toEnum (ord '\r'))
|
||||
{-# INLINE newlineIndices #-}
|
Loading…
Reference in New Issue
Block a user