Copy Source in.

2024-12-24 23:42:31 +03:00 · 2019-09-20 15:13:50 -04:00 · 2019-09-20 15:13:50 -04:00 · b20dcf4a19
commit b20dcf4a19
parent f6e4864836
2 changed files with 136 additions and 0 deletions
--- a/semantic-source/semantic-source.cabal
+++ b/semantic-source/semantic-source.cabal
@ -41,16 +41,19 @@ library
  exposed-modules:
    Source.Loc
    Source.Range
+    Source.Source
    Source.Span
  -- other-modules:
  -- other-extensions:
  build-depends:
      aeson          ^>= 1.4.2.0
    , base            >= 4.12 && < 5
+    , bytestring     ^>= 0.10.8.2
    , deepseq        ^>= 1.4.4.0
    , generic-monoid ^>= 0.1.0.0
    , hashable       ^>= 1.2.7.0
    , semilattices   ^>= 0.0.0.3
+    , text           ^>= 1.2.3.1
  hs-source-dirs: src

 test-suite doctest
--- a/semantic-source/src/Source/Source.hs
+++ b/semantic-source/src/Source/Source.hs
@ -0,0 +1,133 @@
+{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
+module Source.Source
+( Source
+, sourceBytes
+, fromUTF8
+-- * Measurement
+, sourceLength
+, nullSource
+, totalRange
+, totalSpan
+-- * En/decoding
+, fromText
+, toText
+-- * Slicing
+, slice
+, dropSource
+, takeSource
+-- * Splitting
+, sourceLines
+, sourceLineRanges
+, sourceLineRangesWithin
+, newlineIndices
+) where
+
+import           Control.Arrow ((&&&))
+import           Data.Aeson (FromJSON (..), withText)
+import qualified Data.ByteString as B
+import           Data.Char (ord)
+import           Data.Maybe (fromMaybe)
+import           Data.Monoid (Last(..))
+import           Data.Semilattice.Lower
+import           Data.String (IsString (..))
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import           GHC.Generics (Generic)
+import           Source.Range
+import           Source.Span hiding (HasSpan (..))
+
+
+-- | The contents of a source file. This is represented as a UTF-8
+-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
+-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
+newtype Source = Source { sourceBytes :: B.ByteString }
+  deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
+
+fromUTF8 :: B.ByteString -> Source
+fromUTF8 = Source
+
+instance FromJSON Source where
+  parseJSON = withText "Source" (pure . fromText)
+
+
+-- Measurement
+
+sourceLength :: Source -> Int
+sourceLength = B.length . sourceBytes
+
+nullSource :: Source -> Bool
+nullSource = B.null . sourceBytes
+
+-- | Return a 'Range' that covers the entire text.
+totalRange :: Source -> Range
+totalRange = Range 0 . B.length . sourceBytes
+
+-- | Return a 'Span' that covers the entire text.
+totalSpan :: Source -> Span
+totalSpan source = Span lowerBound (Pos (length ranges) (succ (end lastRange - start lastRange))) where
+  ranges = sourceLineRanges source
+  lastRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) ranges))
+
+
+-- En/decoding
+
+-- | Return a 'Source' from a 'Text'.
+fromText :: T.Text -> Source
+fromText = Source . T.encodeUtf8
+
+-- | Return the Text contained in the 'Source'.
+toText :: Source -> T.Text
+toText = T.decodeUtf8 . sourceBytes
+
+
+-- Slicing
+
+-- | Return a 'Source' that contains a slice of the given 'Source'.
+slice :: Source -> Range -> Source
+slice source range = take $ drop source where
+  drop = dropSource (start range)
+  take = takeSource (rangeLength range)
+
+dropSource :: Int -> Source -> Source
+dropSource i = Source . B.drop i . sourceBytes
+
+takeSource :: Int -> Source -> Source
+takeSource i = Source . B.take i . sourceBytes
+
+
+-- Splitting
+
+-- | Split the contents of the source after newlines.
+sourceLines :: Source -> [Source]
+sourceLines source = slice source <$> sourceLineRanges source
+
+-- | Compute the 'Range's of each line in a 'Source'.
+sourceLineRanges :: Source -> [Range]
+sourceLineRanges source = sourceLineRangesWithin (totalRange source) source
+
+-- | Compute the 'Range's of each line in a 'Range' of a 'Source'.
+sourceLineRangesWithin :: Range -> Source -> [Range]
+sourceLineRangesWithin range
+  = uncurry (zipWith Range)
+  . ((start range:) &&& (<> [ end range ]))
+  . fmap (+ succ (start range))
+  . newlineIndices
+  . sourceBytes
+  . flip slice range
+
+-- | Return all indices of newlines ('\n', '\r', and '\r\n') in the 'ByteString'.
+newlineIndices :: B.ByteString -> [Int]
+newlineIndices = go 0 where
+  go n bs
+    | B.null bs = []
+    | otherwise = case (searchCR bs, searchLF bs) of
+      (Nothing, Nothing)  -> []
+      (Just i, Nothing)   -> recur n i bs
+      (Nothing, Just i)   -> recur n i bs
+      (Just crI, Just lfI)
+        | succ crI == lfI -> recur n lfI bs
+        | otherwise       -> recur n (min crI lfI) bs
+  recur n i bs = let j = n + i in j : go (succ j) (B.drop (succ i) bs)
+  searchLF = B.elemIndex (toEnum (ord '\n'))
+  searchCR = B.elemIndex (toEnum (ord '\r'))
+{-# INLINE newlineIndices #-}