1
1
mirror of https://github.com/github/semantic.git synced 2024-12-01 17:59:10 +03:00

Copy in Blob, Language, Range, Source, and Span.

This commit is contained in:
Rob Rix 2019-09-18 17:58:05 -04:00
parent 82beaa501f
commit 290345fc43
No known key found for this signature in database
GPG Key ID: F188A01508EA1CF7
7 changed files with 225 additions and 27 deletions

View File

@ -20,12 +20,18 @@ tested-with: GHC == 8.6.5
library
exposed-modules:
Data.Blob
Data.Language
Data.Range
Data.Source
Data.Span
Tags.Taggable.Precise
-- other-modules:
-- other-extensions:
build-depends:
aeson ^>= 1.4.2.0
, base >= 4.12 && < 5
, bytestring ^>= 0.10.8.2
, text ^>= 1.2.3.1
, tree-sitter == 0.3.0.0
, tree-sitter-python == 0.4.0.0

View File

@ -0,0 +1,39 @@
{-# LANGUAGE DeriveGeneric #-}
module Data.Blob
( File(..)
, Blob(..)
, blobLanguage
, blobPath
, makeBlob
) where
import Data.Language
import Data.Source as Source
import Data.Text (Text)
import GHC.Generics (Generic)
-- | A 'FilePath' paired with its corresponding 'Language'.
-- Unpacked to have the same size overhead as (FilePath, Language).
data File = File
{ filePath :: FilePath
, fileLanguage :: Language
}
deriving (Show, Eq, Generic)
-- | The source, path information, and language of a file read from disk.
data Blob = Blob
{ blobSource :: Source -- ^ The UTF-8 encoded source text of the blob.
, blobFile :: File -- ^ Path/language information for this blob.
, blobOid :: Text -- ^ Git OID for this blob, mempty if blob is not from a git db.
}
deriving (Show, Eq, Generic)
blobLanguage :: Blob -> Language
blobLanguage = fileLanguage . blobFile
blobPath :: Blob -> FilePath
blobPath = filePath . blobFile
makeBlob :: Source -> FilePath -> Language -> Text -> Blob
makeBlob s p l = Blob s (File p l)
{-# INLINE makeBlob #-}

View File

@ -0,0 +1,28 @@
{-# LANGUAGE DeriveGeneric #-}
module Data.Language
( Language (..)
) where
import Data.Aeson
import GHC.Generics (Generic)
-- | The various languages we support.
-- Please do not reorder any of the field names: the current implementation of 'Primitive'
-- delegates to the auto-generated 'Enum' instance.
data Language
= Unknown
| Go
| Haskell
| Java
| JavaScript
| JSON
| JSX
| Markdown
| Python
| Ruby
| TypeScript
| PHP
| TSX
deriving (Bounded, Enum, Eq, Generic, Ord, Read, Show)
instance ToJSON Language

View File

@ -0,0 +1,40 @@
{-# LANGUAGE DeriveGeneric #-}
module Data.Range
( Range(..)
, rangeLength
, intersectsRange
, subtractRange
) where
import GHC.Generics (Generic)
-- | A half-open interval of integers, defined by start & end indices.
data Range = Range { start :: {-# UNPACK #-} !Int, end :: {-# UNPACK #-} !Int }
deriving (Eq, Generic, Ord)
-- | Return the length of the range.
rangeLength :: Range -> Int
rangeLength range = end range - start range
-- | Test two ranges for intersection.
intersectsRange :: Range -> Range -> Bool
intersectsRange range1 range2 = start range1 < end range2 && start range2 < end range1
subtractRange :: Range -> Range -> Range
subtractRange range1 range2 = Range (start range1) (end range1 - rangeLength (Range (start range2) (max (end range1) (end range2))))
-- Instances
-- $
-- prop> a <> (b <> c) === (a <> b) <> (c :: Range)
instance Semigroup Range where
Range start1 end1 <> Range start2 end2 = Range (min start1 start2) (max end1 end2)
instance Show Range where
showsPrec _ r = showChar '[' . shows (start r) . showString " .. " . shows (end r) . showChar ']'
-- $setup
-- >>> import Test.QuickCheck
-- >>> instance Arbitrary Range where arbitrary = Range <$> arbitrary <*> arbitrary ; shrink (Range s e) = Range <$> shrink s <*> shrink e

View File

@ -0,0 +1,75 @@
{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
module Data.Source
( Source
, sourceBytes
, fromUTF8
-- Measurement
, sourceLength
, nullSource
, totalRange
-- En/decoding
, fromText
, toText
-- Slicing
, slice
, dropSource
) where
import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import Data.Range
import Data.String (IsString (..))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import GHC.Generics
-- | The contents of a source file. This is represented as a UTF-8
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
newtype Source = Source { sourceBytes :: B.ByteString }
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
fromUTF8 :: B.ByteString -> Source
fromUTF8 = Source
instance FromJSON Source where
parseJSON = withText "Source" (pure . fromText)
-- Measurement
sourceLength :: Source -> Int
sourceLength = B.length . sourceBytes
nullSource :: Source -> Bool
nullSource = B.null . sourceBytes
-- | Return a 'Range' that covers the entire text.
totalRange :: Source -> Range
totalRange = Range 0 . B.length . sourceBytes
-- En/decoding
-- | Return a 'Source' from a 'Text'.
fromText :: T.Text -> Source
fromText = Source . T.encodeUtf8
-- | Return the Text contained in the 'Source'.
toText :: Source -> T.Text
toText = T.decodeUtf8 . sourceBytes
-- | Return a 'Source' that contains a slice of the given 'Source'.
slice :: Range -> Source -> Source
slice range = take . drop
where drop = dropSource (start range)
take = takeSource (rangeLength range)
dropSource :: Int -> Source -> Source
dropSource i = Source . drop . sourceBytes
where drop = B.drop i
takeSource :: Int -> Source -> Source
takeSource i = Source . take . sourceBytes
where take = B.take i

View File

@ -0,0 +1,36 @@
{-# LANGUAGE DeriveGeneric, OverloadedStrings #-}
module Data.Span
( Span(..)
, Pos(..)
) where
import Data.Aeson ((.=))
import qualified Data.Aeson as A
import GHC.Generics (Generic)
data Span = Span
{ spanStart :: {-# UNPACK #-} !Pos
, spanEnd :: {-# UNPACK #-} !Pos
}
deriving (Eq, Ord, Generic)
instance Show Span where
showsPrec _ s = shows (spanStart s) . showString ".." . shows (spanEnd s)
instance A.ToJSON Span where
toJSON s = A.object
[ "start" .= spanStart s
, "end" .= spanEnd s
]
data Pos = Pos
{ posLine :: {-# UNPACK #-} !Int
, posColumn :: {-# UNPACK #-} !Int
}
deriving (Eq, Ord, Generic)
instance Show Pos where
showsPrec _ p = showChar '[' . shows (posLine p) . showString ", " . shows (posColumn p) . showChar ']'
instance A.ToJSON Pos where
toJSON p = A.toJSON [posLine p, posColumn p]

View File

@ -4,37 +4,11 @@ module Tags.Taggable.Precise
) where
import Data.Aeson as A
import Data.Span
import Data.Text (Text)
import GHC.Generics (Generic)
import qualified TreeSitter.Python.AST as Python
data Span = Span
{ spanStart :: {-# UNPACK #-} !Pos
, spanEnd :: {-# UNPACK #-} !Pos
}
deriving (Eq, Ord, Generic)
instance Show Span where
showsPrec _ s = shows (spanStart s) . showString ".." . shows (spanEnd s)
instance A.ToJSON Span where
toJSON s = A.object
[ "start" .= spanStart s
, "end" .= spanEnd s
]
data Pos = Pos
{ posLine :: {-# UNPACK #-} !Int
, posColumn :: {-# UNPACK #-} !Int
}
deriving (Eq, Ord, Generic)
instance Show Pos where
showsPrec _ p = showChar '[' . shows (posLine p) . showString ", " . shows (posColumn p) . showChar ']'
instance A.ToJSON Pos where
toJSON p = A.toJSON [posLine p, posColumn p]
data Tag = Tag
{ name :: Text
, kind :: Text