mirror of
https://github.com/github/semantic.git
synced 2024-12-01 17:59:10 +03:00
Copy in Blob, Language, Range, Source, and Span.
This commit is contained in:
parent
82beaa501f
commit
290345fc43
@ -20,12 +20,18 @@ tested-with: GHC == 8.6.5
|
||||
|
||||
library
|
||||
exposed-modules:
|
||||
Data.Blob
|
||||
Data.Language
|
||||
Data.Range
|
||||
Data.Source
|
||||
Data.Span
|
||||
Tags.Taggable.Precise
|
||||
-- other-modules:
|
||||
-- other-extensions:
|
||||
build-depends:
|
||||
aeson ^>= 1.4.2.0
|
||||
, base >= 4.12 && < 5
|
||||
, bytestring ^>= 0.10.8.2
|
||||
, text ^>= 1.2.3.1
|
||||
, tree-sitter == 0.3.0.0
|
||||
, tree-sitter-python == 0.4.0.0
|
||||
|
39
semantic-tags/src/Data/Blob.hs
Normal file
39
semantic-tags/src/Data/Blob.hs
Normal file
@ -0,0 +1,39 @@
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
module Data.Blob
|
||||
( File(..)
|
||||
, Blob(..)
|
||||
, blobLanguage
|
||||
, blobPath
|
||||
, makeBlob
|
||||
) where
|
||||
|
||||
import Data.Language
|
||||
import Data.Source as Source
|
||||
import Data.Text (Text)
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
-- | A 'FilePath' paired with its corresponding 'Language'.
|
||||
-- Unpacked to have the same size overhead as (FilePath, Language).
|
||||
data File = File
|
||||
{ filePath :: FilePath
|
||||
, fileLanguage :: Language
|
||||
}
|
||||
deriving (Show, Eq, Generic)
|
||||
|
||||
-- | The source, path information, and language of a file read from disk.
|
||||
data Blob = Blob
|
||||
{ blobSource :: Source -- ^ The UTF-8 encoded source text of the blob.
|
||||
, blobFile :: File -- ^ Path/language information for this blob.
|
||||
, blobOid :: Text -- ^ Git OID for this blob, mempty if blob is not from a git db.
|
||||
}
|
||||
deriving (Show, Eq, Generic)
|
||||
|
||||
blobLanguage :: Blob -> Language
|
||||
blobLanguage = fileLanguage . blobFile
|
||||
|
||||
blobPath :: Blob -> FilePath
|
||||
blobPath = filePath . blobFile
|
||||
|
||||
makeBlob :: Source -> FilePath -> Language -> Text -> Blob
|
||||
makeBlob s p l = Blob s (File p l)
|
||||
{-# INLINE makeBlob #-}
|
28
semantic-tags/src/Data/Language.hs
Normal file
28
semantic-tags/src/Data/Language.hs
Normal file
@ -0,0 +1,28 @@
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
module Data.Language
|
||||
( Language (..)
|
||||
) where
|
||||
|
||||
import Data.Aeson
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
-- | The various languages we support.
|
||||
-- Please do not reorder any of the field names: the current implementation of 'Primitive'
|
||||
-- delegates to the auto-generated 'Enum' instance.
|
||||
data Language
|
||||
= Unknown
|
||||
| Go
|
||||
| Haskell
|
||||
| Java
|
||||
| JavaScript
|
||||
| JSON
|
||||
| JSX
|
||||
| Markdown
|
||||
| Python
|
||||
| Ruby
|
||||
| TypeScript
|
||||
| PHP
|
||||
| TSX
|
||||
deriving (Bounded, Enum, Eq, Generic, Ord, Read, Show)
|
||||
|
||||
instance ToJSON Language
|
40
semantic-tags/src/Data/Range.hs
Normal file
40
semantic-tags/src/Data/Range.hs
Normal file
@ -0,0 +1,40 @@
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
module Data.Range
|
||||
( Range(..)
|
||||
, rangeLength
|
||||
, intersectsRange
|
||||
, subtractRange
|
||||
) where
|
||||
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
-- | A half-open interval of integers, defined by start & end indices.
|
||||
data Range = Range { start :: {-# UNPACK #-} !Int, end :: {-# UNPACK #-} !Int }
|
||||
deriving (Eq, Generic, Ord)
|
||||
|
||||
-- | Return the length of the range.
|
||||
rangeLength :: Range -> Int
|
||||
rangeLength range = end range - start range
|
||||
|
||||
-- | Test two ranges for intersection.
|
||||
intersectsRange :: Range -> Range -> Bool
|
||||
intersectsRange range1 range2 = start range1 < end range2 && start range2 < end range1
|
||||
|
||||
subtractRange :: Range -> Range -> Range
|
||||
subtractRange range1 range2 = Range (start range1) (end range1 - rangeLength (Range (start range2) (max (end range1) (end range2))))
|
||||
|
||||
|
||||
-- Instances
|
||||
|
||||
-- $
|
||||
-- prop> a <> (b <> c) === (a <> b) <> (c :: Range)
|
||||
instance Semigroup Range where
|
||||
Range start1 end1 <> Range start2 end2 = Range (min start1 start2) (max end1 end2)
|
||||
|
||||
instance Show Range where
|
||||
showsPrec _ r = showChar '[' . shows (start r) . showString " .. " . shows (end r) . showChar ']'
|
||||
|
||||
|
||||
-- $setup
|
||||
-- >>> import Test.QuickCheck
|
||||
-- >>> instance Arbitrary Range where arbitrary = Range <$> arbitrary <*> arbitrary ; shrink (Range s e) = Range <$> shrink s <*> shrink e
|
75
semantic-tags/src/Data/Source.hs
Normal file
75
semantic-tags/src/Data/Source.hs
Normal file
@ -0,0 +1,75 @@
|
||||
{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
|
||||
module Data.Source
|
||||
( Source
|
||||
, sourceBytes
|
||||
, fromUTF8
|
||||
-- Measurement
|
||||
, sourceLength
|
||||
, nullSource
|
||||
, totalRange
|
||||
-- En/decoding
|
||||
, fromText
|
||||
, toText
|
||||
-- Slicing
|
||||
, slice
|
||||
, dropSource
|
||||
) where
|
||||
|
||||
import Data.Aeson (FromJSON (..), withText)
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Range
|
||||
import Data.String (IsString (..))
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import GHC.Generics
|
||||
|
||||
|
||||
-- | The contents of a source file. This is represented as a UTF-8
|
||||
-- 'ByteString' under the hood. Construct these with 'fromUTF8'; obviously,
|
||||
-- passing 'fromUTF8' non-UTF8 bytes will cause crashes.
|
||||
newtype Source = Source { sourceBytes :: B.ByteString }
|
||||
deriving (Eq, Semigroup, Monoid, IsString, Show, Generic)
|
||||
|
||||
fromUTF8 :: B.ByteString -> Source
|
||||
fromUTF8 = Source
|
||||
|
||||
instance FromJSON Source where
|
||||
parseJSON = withText "Source" (pure . fromText)
|
||||
|
||||
-- Measurement
|
||||
|
||||
sourceLength :: Source -> Int
|
||||
sourceLength = B.length . sourceBytes
|
||||
|
||||
nullSource :: Source -> Bool
|
||||
nullSource = B.null . sourceBytes
|
||||
|
||||
-- | Return a 'Range' that covers the entire text.
|
||||
totalRange :: Source -> Range
|
||||
totalRange = Range 0 . B.length . sourceBytes
|
||||
|
||||
|
||||
-- En/decoding
|
||||
|
||||
-- | Return a 'Source' from a 'Text'.
|
||||
fromText :: T.Text -> Source
|
||||
fromText = Source . T.encodeUtf8
|
||||
|
||||
-- | Return the Text contained in the 'Source'.
|
||||
toText :: Source -> T.Text
|
||||
toText = T.decodeUtf8 . sourceBytes
|
||||
|
||||
|
||||
-- | Return a 'Source' that contains a slice of the given 'Source'.
|
||||
slice :: Range -> Source -> Source
|
||||
slice range = take . drop
|
||||
where drop = dropSource (start range)
|
||||
take = takeSource (rangeLength range)
|
||||
|
||||
dropSource :: Int -> Source -> Source
|
||||
dropSource i = Source . drop . sourceBytes
|
||||
where drop = B.drop i
|
||||
|
||||
takeSource :: Int -> Source -> Source
|
||||
takeSource i = Source . take . sourceBytes
|
||||
where take = B.take i
|
36
semantic-tags/src/Data/Span.hs
Normal file
36
semantic-tags/src/Data/Span.hs
Normal file
@ -0,0 +1,36 @@
|
||||
{-# LANGUAGE DeriveGeneric, OverloadedStrings #-}
|
||||
module Data.Span
|
||||
( Span(..)
|
||||
, Pos(..)
|
||||
) where
|
||||
|
||||
import Data.Aeson ((.=))
|
||||
import qualified Data.Aeson as A
|
||||
import GHC.Generics (Generic)
|
||||
|
||||
data Span = Span
|
||||
{ spanStart :: {-# UNPACK #-} !Pos
|
||||
, spanEnd :: {-# UNPACK #-} !Pos
|
||||
}
|
||||
deriving (Eq, Ord, Generic)
|
||||
|
||||
instance Show Span where
|
||||
showsPrec _ s = shows (spanStart s) . showString ".." . shows (spanEnd s)
|
||||
|
||||
instance A.ToJSON Span where
|
||||
toJSON s = A.object
|
||||
[ "start" .= spanStart s
|
||||
, "end" .= spanEnd s
|
||||
]
|
||||
|
||||
data Pos = Pos
|
||||
{ posLine :: {-# UNPACK #-} !Int
|
||||
, posColumn :: {-# UNPACK #-} !Int
|
||||
}
|
||||
deriving (Eq, Ord, Generic)
|
||||
|
||||
instance Show Pos where
|
||||
showsPrec _ p = showChar '[' . shows (posLine p) . showString ", " . shows (posColumn p) . showChar ']'
|
||||
|
||||
instance A.ToJSON Pos where
|
||||
toJSON p = A.toJSON [posLine p, posColumn p]
|
@ -4,37 +4,11 @@ module Tags.Taggable.Precise
|
||||
) where
|
||||
|
||||
import Data.Aeson as A
|
||||
import Data.Span
|
||||
import Data.Text (Text)
|
||||
import GHC.Generics (Generic)
|
||||
import qualified TreeSitter.Python.AST as Python
|
||||
|
||||
data Span = Span
|
||||
{ spanStart :: {-# UNPACK #-} !Pos
|
||||
, spanEnd :: {-# UNPACK #-} !Pos
|
||||
}
|
||||
deriving (Eq, Ord, Generic)
|
||||
|
||||
instance Show Span where
|
||||
showsPrec _ s = shows (spanStart s) . showString ".." . shows (spanEnd s)
|
||||
|
||||
instance A.ToJSON Span where
|
||||
toJSON s = A.object
|
||||
[ "start" .= spanStart s
|
||||
, "end" .= spanEnd s
|
||||
]
|
||||
|
||||
data Pos = Pos
|
||||
{ posLine :: {-# UNPACK #-} !Int
|
||||
, posColumn :: {-# UNPACK #-} !Int
|
||||
}
|
||||
deriving (Eq, Ord, Generic)
|
||||
|
||||
instance Show Pos where
|
||||
showsPrec _ p = showChar '[' . shows (posLine p) . showString ", " . shows (posColumn p) . showChar ']'
|
||||
|
||||
instance A.ToJSON Pos where
|
||||
toJSON p = A.toJSON [posLine p, posColumn p]
|
||||
|
||||
data Tag = Tag
|
||||
{ name :: Text
|
||||
, kind :: Text
|
||||
|
Loading…
Reference in New Issue
Block a user