mirror of
https://github.com/github/semantic.git
synced 2025-01-05 14:11:33 +03:00
Add ability to parse terms and decorate them with their source blob text
This commit is contained in:
parent
4d3c2b1c70
commit
9cce621004
19
src/Parse.hs
19
src/Parse.hs
@ -4,6 +4,7 @@ module Parse where
|
|||||||
|
|
||||||
import Arguments
|
import Arguments
|
||||||
import Category
|
import Category
|
||||||
|
import Data.Aeson (ToJSON)
|
||||||
import Data.Aeson.Encode.Pretty
|
import Data.Aeson.Encode.Pretty
|
||||||
import qualified Data.ByteString.Char8 as B1
|
import qualified Data.ByteString.Char8 as B1
|
||||||
import qualified Data.Text.ICU.Convert as Convert
|
import qualified Data.Text.ICU.Convert as Convert
|
||||||
@ -30,7 +31,7 @@ run Arguments{..} = do
|
|||||||
sources <- sequence $ readAndTranscodeFile <$> filePaths
|
sources <- sequence $ readAndTranscodeFile <$> filePaths
|
||||||
|
|
||||||
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
|
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
|
||||||
let parsers = parserWithCost <$> filePaths
|
let parsers = parserWithSource <$> filePaths
|
||||||
let parsersAndBlobs = zip parsers sourceBlobs
|
let parsersAndBlobs = zip parsers sourceBlobs
|
||||||
|
|
||||||
terms <- traverse (\(parser, sourceBlob) -> parser sourceBlob) parsersAndBlobs
|
terms <- traverse (\(parser, sourceBlob) -> parser sourceBlob) parsersAndBlobs
|
||||||
@ -43,6 +44,10 @@ run Arguments{..} = do
|
|||||||
parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
|
parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
|
||||||
parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
|
parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
|
||||||
|
|
||||||
|
-- | Return a parser that decorates with the source text.
|
||||||
|
parserWithSource :: FilePath -> Parser (Syntax Text) (Record '[SourceText, Range, Category, SourceSpan])
|
||||||
|
parserWithSource path blob = decorateTerm (termSourceDecorator (source blob)) <$> parserForType (toS (takeExtension path)) blob
|
||||||
|
|
||||||
-- | Return a parser based on the file extension (including the ".").
|
-- | Return a parser based on the file extension (including the ".").
|
||||||
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
||||||
parserForType mediaType = case languageForType mediaType of
|
parserForType mediaType = case languageForType mediaType of
|
||||||
@ -53,16 +58,22 @@ parserForType mediaType = case languageForType mediaType of
|
|||||||
_ -> lineByLineParser
|
_ -> lineByLineParser
|
||||||
|
|
||||||
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
|
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
|
||||||
decorateTerm :: Functor f => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
|
decorateTerm :: (HasField fields Range, Functor f) => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
|
||||||
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
|
decorateTerm decorator = cata $ \ term -> cofree ((decorator (extract <$> term) .: headF term) :< tailF term)
|
||||||
|
|
||||||
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
|
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
|
||||||
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
|
type TermDecorator f fields field = (HasField fields Range) => TermF f (Record fields) (Record (field ': fields)) -> field
|
||||||
|
|
||||||
-- | Term decorator computing the cost of an unpacked term.
|
-- | Term decorator computing the cost of an unpacked term.
|
||||||
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
|
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
|
||||||
termCostDecorator c = 1 + sum (cost <$> tailF c)
|
termCostDecorator c = 1 + sum (cost <$> tailF c)
|
||||||
|
|
||||||
|
-- | Term decorator extracting the source text for a term.
|
||||||
|
-- termSourceDecorator :: (Foldable f, Functor f) => Source Char -> TermDecorator f a SourceText
|
||||||
|
termSourceDecorator :: Source Char -> TermDecorator f a SourceText
|
||||||
|
termSourceDecorator source c = SourceText . toText $ Source.slice range' source
|
||||||
|
where range' = characterRange $ headF c
|
||||||
|
|
||||||
-- | A fallback parser that treats a file simply as rows of strings.
|
-- | A fallback parser that treats a file simply as rows of strings.
|
||||||
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
||||||
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
||||||
|
Loading…
Reference in New Issue
Block a user