1
1
mirror of https://github.com/github/semantic.git synced 2025-01-05 14:11:33 +03:00

Add ability to parse terms and decorate them with their source blob text

This commit is contained in:
Rick Winfrey 2016-11-09 17:43:14 -06:00
parent 4d3c2b1c70
commit 9cce621004

View File

@ -4,6 +4,7 @@ module Parse where
import Arguments import Arguments
import Category import Category
import Data.Aeson (ToJSON)
import Data.Aeson.Encode.Pretty import Data.Aeson.Encode.Pretty
import qualified Data.ByteString.Char8 as B1 import qualified Data.ByteString.Char8 as B1
import qualified Data.Text.ICU.Convert as Convert import qualified Data.Text.ICU.Convert as Convert
@ -30,7 +31,7 @@ run Arguments{..} = do
sources <- sequence $ readAndTranscodeFile <$> filePaths sources <- sequence $ readAndTranscodeFile <$> filePaths
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob) let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
let parsers = parserWithCost <$> filePaths let parsers = parserWithSource <$> filePaths
let parsersAndBlobs = zip parsers sourceBlobs let parsersAndBlobs = zip parsers sourceBlobs
terms <- traverse (\(parser, sourceBlob) -> parser sourceBlob) parsersAndBlobs terms <- traverse (\(parser, sourceBlob) -> parser sourceBlob) parsersAndBlobs
@ -43,6 +44,10 @@ run Arguments{..} = do
parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan]) parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser that decorates with the source text.
parserWithSource :: FilePath -> Parser (Syntax Text) (Record '[SourceText, Range, Category, SourceSpan])
parserWithSource path blob = decorateTerm (termSourceDecorator (source blob)) <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser based on the file extension (including the "."). -- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan]) parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForType mediaType = case languageForType mediaType of parserForType mediaType = case languageForType mediaType of
@ -53,16 +58,22 @@ parserForType mediaType = case languageForType mediaType of
_ -> lineByLineParser _ -> lineByLineParser
-- | Decorate a 'Term' using a function to compute the annotation values at every node. -- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: Functor f => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields)) decorateTerm :: (HasField fields Range, Functor f) => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c) decorateTerm decorator = cata $ \ term -> cofree ((decorator (extract <$> term) .: headF term) :< tailF term)
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms. -- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field type TermDecorator f fields field = (HasField fields Range) => TermF f (Record fields) (Record (field ': fields)) -> field
-- | Term decorator computing the cost of an unpacked term. -- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c) termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Term decorator extracting the source text for a term.
-- termSourceDecorator :: (Foldable f, Functor f) => Source Char -> TermDecorator f a SourceText
termSourceDecorator :: Source Char -> TermDecorator f a SourceText
termSourceDecorator source c = SourceText . toText $ Source.slice range' source
where range' = characterRange $ headF c
-- | A fallback parser that treats a file simply as rows of strings. -- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan]) lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of