mirror of
https://github.com/github/semantic.git
synced 2024-12-23 06:41:45 +03:00
Add Parse module
This commit is contained in:
parent
3bd78d19b9
commit
2abd18e227
88
src/Parse.hs
Normal file
88
src/Parse.hs
Normal file
@ -0,0 +1,88 @@
|
||||
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
|
||||
module Parse where
|
||||
|
||||
import Arguments
|
||||
import Category
|
||||
import Data.Aeson.Encode.Pretty
|
||||
import qualified Data.ByteString.Char8 as B1
|
||||
import qualified Data.Text.ICU.Convert as Convert
|
||||
import qualified Data.Text.ICU.Detect as Detect
|
||||
import Data.Record
|
||||
import qualified Data.Text as T
|
||||
import Info
|
||||
import Language
|
||||
import Language.Markdown
|
||||
import Parser
|
||||
import Prologue
|
||||
import Range
|
||||
import Source
|
||||
import SourceSpan
|
||||
import Syntax
|
||||
import System.FilePath
|
||||
import Term
|
||||
import Term.Instances
|
||||
import TreeSitter
|
||||
import Text.Parser.TreeSitter.Language
|
||||
|
||||
run :: Arguments -> IO ()
|
||||
run args@Arguments{..} = do
|
||||
sources <- sequence $ readAndTranscodeFile <$> filePaths
|
||||
|
||||
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
|
||||
let parsers = parserForFilepath <$> filePaths
|
||||
let parsersAndBlobs = zip parsers sourceBlobs
|
||||
|
||||
terms <- traverse (\(parser, sourceBlob) -> parser sourceBlob) parsersAndBlobs
|
||||
|
||||
putStrLn $ encodePretty terms
|
||||
|
||||
pure ()
|
||||
|
||||
-- | Return the parser that should be used for a given path.
|
||||
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
|
||||
parserForFilepath path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
|
||||
|
||||
-- | Return a parser based on the file extension (including the ".").
|
||||
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
||||
parserForType mediaType = case languageForType mediaType of
|
||||
Just C -> treeSitterParser C ts_language_c
|
||||
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
|
||||
Just Markdown -> cmarkParser
|
||||
Just Ruby -> treeSitterParser Ruby ts_language_ruby
|
||||
_ -> lineByLineParser
|
||||
|
||||
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
|
||||
decorateTerm :: Functor f => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
|
||||
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
|
||||
|
||||
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
|
||||
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
|
||||
|
||||
-- | Term decorator computing the cost of an unpacked term.
|
||||
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
|
||||
termCostDecorator c = 1 + sum (cost <$> tailF c)
|
||||
|
||||
-- | A fallback parser that treats a file simply as rows of strings.
|
||||
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
|
||||
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
|
||||
(leaves, _) -> cofree <$> leaves
|
||||
where
|
||||
lines = actualLines source
|
||||
root children = (Range 0 (length source) .: Program .: rangeToSourceSpan source (Range 0 (length source)) .: RNil) :< Indexed children
|
||||
leaf charIndex line = (Range charIndex (charIndex + T.length line) .: Program .: rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) .: RNil) :< Leaf line
|
||||
annotateLeaves (accum, charIndex) line =
|
||||
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
|
||||
toText = T.pack . Source.toString
|
||||
|
||||
-- | Read the file and convert it to Unicode.
|
||||
readAndTranscodeFile :: FilePath -> IO (Source Char)
|
||||
readAndTranscodeFile path = do
|
||||
text <- B1.readFile path
|
||||
transcode text
|
||||
|
||||
-- | Transcode a file to a unicode source.
|
||||
transcode :: B1.ByteString -> IO (Source Char)
|
||||
transcode text = fromText <$> do
|
||||
match <- Detect.detectCharset text
|
||||
converter <- Convert.open match Nothing
|
||||
pure $ Convert.toUnicode converter text
|
Loading…
Reference in New Issue
Block a user