1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 08:54:07 +03:00

Reinstate import graph functionality.

This commit is contained in:
Patrick Thomson 2018-06-13 12:47:35 -04:00
parent 93ec234800
commit 19e7ff7510
12 changed files with 362 additions and 62 deletions

View File

@ -76,6 +76,8 @@ library
, Data.Functor.Both
, Data.Functor.Classes.Generic
, Data.Graph
, Data.Graph.AdjList
, Data.Graph.Vertex
, Data.JSON.Fields
, Data.Language
, Data.Map.Monoidal
@ -221,6 +223,7 @@ library
, proto3-wire
, unix
, unordered-containers
, vector
, haskell-tree-sitter
, tree-sitter-go
, tree-sitter-haskell

View File

@ -19,16 +19,26 @@ service SemanticAPI {
rpc HealthCheck(HealthCheckRequest) returns (HealthCheckResponse);
rpc FetchSummaries (SummariesRequest) returns (SummariesResponse) {}
rpc ParseBlobs (ParseRequest) returns (ParseResponse) {}
rpc GraphImports (ImportGraphRequest) returns (ImportGraphResponse) {}
}
message SummariesRequest {
repeated BlobPair blobPairs = 1;
}
message ImportGraphRequest {
Project project = 1;
}
message ParseRequest {
repeated Blob source = 1;
}
message ImportGraphResponse {
AdjList graph = 1;
repeated string errorMessages = 2;
}
message BlobPair {
Blob before = 1;
Blob after = 2;

View File

@ -19,22 +19,14 @@ import Data.Abstract.Address
import Data.Abstract.Module (Module(moduleInfo), ModuleInfo(..))
import Data.Abstract.Name
import Data.Abstract.Package (PackageInfo(..))
import Data.Aeson hiding (Result)
import Data.ByteString.Builder
import Data.Graph
import Data.Graph.Vertex
import Data.Sum
import qualified Data.Syntax as Syntax
import Data.Term
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Prologue hiding (packageName, project)
-- | A vertex of some specific type.
data Vertex
= Package { vertexName :: Text }
| Module { vertexName :: Text }
| Variable { vertexName :: Text }
deriving (Eq, Ord, Show)
import Prologue hiding (project)
style :: Style Vertex Builder
style = (defaultStyle (T.encodeUtf8Builder . vertexName))
@ -88,13 +80,6 @@ graphingModules recur m = interpose @(Modules address value) pure (\ m yield ->
_ -> send m >>= yield)
(recur m)
packageVertex :: PackageInfo -> Vertex
packageVertex = Package . formatName . packageName
moduleVertex :: ModuleInfo -> Vertex
moduleVertex = Module . T.pack . modulePath
-- | Add an edge from the current package to the passed vertex.
packageInclusion :: ( Effectful m
, Member (Reader PackageInfo) effects
@ -133,17 +118,5 @@ appendGraph :: (Effectful m, Member (State (Graph Vertex)) effects) => Graph Ver
appendGraph = modify' . (<>)
instance ToJSON Vertex where
toJSON v = object [ "name" .= vertexToText v, "type" .= vertexToType v ]
vertexToText :: Vertex -> Text
vertexToText = vertexName
vertexToType :: Vertex -> Text
vertexToType Package{} = "package"
vertexToType Module{} = "module"
vertexToType Variable{} = "variable"
graphing :: Effectful m => m (State (Graph Vertex) ': effects) result -> m effects (result, Graph Vertex)
graphing = runState mempty

157
src/Data/Graph/AdjList.hs Normal file
View File

@ -0,0 +1,157 @@
{-# LANGUAGE DeriveAnyClass, LambdaCase, TupleSections #-}
module Data.Graph.AdjList
( AdjList (..)
, Edge (..)
, Tag
, Vertex (..)
, VertexType (..)
, graphToAdjList
, adjListToGraph
, tagGraph
, isCoherent
) where
import Prologue
import Algebra.Graph.AdjacencyMap (adjacencyMap)
import Algebra.Graph.Class (ToGraph (..), edges, vertices)
import Control.Monad.Effect
import Control.Monad.Effect.Fresh
import Data.Aeson
import Data.Coerce
import Data.HashMap.Strict ((!))
import qualified Data.HashMap.Strict as HashMap
import Data.HashSet (HashSet)
import qualified Data.HashSet as HashSet
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.Vector as Vec
import Data.Word
import GHC.Exts (fromList)
import qualified Proto3.Suite as PB
import Data.Graph
import qualified Data.Graph.Vertex as V
-- | Sum type corresponding to a protobuf enum for vertex types.
data VertexType
= PACKAGE
| MODULE
| VARIABLE
deriving (Eq, Ord, Show, Enum, Bounded, Generic, ToJSON, FromJSON, PB.Named, PB.Finite, PB.MessageField)
-- | Defaults to 'PACKAGE'.
instance PB.HasDefault VertexType where def = PACKAGE
-- | Piggybacks on top of the 'Enumerated' instance, as the generated code would.
-- This instance will get easier when we have DerivingVia, or a Generic instance
-- that hooks into Enumerated.
instance PB.Primitive VertexType where
primType _ = PB.primType (Proxy @(PB.Enumerated VertexType))
encodePrimitive f = PB.encodePrimitive f . PB.Enumerated . Right
decodePrimitive = PB.decodePrimitive >>= \case
(PB.Enumerated (Right r)) -> pure r
other -> Prelude.fail ("VertexType decodeMessageField: unexpected value" <> show other)
-- | A tag used on each vertext of a 'Graph' to convert to an 'AdjList'.
type Tag = Word64
-- | A protobuf-compatible vertex type, with a unique 'Tag' identifier.
data Vertex = Vertex
{ vertexType :: VertexType
, vertexContents :: Text
, vertexTag :: Tag
} deriving (Eq, Ord, Show, Generic, PB.Message, PB.Named)
-- | A protobuf-compatible edge type. Only tag information is carried;
-- consumers are expected to look up nodes in the vertex list when necessary.
data Edge = Edge { edgeFrom :: !Tag, edgeTo :: !Tag }
deriving (Eq, Ord, Show, Generic, Hashable, PB.Named, PB.Message)
-- | An adjacency list-representation of a graph. You generally build these by calling
-- 'graphToAdjList' on an algebraic 'Graph'. This representation is less efficient and
-- fluent than an ordinary 'Graph', but is more amenable to serialization.
data AdjList = AdjList
{ graphVertices :: PB.NestedVec Vertex
, graphEdges :: PB.NestedVec Edge
} deriving (Eq, Ord, Show, Generic, PB.Named, PB.Message)
-- | Convert an algebraic graph to an adjacency list.
graphToAdjList :: Graph V.Vertex -> AdjList
graphToAdjList = taggedGraphToAdjList . tagGraph
-- * Internal interface stuff
-- Using a PBGraph as the accumulator for the fold would incur
-- significant overhead associated with Vector concatenation.
-- We use this and then pay the O(v + e) to-Vector cost once.
-- The fields are strict because we have StrictData on.
data Acc = Acc [Vertex] (HashSet Edge)
-- Convert a graph with tagged members to a protobuf-compatible adjacency list.
-- The Tag is necessary to build a canonical adjacency list.
-- Since import graphs can be very large, this is written with speed in mind, in
-- that we convert the graph to algebraic-graphs's 'AdjacencyMap' and then fold
-- to build a 'Graph', avoiding inefficient vector concatenation.
-- Time complexity, given V vertices and E edges, is at least O(2V + 2E + (V * E * log E)),
-- plus whatever overhead converting the graph to 'AdjacencyMap' may entail.
taggedGraphToAdjList :: Graph (V.Vertex, Tag) -> AdjList
taggedGraphToAdjList = accumToAdj . adjMapToAccum . adjacencyMap . toGraph . simplify
where adjMapToAccum :: Map (V.Vertex, Tag) (Set (V.Vertex, Tag)) -> Acc
adjMapToAccum = Map.foldlWithKey go (Acc [] mempty)
go :: Acc -> (V.Vertex, Tag) -> Set (V.Vertex, Tag) -> Acc
go (Acc vs es) (v, from) edges = Acc (vertexToPB v from : vs) (Set.foldr' (add . snd) es edges)
where add = HashSet.insert . Edge from
accumToAdj :: Acc -> AdjList
accumToAdj (Acc vs es) = AdjList (fromList vs) (fromList (toList es))
vertexToPB :: V.Vertex -> Tag -> Vertex
vertexToPB s = Vertex t (V.vertexName s) where
t = case s of
V.Package{} -> PACKAGE
V.Module{} -> MODULE
V.Variable{} -> VARIABLE
-- Annotate all vertices of a 'Graph' with a 'Tag', starting from 1.
tagGraph :: Graph vertex -> Graph (vertex, Tag)
tagGraph = run . runFresh 1 . go where
go :: Graph vertex -> Eff '[Fresh] (Graph (vertex, Tag))
go = traverse (\v -> (v, ) . fromIntegral <$> fresh)
-- | This is the reverse of 'graphToAdjList'. Don't use this outside of a testing context.
-- N.B. @adjListToGraph . graphToAdjList@ is 'id', but @graphToAdjList . adjListToGraph@ is not.
adjListToGraph :: AdjList -> Graph V.Vertex
adjListToGraph (AdjList vs es) = simplify built
where built = allEdges <> vertices unreferencedVertices
allEdges :: Graph V.Vertex
allEdges = fmap fst (edges (foldr addEdge [] es))
addEdge (Edge f t) xs = ((adjMap ! f, f), (adjMap ! t, t)) : xs
adjMap = foldMap (\v -> HashMap.singleton (vertexTag v) (pbToVertex v)) vs
unreferencedVertices :: [V.Vertex]
unreferencedVertices = pbToVertex <$> toList (Vec.filter isUnreferenced (coerce vs))
isUnreferenced :: Vertex -> Bool
isUnreferenced v = not (vertexTag v `HashSet.member` edgedTags)
edgedTags :: HashSet Tag
edgedTags = HashSet.fromList $ concatMap unEdge es where unEdge (Edge f t) = [f, t]
pbToVertex :: Vertex -> V.Vertex
pbToVertex (Vertex t c _) = case t of
MODULE -> V.Module c
PACKAGE -> V.Package c
VARIABLE -> V.Variable c
-- | For debugging: returns True if all edges reference a valid vertex tag.
isCoherent :: AdjList -> Bool
isCoherent (AdjList vs es) = all edgeValid es where
edgeValid (Edge a b) = HashSet.member a allTags && HashSet.member b allTags
allTags = HashSet.fromList (toList (vertexTag <$> vs))

37
src/Data/Graph/Vertex.hs Normal file
View File

@ -0,0 +1,37 @@
{-# LANGUAGE DeriveAnyClass #-}
module Data.Graph.Vertex
( Vertex (..)
, moduleVertex
, packageVertex
, vertexToType
) where
import Prologue hiding (packageName)
import Data.Aeson
import qualified Data.Text as T
import Data.Abstract.Module (ModuleInfo (..))
import Data.Abstract.Name
import Data.Abstract.Package (PackageInfo (..))
-- | A vertex of some specific type.
data Vertex
= Package { vertexName :: Text }
| Module { vertexName :: Text }
| Variable { vertexName :: Text }
deriving (Eq, Ord, Show, Generic, Hashable)
packageVertex :: PackageInfo -> Vertex
packageVertex = Package . formatName . packageName
moduleVertex :: ModuleInfo -> Vertex
moduleVertex = Module . T.pack . modulePath
instance ToJSON Vertex where
toJSON v = object [ "name" .= vertexName v, "type" .= vertexToType v ]
vertexToType :: Vertex -> Text
vertexToType Package{} = "package"
vertexToType Module{} = "module"
vertexToType Variable{} = "variable"

View File

@ -23,7 +23,7 @@ data Language
| Ruby
| TypeScript
| PHP
deriving (Eq, Generic, Ord, Read, Show, Bounded, ToJSON, Named, Enum, Finite, MessageField)
deriving (Eq, Generic, Ord, Read, Show, Bounded, Hashable, ToJSON, Named, Enum, Finite, MessageField)
instance FromJSON Language where
parseJSON = withText "Language" $ \l -> pure $ case T.toLower l of

View File

@ -1,18 +1,71 @@
module Data.Project where
{-# LANGUAGE DeriveAnyClass, KindSignatures, MultiWayIf #-}
import Data.Text as T (pack)
import Data.Language
import Prologue
import System.FilePath.Posix
module Data.Project (
-- * Projects
ProjectF (..)
, Project
, PBProject
, ProjectException (..)
, fromPB
, projectExtensions
, projectName
, projectEntryPoints
, projectFiles
, readFile
-- * Files
, File (..)
, file
) where
data Project = Project
{ projectRootDir :: FilePath
, projectFiles :: [File]
import Prelude hiding (readFile)
import Prologue hiding (throwError)
import Control.Monad.Effect
import Control.Monad.Effect.Exception
import Data.Blob
import Data.Language
import qualified Data.Text as T
import Proto3.Suite
import System.FilePath.Posix
-- | A 'ProjectF' contains all the information that semantic needs
-- to execute an analysis, diffing, or graphing pass. It is higher-kinded
-- in terms of the container type for paths and blobs, as well as the
-- path type (this is necessary because protobuf uses different vector
-- representations for @repeated string@ and @repeated Blob@.
-- You probably want to use the 'Project' or 'PB' type aliases.
data ProjectF (blobs :: * -> *) (paths :: * -> *) path = Project
{ projectRootDir :: path
, projectBlobs :: blobs Blob
, projectLanguage :: Language
, projectEntryPoints :: [File]
, projectExcludeDirs :: [FilePath]
, projectEntryPaths :: paths path
, projectExcludeDirs :: paths path
} deriving (Functor, Generic)
deriving instance (Eq path, Eq (blobs Blob), Eq (paths path)) => Eq (ProjectF blobs paths path)
deriving instance (Show path, Show (blobs Blob), Show (paths path)) => Show (ProjectF blobs paths path)
-- | This 'Project' type is the one used during semantic's normal
-- course of diffing, evaluation, and graphing. You probably want to
-- use this one.
type Project = ProjectF [] [] FilePath
-- | This 'Project' type is protobuf-compatible, and corresponds with
-- the @Project@ message declaration present in types.proto.
type PBProject = ProjectF NestedVec UnpackedVec Text
deriving instance Message PBProject
instance Named PBProject where nameOf _ = "Project"
-- | Convert from a packed protobuf representation to a more useful one.
fromPB :: PBProject -> Project
fromPB Project {..} = Project
{ projectRootDir = T.unpack projectRootDir
, projectBlobs = toList projectBlobs
, projectLanguage = projectLanguage
, projectEntryPaths = T.unpack <$> toList projectEntryPaths
, projectExcludeDirs = T.unpack <$> toList projectExcludeDirs
}
deriving (Eq, Ord, Show)
projectName :: Project -> Text
projectName = T.pack . dropExtensions . takeFileName . projectRootDir
@ -20,6 +73,15 @@ projectName = T.pack . dropExtensions . takeFileName . projectRootDir
projectExtensions :: Project -> [String]
projectExtensions = extensionsForLanguage . projectLanguage
projectEntryPoints :: Project -> [File]
projectEntryPoints Project {..} = foldr go [] projectBlobs
where go b acc =
if blobPath b `elem` projectEntryPaths
then toFile b : acc
else acc
projectFiles :: Project -> [File]
projectFiles = fmap toFile . projectBlobs
data File = File
{ filePath :: FilePath
@ -29,3 +91,24 @@ data File = File
file :: FilePath -> File
file path = File path (languageForFilePath path)
where languageForFilePath = languageForType . takeExtension
-- This is kind of a wart; Blob and File should be two views of
-- the same higher-kinded datatype.
toFile :: Blob -> File
toFile (Blob _ p l) = File p l
newtype ProjectException
= FileNotFound FilePath
deriving (Show, Eq, Typeable, Exception)
readFile :: Member (Exc SomeException) effs
=> Project
-> File
-> Eff effs (Maybe Blob)
readFile Project{..} f =
let p = filePath f
candidate = find (\b -> blobPath b == p) projectBlobs
in if
| p == "/dev/null" -> pure Nothing
| isJust candidate -> pure candidate
| otherwise -> throwError (SomeException (FileNotFound p))

View File

@ -16,6 +16,8 @@ module Semantic.Graph
, resumingEnvironmentError
) where
import Prelude hiding (readFile)
import Analysis.Abstract.Evaluating
import Analysis.Abstract.Graph
import Control.Abstract
@ -32,12 +34,11 @@ import Data.Term
import Data.Text (pack)
import Parsing.Parser
import Prologue hiding (MonadError (..))
import Semantic.IO (Files)
import Semantic.Task as Task
data GraphType = ImportGraph | CallGraph
runGraph :: ( Member (Distribute WrappedTask) effs, Member Files effs, Member Resolution effs, Member Task effs, Member Trace effs)
runGraph :: (Member (Exc SomeException) effs, Member (Distribute WrappedTask) effs, Member Resolution effs, Member Task effs, Member Trace effs)
=> GraphType
-> Bool
-> Project
@ -68,31 +69,32 @@ runGraph graphType includePackages project
. graphing
-- | Parse a list of files into a 'Package'.
parsePackage :: (Member (Distribute WrappedTask) effs, Member Files effs, Member Resolution effs, Member Task effs, Member Trace effs)
parsePackage :: (Member (Exc SomeException) effs, Member (Distribute WrappedTask) effs, Member Resolution effs, Member Task effs, Member Trace effs)
=> Parser term -- ^ A parser.
-> Maybe File -- ^ Prelude (optional).
-> Project -- ^ Project to parse into a package.
-> Eff effs (Package term)
parsePackage parser preludeFile project@Project{..} = do
prelude <- traverse (parseModule parser Nothing) preludeFile
p <- parseModules parser project
prelude <- traverse (parseModule project parser) preludeFile
p <- parseModules parser
resMap <- Task.resolutionMap project
let pkg = Package.fromModules n Nothing prelude (length projectEntryPoints) p resMap
let pkg = Package.fromModules n Nothing prelude (length projectEntryPaths) p resMap
pkg <$ trace ("project: " <> show pkg)
where
n = name (projectName project)
-- | Parse all files in a project into 'Module's.
parseModules :: Member (Distribute WrappedTask) effs => Parser term -> Project -> Eff effs [Module term]
parseModules parser Project{..} = distributeFor (projectEntryPoints <> projectFiles) (WrapTask . parseModule parser (Just projectRootDir))
parseModules :: Member (Distribute WrappedTask) effs => Parser term -> Eff effs [Module term]
parseModules parser = distributeFor (projectEntryPoints project <> projectFiles project) (WrapTask . parseModule project parser)
-- | Parse a file into a 'Module'.
parseModule :: (Member Files effs, Member Task effs) => Parser term -> Maybe FilePath -> File -> Eff effs (Module term)
parseModule parser rootDir file = do
blob <- readBlob file
moduleForBlob rootDir blob <$> parse parser blob
parseModule :: (Member (Exc SomeException) effs, Member Task effs) => Project -> Parser term -> File -> Eff effs (Module term)
parseModule proj parser file = do
mBlob <- readFile proj file
case mBlob of
Just blob -> moduleForBlob (Just (projectRootDir proj)) blob <$> parse parser blob
Nothing -> throwError (SomeException (FileNotFound (filePath file)))
withTermSpans :: ( HasField fields Span
, Member (Reader Span) effects

View File

@ -15,6 +15,7 @@ module Semantic.IO
, noLanguageForBlob
, openFileForReading
, readBlob
, readBlobFromPath
, readBlobPairs
, readBlobPairsFromHandle
, readBlobs
@ -41,7 +42,7 @@ import Control.Monad.IO.Class
import Data.Aeson
import Data.Blob
import Data.Bool
import Data.Project
import Data.Project hiding (readFile)
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Lazy as BL
@ -106,8 +107,10 @@ readProjectFromPaths maybeRoot path lang excludeDirs = do
then (id, [], fromMaybe path maybeRoot)
else (filter (/= path), [toFile path], fromMaybe (takeDirectory path) maybeRoot)
paths <- liftIO $ filterFun <$> findFilesInDir rootDir exts excludeDirs
pure $ Project rootDir (toFile <$> paths) lang entryPoints excludeDirs
blobs <- traverse readBlobFromPath (entryPoints <> (toFile <$> paths))
pure (Project rootDir blobs lang (filePath <$> entryPoints) excludeDirs)
where
toFile path = File path lang
exts = extensionsForLanguage lang
@ -232,7 +235,6 @@ runFiles = interpret $ \ files -> case files of
Write (ToPath path) builder -> liftIO (IO.withBinaryFile path IO.WriteMode (`B.hPutBuilder` builder))
Write (ToHandle (WriteHandle handle)) builder -> liftIO (B.hPutBuilder handle builder)
-- | Catch exceptions in 'IO' actions embedded in 'Eff', handling them with the passed function.
--
-- Note that while the type allows 'IO' to occur anywhere within the effect list, it must actually occur at the end to be able to run the computation.

View File

@ -77,9 +77,23 @@ pythonPrelude = Just $ File (TypeLevel.symbolVal (Proxy :: Proxy (PreludePath Py
javaScriptPrelude = Just $ File (TypeLevel.symbolVal (Proxy :: Proxy (PreludePath TypeScript.Term))) Language.JavaScript
-- Evaluate a project, starting at a single entrypoint.
evaluateProject parser lang prelude path = evaluatePackageWith id withTermSpans . fmap quieterm <$> runTask (readProject Nothing path lang [] >>= parsePackage parser prelude)
evaluateProjectWithCaching parser lang prelude path = evaluatePackageWith convergingModules (withTermSpans . cachingTerms) . fmap quieterm <$> runTask (readProject Nothing path lang [] >>= parsePackage parser prelude)
evaluateProject parser lang prelude path = evaluatePackageWith id withTermSpans . fmap quieterm <$> runTask (readProject Nothing path lang [] >>= addPrelude lang >>= parsePackage parser prelude)
evaluateProjectWithCaching parser lang prelude path = evaluatePackageWith convergingModules (withTermSpans . cachingTerms) . fmap quieterm <$> runTask (readProject Nothing path lang [] >>= addPrelude lang >>= parsePackage parser prelude)
addPrelude :: Member IO effs => Language.Language -> Project -> Eff effs Project
addPrelude l proj = do
let p = case l of
Language.Ruby -> rubyPrelude
Language.Python -> pythonPrelude
Language.TypeScript -> javaScriptPrelude
Language.JavaScript -> javaScriptPrelude
_ -> Nothing
case p of
Nothing -> pure proj
Just pth -> do
bl <- readBlobFromPath pth
pure $ proj { projectBlobs = bl : projectBlobs proj }
parseFile :: Parser term -> FilePath -> IO term
parseFile parser = runTask . (parse parser <=< readBlob . file)

View File

@ -16,7 +16,7 @@ import qualified TreeSitter.Node as TS
import qualified TreeSitter.Parser as TS
import qualified TreeSitter.Tree as TS
import SpecHelpers
import SpecHelpers hiding (readFile)
spec :: Spec

View File

@ -12,6 +12,9 @@ enum Language {Unknown = 0;
Ruby = 9;
TypeScript = 10;
PHP = 11;}
enum VertexType {PACKAGE = 0;
MODULE = 1;
VARIABLE = 2;}
message Blob { bytes blobSource = 1;
string blobPath = 2;
Language blobLanguage = 3;
@ -22,20 +25,36 @@ message Pos { int64 posLine = 1;
message Span { Pos spanStart = 1;
Pos spanEnd = 2;
}
message AdjList { repeated Vertex graphVertices = 1;
repeated Edge graphEdges = 2;
}
message Vertex { VertexType vertexType = 1;
string vertexContents = 2;
uint64 vertexTag = 3;
}
message Edge { uint64 edgeFrom = 1;
uint64 edgeTo = 2;
}
message Project { string projectRootDir = 1;
repeated Blob projectBlobs = 2;
Language projectLanguage = 3;
repeated string projectEntryPaths = 4 [packed = false];
repeated string projectExcludeDirs = 5 [packed = false];
}
message Array { repeated Term arrayElements = 1;
}
message Boolean { bool booleanContent = 1;
}
message Hash { repeated Term hashElements = 1;
}
message Float { bytes floatContent = 1;
message Float { string floatContent = 1;
}
message KeyValue { Term key = 1;
Term value = 2;
}
message Null {
}
message TextElement { bytes textElementContent = 1;
message TextElement { string textElementContent = 1;
}
message Term { oneof syntax {Array array = 1;
Boolean boolean = 2;