mirror of
https://github.com/github/semantic.git
synced 2024-11-24 00:42:33 +03:00
Port AdjList and create the messages
This commit is contained in:
parent
c3542f31ed
commit
dd8c8b0762
@ -76,6 +76,8 @@ library
|
||||
, Data.Functor.Both
|
||||
, Data.Functor.Classes.Generic
|
||||
, Data.Graph
|
||||
, Data.Graph.AdjList
|
||||
, Data.Graph.Vertex
|
||||
, Data.JSON.Fields
|
||||
, Data.Language
|
||||
, Data.Map.Monoidal
|
||||
@ -214,6 +216,7 @@ library
|
||||
, proto3-wire
|
||||
, unix
|
||||
, unordered-containers
|
||||
, vector
|
||||
, haskell-tree-sitter
|
||||
, tree-sitter-go
|
||||
, tree-sitter-haskell
|
||||
|
@ -19,16 +19,26 @@ service SemanticAPI {
|
||||
rpc HealthCheck(HealthCheckRequest) returns (HealthCheckResponse);
|
||||
rpc FetchSummaries (SummariesRequest) returns (SummariesResponse) {}
|
||||
rpc ParseBlobs (ParseRequest) returns (ParseResponse) {}
|
||||
rpc GraphImports (ImportGraphRequest) returns (ImportGraphResponse) {}
|
||||
}
|
||||
|
||||
message SummariesRequest {
|
||||
repeated BlobPair blobPairs = 1;
|
||||
}
|
||||
|
||||
message ImportGraphRequest {
|
||||
Project project = 1;
|
||||
}
|
||||
|
||||
message ParseRequest {
|
||||
repeated Blob source = 1;
|
||||
}
|
||||
|
||||
message ImportGraphResponse {
|
||||
AdjList graph = 1;
|
||||
repeated string errorMessages = 2;
|
||||
}
|
||||
|
||||
message BlobPair {
|
||||
Blob before = 1;
|
||||
Blob after = 2;
|
||||
|
160
src/Data/Graph/AdjList.hs
Normal file
160
src/Data/Graph/AdjList.hs
Normal file
@ -0,0 +1,160 @@
|
||||
{-# LANGUAGE DeriveAnyClass, LambdaCase, TupleSections #-}
|
||||
|
||||
module Data.Graph.AdjList
|
||||
( AdjList (..)
|
||||
, Edge (..)
|
||||
, Tag
|
||||
, Vertex (..)
|
||||
, VertexType (..)
|
||||
, graphToAdjList
|
||||
, adjListToGraph
|
||||
, tagGraph
|
||||
, isCoherent
|
||||
) where
|
||||
|
||||
import Prologue
|
||||
|
||||
import Algebra.Graph.AdjacencyMap (adjacencyMap)
|
||||
import Algebra.Graph.Class (ToGraph (..), edges, vertices)
|
||||
import Control.Monad.Effect
|
||||
import Control.Monad.Effect.Fresh
|
||||
import Data.Aeson
|
||||
import Data.Coerce
|
||||
import Data.HashMap.Strict ((!))
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
import Data.HashSet (HashSet)
|
||||
import qualified Data.HashSet as HashSet
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Vector as Vec
|
||||
import Data.Word
|
||||
import GHC.Exts (fromList)
|
||||
import qualified Proto3.Suite as PB
|
||||
|
||||
import Data.Graph
|
||||
import qualified Data.Graph.Vertex as V
|
||||
|
||||
-- There are a lot of boilerplate instances for protobuf compatibility in this file.
|
||||
-- Some of them could be reduced by changing the data types and using generic deriving,
|
||||
-- but then you have to sacrifice derived JSON instances. Ultimately the protobuf ones
|
||||
-- aren't too onerous to write by hand.
|
||||
|
||||
-- | Sum type corresponding to a protobuf enum for vertex types.
|
||||
data VertexType
|
||||
= PACKAGE
|
||||
| MODULE
|
||||
| VARIABLE
|
||||
deriving (Eq, Ord, Show, Enum, Bounded, Generic, ToJSON, FromJSON, PB.Named, PB.Finite, PB.MessageField)
|
||||
|
||||
-- | Defaults to 'PACKAGE'.
|
||||
instance PB.HasDefault VertexType where def = PACKAGE
|
||||
|
||||
-- | Piggybacks on top of the 'Enumerated' instance, as the generated code would.
|
||||
-- This instance will get easier when we have DerivingVia.
|
||||
instance PB.Primitive VertexType where
|
||||
primType _ = PB.primType (Proxy @(PB.Enumerated VertexType))
|
||||
encodePrimitive f = PB.encodePrimitive f . PB.Enumerated . Right
|
||||
decodePrimitive = PB.decodePrimitive >>= \case
|
||||
(PB.Enumerated (Right r)) -> pure r
|
||||
other -> Prelude.fail ("VertexType decodeMessageField: unexpected value" <> show other)
|
||||
|
||||
-- | A tag used on each vertext of a 'Graph' to convert to an 'AdjList'.
|
||||
type Tag = Word64
|
||||
|
||||
-- | A protobuf-compatible vertex type, with a unique 'Tag' identifier.
|
||||
data Vertex = Vertex
|
||||
{ vertexType :: VertexType
|
||||
, vertexContents :: Text
|
||||
, vertexTag :: Tag
|
||||
} deriving (Eq, Ord, Show, Generic, PB.Message, PB.Named)
|
||||
|
||||
-- | A protobuf-compatible edge type. Only tag information is carried;
|
||||
-- consumers are expected to look up nodes in the vertex list when necessary.
|
||||
data Edge = Edge { edgeFrom :: !Tag, edgeTo :: !Tag }
|
||||
deriving (Eq, Ord, Show, Generic, Hashable, PB.Named, PB.Message)
|
||||
|
||||
-- | An adjacency list-representation of a graph. You generally build these by calling
|
||||
-- 'graphToAdjList' on an algebraic 'Graph'. This representation is less efficient and
|
||||
-- fluent than an ordinary 'Graph', but is more amenable to serialization.
|
||||
data AdjList = AdjList
|
||||
{ graphVertices :: PB.NestedVec Vertex
|
||||
, graphEdges :: PB.NestedVec Edge
|
||||
} deriving (Eq, Ord, Show, Generic, PB.Named, PB.Message)
|
||||
|
||||
-- | Convert an algebraic graph to an adjacency list.
|
||||
graphToAdjList :: Graph V.Vertex -> AdjList
|
||||
graphToAdjList = taggedGraphToAdjList . tagGraph
|
||||
|
||||
-- * Internal interface stuff
|
||||
|
||||
-- Using a PBGraph as the accumulator for the fold would incur
|
||||
-- significant overhead associated with Vector concatenation.
|
||||
-- We use this and then pay the O(v + e) to-Vector cost once.
|
||||
data Acc = Acc ![Vertex] !(HashSet Edge)
|
||||
|
||||
-- Convert a graph with tagged members to a protobuf-compatible adjacency list.
|
||||
-- The Tag is necessary to build a canonical adjacency list.
|
||||
-- Since import graphs can be very large, this is written with speed in mind, in
|
||||
-- that we convert the graph to algebraic-graphs's 'AdjacencyMap' and then fold
|
||||
-- to build a 'Graph', avoiding inefficient vector concatenation.
|
||||
-- Time complexity, given V vertices and E edges, is at least O(2V + 2E + (V * E * log E)),
|
||||
-- plus whatever overhead converting the graph to 'AdjacencyMap' may entail.
|
||||
taggedGraphToAdjList :: Graph (V.Vertex, Tag) -> AdjList
|
||||
taggedGraphToAdjList = accumToAdj . munge . adjacencyMap . toGraph . simplify
|
||||
where munge :: Map (V.Vertex, Tag) (Set (V.Vertex, Tag)) -> Acc
|
||||
munge = Map.foldlWithKey go (Acc [] mempty)
|
||||
|
||||
go :: Acc -> (V.Vertex, Tag) -> Set (V.Vertex, Tag) -> Acc
|
||||
go (Acc vs es) (v, from) edges = Acc (vertexToPB v from : vs) (Set.foldr' add es edges)
|
||||
where add (_, to) = HashSet.insert (Edge from to)
|
||||
|
||||
accumToAdj :: Acc -> AdjList
|
||||
accumToAdj (Acc vs es) = AdjList (fromList vs) (fromList (toList es))
|
||||
|
||||
vertexToPB :: V.Vertex -> Tag -> Vertex
|
||||
vertexToPB s = Vertex t (V.vertexName s) where
|
||||
t = case s of
|
||||
V.Package{} -> PACKAGE
|
||||
V.Module{} -> MODULE
|
||||
V.Variable{} -> VARIABLE
|
||||
|
||||
-- Annotate all vertices of a 'Graph' with a 'Tag', starting from 0.
|
||||
tagGraph :: Graph vertex -> Graph (vertex, Tag)
|
||||
tagGraph = run . runFresh 1 . go where
|
||||
go :: Graph vertex -> Eff '[Fresh] (Graph (vertex, Tag))
|
||||
go = traverse (\v -> (v, ) . fromIntegral <$> fresh)
|
||||
|
||||
-- | This is the reverse of 'graphToAdjList'. Don't use this outside of a testing context.
|
||||
-- N.B. @adjListToGraph . graphToAdjList@ is 'id', but @graphToAdjList . adjListToGraph@ is not.
|
||||
adjListToGraph :: AdjList -> Graph V.Vertex
|
||||
adjListToGraph (AdjList vs es) = simplify built
|
||||
where built = allEdges <> vertices unreferencedVertices
|
||||
|
||||
allEdges :: Graph V.Vertex
|
||||
allEdges = fmap fst (edges (foldr addEdge [] es))
|
||||
addEdge (Edge f t) xs = ((adjMap ! f, f), (adjMap ! t, t)) : xs
|
||||
adjMap = foldMap (\v -> HashMap.singleton (vertexTag v) (pbToVertex v)) vs
|
||||
|
||||
unreferencedVertices :: [V.Vertex]
|
||||
unreferencedVertices = pbToVertex <$> toList (Vec.filter isUnreferenced (coerce vs))
|
||||
|
||||
isUnreferenced :: Vertex -> Bool
|
||||
isUnreferenced v = not (vertexTag v `HashSet.member` edgedTags)
|
||||
|
||||
edgedTags :: HashSet Tag
|
||||
edgedTags = HashSet.fromList $ concatMap unEdge es where unEdge (Edge f t) = [f, t]
|
||||
|
||||
pbToVertex :: Vertex -> V.Vertex
|
||||
pbToVertex (Vertex t c _) = case t of
|
||||
MODULE -> V.Module c
|
||||
PACKAGE -> V.Package c
|
||||
VARIABLE -> V.Variable c
|
||||
|
||||
|
||||
-- | For debugging: returns True if all edges reference a valid vertex tag.
|
||||
isCoherent :: AdjList -> Bool
|
||||
isCoherent (AdjList vs es) = all edgeValid es where
|
||||
edgeValid (Edge a b) = HashSet.member a allTags && HashSet.member b allTags
|
||||
allTags = HashSet.fromList (toList (vertexTag <$> vs))
|
37
src/Data/Graph/Vertex.hs
Normal file
37
src/Data/Graph/Vertex.hs
Normal file
@ -0,0 +1,37 @@
|
||||
{-# LANGUAGE DeriveAnyClass #-}
|
||||
module Data.Graph.Vertex
|
||||
( Vertex (..)
|
||||
, moduleVertex
|
||||
, packageVertex
|
||||
, vertexToType
|
||||
) where
|
||||
|
||||
import Prologue hiding (packageName)
|
||||
|
||||
import Data.Aeson
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Data.Abstract.Module (ModuleInfo (..))
|
||||
import Data.Abstract.Name
|
||||
import Data.Abstract.Package (PackageInfo (..))
|
||||
|
||||
-- | A vertex of some specific type.
|
||||
data Vertex
|
||||
= Package { vertexName :: Text }
|
||||
| Module { vertexName :: Text }
|
||||
| Variable { vertexName :: Text }
|
||||
deriving (Eq, Ord, Show, Generic, Hashable)
|
||||
|
||||
packageVertex :: PackageInfo -> Vertex
|
||||
packageVertex = Package . formatName . packageName
|
||||
|
||||
moduleVertex :: ModuleInfo -> Vertex
|
||||
moduleVertex = Module . T.pack . modulePath
|
||||
|
||||
instance ToJSON Vertex where
|
||||
toJSON v = object [ "name" .= vertexName v, "type" .= vertexToType v ]
|
||||
|
||||
vertexToType :: Vertex -> Text
|
||||
vertexToType Package{} = "package"
|
||||
vertexToType Module{} = "module"
|
||||
vertexToType Variable{} = "variable"
|
Loading…
Reference in New Issue
Block a user