mirror of
https://github.com/github/semantic.git
synced 2024-12-26 00:12:29 +03:00
Merge remote-tracking branch 'origin/master' into new-take-on-parse-examples
This commit is contained in:
commit
38b692b17c
@ -1,6 +1,6 @@
|
||||
name: semantic
|
||||
version: 0.4.0
|
||||
synopsis: Initial project template from stack
|
||||
synopsis: Framework and service for analyzing and diffing untrusted code.
|
||||
description: Please see README.md
|
||||
homepage: http://github.com/github/semantic#readme
|
||||
author: Rob Rix, Josh Vera
|
||||
@ -78,6 +78,7 @@ library
|
||||
, Data.AST
|
||||
, Data.Blob
|
||||
, Data.Diff
|
||||
, Data.Duration
|
||||
, Data.Error
|
||||
, Data.Functor.Both
|
||||
, Data.Functor.Classes.Generic
|
||||
@ -191,6 +192,7 @@ library
|
||||
, Semantic.Telemetry.Haystack
|
||||
, Semantic.Telemetry.Log
|
||||
, Semantic.Telemetry.Stat
|
||||
, Semantic.Timeout
|
||||
, Semantic.Util
|
||||
, Semantic.Util.Rewriting
|
||||
, Semantic.Version
|
||||
|
34
src/Data/Duration.hs
Normal file
34
src/Data/Duration.hs
Normal file
@ -0,0 +1,34 @@
|
||||
module Data.Duration
|
||||
( Duration(..)
|
||||
, fromSeconds
|
||||
, fromMilliseconds
|
||||
, fromMicroseconds
|
||||
, fromNanoseconds
|
||||
, toMicroseconds
|
||||
) where
|
||||
|
||||
-- A duration suitable for timeouts stored as an int of milliseconds.
|
||||
newtype Duration = Milliseconds Int
|
||||
deriving (Eq, Ord)
|
||||
|
||||
instance Show Duration where
|
||||
showsPrec _ (Milliseconds n) = shows n <> showString "ms"
|
||||
|
||||
fromSeconds :: Int -> Duration
|
||||
fromSeconds n = fromMilliseconds (n * 1000)
|
||||
|
||||
-- milli = 10E-3 seconds
|
||||
fromMilliseconds :: Int -> Duration
|
||||
fromMilliseconds n | n <= 0 = Milliseconds 0
|
||||
| otherwise = Milliseconds n
|
||||
|
||||
-- micro = 10E-6 seconds
|
||||
fromMicroseconds :: Int -> Duration
|
||||
fromMicroseconds n = fromMilliseconds (n `div` 1000)
|
||||
|
||||
-- nano = 10E-9 seconds
|
||||
fromNanoseconds :: Int -> Duration
|
||||
fromNanoseconds n = fromMicroseconds (n `div` 1000)
|
||||
|
||||
toMicroseconds :: Duration -> Int
|
||||
toMicroseconds (Milliseconds n) = n * 1000
|
@ -1,6 +1,6 @@
|
||||
{-# LANGUAGE DataKinds, GADTs, ScopedTypeVariables, TypeOperators #-}
|
||||
module Parsing.TreeSitter
|
||||
( Timeout (..)
|
||||
( Duration(..)
|
||||
, parseToAST
|
||||
) where
|
||||
|
||||
@ -16,22 +16,21 @@ import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
|
||||
import Foreign
|
||||
import Foreign.C.Types (CBool (..))
|
||||
import Foreign.Marshal.Array (allocaArray)
|
||||
import System.Timeout
|
||||
|
||||
import Data.AST (AST, Node (Node))
|
||||
import Data.Blob
|
||||
import Data.Duration
|
||||
import Data.Range
|
||||
import Data.Source
|
||||
import Data.Span
|
||||
import Data.Term
|
||||
import Semantic.Timeout
|
||||
|
||||
import qualified TreeSitter.Language as TS
|
||||
import qualified TreeSitter.Node as TS
|
||||
import qualified TreeSitter.Parser as TS
|
||||
import qualified TreeSitter.Tree as TS
|
||||
|
||||
newtype Timeout = Milliseconds Int
|
||||
|
||||
data Result grammar
|
||||
= Failed
|
||||
| Succeeded (AST [] grammar)
|
||||
@ -58,10 +57,8 @@ runParser parser blobSource = unsafeUseAsCStringLen (sourceBytes blobSource) $
|
||||
|
||||
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
|
||||
-- Returns Nothing if the operation timed out.
|
||||
parseToAST :: (Bounded grammar, Enum grammar, Member (Lift IO) effects, Member Trace effects, PureEffects effects) => Timeout -> Ptr TS.Language -> Blob -> Eff effects (Maybe (AST [] grammar))
|
||||
parseToAST (Milliseconds s) language Blob{..} = bracket TS.ts_parser_new TS.ts_parser_delete $ \ parser -> do
|
||||
let parserTimeout = s * 1000
|
||||
|
||||
parseToAST :: (Bounded grammar, Enum grammar, Member (Lift IO) effects, Member Timeout effects, Member Trace effects, PureEffects effects) => Duration -> Ptr TS.Language -> Blob -> Eff effects (Maybe (AST [] grammar))
|
||||
parseToAST parseTimeout language Blob{..} = bracket TS.ts_parser_new TS.ts_parser_delete $ \ parser -> do
|
||||
liftIO $ do
|
||||
TS.ts_parser_halt_on_error parser (CBool 1)
|
||||
TS.ts_parser_set_language parser language
|
||||
@ -71,7 +68,7 @@ parseToAST (Milliseconds s) language Blob{..} = bracket TS.ts_parser_new TS.ts_p
|
||||
parsing <- liftIO . async $ runParser parser blobSource
|
||||
|
||||
-- Kick the parser off asynchronously and wait according to the provided timeout.
|
||||
res <- liftIO . timeout parserTimeout $ wait parsing
|
||||
res <- timeout parseTimeout $ liftIO (wait parsing)
|
||||
|
||||
case res of
|
||||
Just Failed -> Nothing <$ trace ("tree-sitter: parsing failed " <> blobPath)
|
||||
|
@ -14,10 +14,10 @@ module Semantic.Config
|
||||
, withTelemetry
|
||||
) where
|
||||
|
||||
import Data.Duration
|
||||
import Network.BSD
|
||||
import Network.HTTP.Client.TLS
|
||||
import Network.URI
|
||||
import Parsing.TreeSitter (Timeout (..))
|
||||
import Prologue
|
||||
import Semantic.Env
|
||||
import Semantic.Telemetry
|
||||
@ -38,7 +38,7 @@ data Config
|
||||
, configStatsHost :: Stat.Host -- ^ Host of statsd/datadog (default: "127.0.0.1")
|
||||
, configStatsPort :: Stat.Port -- ^ Port of statsd/datadog (default: "28125")
|
||||
|
||||
, configTreeSitterParseTimeout :: Timeout -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 10000).
|
||||
, configTreeSitterParseTimeout :: Duration -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 10000).
|
||||
, configMaxTelemetyQueueSize :: Int -- ^ Max size of telemetry queues before messages are dropped (default: 1000).
|
||||
, configIsTerminal :: Bool -- ^ Whether a terminal is attached (set automaticaly at runtime).
|
||||
, configLogPrintSource :: Bool -- ^ Whether to print the source reference when logging errors (set automatically at runtime).
|
||||
@ -78,7 +78,7 @@ defaultConfig options@Options{..} = do
|
||||
, configStatsHost = statsHost
|
||||
, configStatsPort = statsPort
|
||||
|
||||
, configTreeSitterParseTimeout = Milliseconds parseTimeout
|
||||
, configTreeSitterParseTimeout = fromMilliseconds parseTimeout
|
||||
, configMaxTelemetyQueueSize = size
|
||||
, configIsTerminal = isTerminal
|
||||
, configLogPrintSource = isTerminal
|
||||
|
@ -34,6 +34,7 @@ import Semantic.IO as IO
|
||||
import Semantic.Resolution
|
||||
import Semantic.Task hiding (Error)
|
||||
import Semantic.Telemetry
|
||||
import Semantic.Timeout
|
||||
import Semantic.Telemetry.Log (LogOptions, Message(..), writeLogMessage)
|
||||
import Semantic.Util
|
||||
import System.Console.Haskeline
|
||||
@ -70,7 +71,7 @@ runREPL prefs settings = interpret $ \case
|
||||
|
||||
rubyREPL = repl (Proxy @'Language.Ruby) rubyParser
|
||||
|
||||
repl proxy parser paths = defaultConfig debugOptions >>= \ config -> runM . runDistribute . runError @_ @_ @SomeException . runTelemetryIgnoringStat (logOptionsFromConfig config) . runTraceInTelemetry . runReader config . IO.runFiles . runResolution . runTaskF $ do
|
||||
repl proxy parser paths = defaultConfig debugOptions >>= \ config -> runM . runDistribute . runTimeout (runM . runDistribute) . runError @_ @_ @SomeException . runTelemetryIgnoringStat (logOptionsFromConfig config) . runTraceInTelemetry . runReader config . IO.runFiles . runResolution . runTaskF $ do
|
||||
blobs <- catMaybes <$> traverse IO.readFile (flip File (Language.reflect proxy) <$> paths)
|
||||
package <- fmap (fmap quieterm) <$> parsePackage parser (Project (takeDirectory (maybe "/" fst (uncons paths))) blobs (Language.reflect proxy) [])
|
||||
modules <- topologicalSort <$> runImportGraphToModules proxy (snd <$> package)
|
||||
|
@ -64,6 +64,7 @@ import Data.Blob
|
||||
import Data.Bool
|
||||
import Data.ByteString.Builder
|
||||
import Data.Diff
|
||||
import Data.Duration
|
||||
import qualified Data.Error as Error
|
||||
import Data.Language (Language)
|
||||
import Data.Record
|
||||
@ -79,6 +80,7 @@ import Parsing.TreeSitter
|
||||
import Prologue hiding (MonadError (..), project)
|
||||
import Semantic.Config
|
||||
import Semantic.Distribute
|
||||
import Semantic.Timeout
|
||||
import qualified Semantic.IO as IO
|
||||
import Semantic.Resolution
|
||||
import Semantic.Telemetry
|
||||
@ -93,6 +95,7 @@ type TaskEff = Eff '[ Task
|
||||
, Trace
|
||||
, Telemetry
|
||||
, Exc SomeException
|
||||
, Timeout
|
||||
, Distribute
|
||||
, Lift IO
|
||||
]
|
||||
@ -146,6 +149,7 @@ runTaskWithConfig options logger statter task = do
|
||||
run
|
||||
= runM
|
||||
. runDistribute
|
||||
. runTimeout (runM . runDistribute)
|
||||
. runError
|
||||
. runTelemetry logger statter
|
||||
. runTraceInTelemetry
|
||||
@ -180,7 +184,7 @@ instance Effect Task where
|
||||
handleState c dist (Request (Serialize format input) k) = Request (Serialize format input) (dist . (<$ c) . k)
|
||||
|
||||
-- | Run a 'Task' effect by performing the actions in 'IO'.
|
||||
runTaskF :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Trace effs, PureEffects effs) => Eff (Task ': effs) a -> Eff effs a
|
||||
runTaskF :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Timeout effs, Member Trace effs, PureEffects effs) => Eff (Task ': effs) a -> Eff effs a
|
||||
runTaskF = interpret $ \ task -> case task of
|
||||
Parse parser blob -> runParser blob parser
|
||||
Analyze interpret analysis -> pure (interpret analysis)
|
||||
@ -196,13 +200,13 @@ runTaskF = interpret $ \ task -> case task of
|
||||
logError :: Member Telemetry effs => Config -> Level -> Blob -> Error.Error String -> [(String, String)] -> Eff effs ()
|
||||
logError Config{..} level blob err = writeLog level (Error.formatError configLogPrintSource configIsTerminal blob err)
|
||||
|
||||
data ParserCancelled = ParserTimedOut FilePath Language
|
||||
data ParserCancelled = ParserTimedOut FilePath Language | AssignmentTimedOut FilePath Language
|
||||
deriving (Show, Typeable)
|
||||
|
||||
instance Exception ParserCancelled
|
||||
|
||||
-- | Parse a 'Blob' in 'IO'.
|
||||
runParser :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Trace effs, PureEffects effs) => Blob -> Parser term -> Eff effs term
|
||||
runParser :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Timeout effs, Member Trace effs, PureEffects effs) => Blob -> Parser term -> Eff effs term
|
||||
runParser blob@Blob{..} parser = case parser of
|
||||
ASTParser language ->
|
||||
time "parse.tree_sitter_ast_parse" languageTag $ do
|
||||
@ -230,6 +234,7 @@ runParser blob@Blob{..} parser = case parser of
|
||||
, Member (Lift IO) effs
|
||||
, Member (Reader Config) effs
|
||||
, Member Telemetry effs
|
||||
, Member Timeout effs
|
||||
, Member Trace effs
|
||||
, PureEffects effs
|
||||
)
|
||||
@ -244,7 +249,9 @@ runParser blob@Blob{..} parser = case parser of
|
||||
writeStat (increment "parse.parse_failures" languageTag)
|
||||
writeLog Error "failed parsing" (("task", "parse") : blobFields)
|
||||
throwError (toException err)
|
||||
time "parse.assign" languageTag $
|
||||
|
||||
-- TODO: Could give assignment a dedicated config for it's timeout.
|
||||
res <- timeout (fromSeconds 3) . time "parse.assign" languageTag $
|
||||
case assign blobSource assignment ast of
|
||||
Left err -> do
|
||||
writeStat (increment "parse.assign_errors" languageTag)
|
||||
@ -260,3 +267,9 @@ runParser blob@Blob{..} parser = case parser of
|
||||
logError config Warning blob err (("task", "assign") : blobFields)
|
||||
when (optionsFailOnWarning (configOptions config)) $ throwError (toException err)
|
||||
term <$ writeStat (count "parse.nodes" (length term) languageTag)
|
||||
case res of
|
||||
Just r -> pure r
|
||||
Nothing -> do
|
||||
writeStat (increment "assign.assign_timeouts" languageTag)
|
||||
writeLog Error "assignment timeout" (("task", "assign") : blobFields)
|
||||
throwError (SomeException (AssignmentTimedOut blobPath blobLanguage))
|
||||
|
34
src/Semantic/Timeout.hs
Normal file
34
src/Semantic/Timeout.hs
Normal file
@ -0,0 +1,34 @@
|
||||
{-# LANGUAGE TypeOperators, GADTs, RankNTypes #-}
|
||||
module Semantic.Timeout
|
||||
( timeout
|
||||
, Timeout
|
||||
, runTimeout
|
||||
, Duration(..)
|
||||
) where
|
||||
|
||||
import Control.Monad.Effect
|
||||
import Control.Monad.IO.Class
|
||||
import Data.Duration
|
||||
import qualified System.Timeout as System
|
||||
|
||||
-- | Run an action with a timeout. Returns 'Nothing' when no result is available
|
||||
-- within the specified duration. Uses 'System.Timeout.timeout' so all caveats
|
||||
-- about not operating over FFI boundaries apply.
|
||||
timeout :: (Member Timeout effs) => Duration -> Eff effs output -> Eff effs (Maybe output)
|
||||
timeout n = send . Timeout n
|
||||
|
||||
-- | 'Timeout' effects run other effects, aborting them if they exceed the
|
||||
-- specified duration.
|
||||
data Timeout task output where
|
||||
Timeout :: Duration -> task output -> Timeout task (Maybe output)
|
||||
|
||||
instance PureEffect Timeout
|
||||
instance Effect Timeout where
|
||||
handleState c dist (Request (Timeout n task) k) = Request (Timeout n (dist (task <$ c))) (dist . maybe (k Nothing <$ c) (fmap (k . Just)))
|
||||
|
||||
-- | Evaulate a 'Timeoute' effect.
|
||||
runTimeout :: (Member (Lift IO) effects, PureEffects effects)
|
||||
=> (forall x . Eff effects x -> IO x)
|
||||
-> Eff (Timeout ': effects) a
|
||||
-> Eff effects a
|
||||
runTimeout handler = interpret (\ (Timeout n task) -> liftIO (System.timeout (toMicroseconds n) (handler (runTimeout handler task))))
|
Loading…
Reference in New Issue
Block a user