1
1
mirror of https://github.com/github/semantic.git synced 2024-12-26 00:12:29 +03:00

Merge remote-tracking branch 'origin/master' into new-take-on-parse-examples

This commit is contained in:
Timothy Clem 2018-09-17 16:47:00 -07:00
commit 38b692b17c
8 changed files with 100 additions and 19 deletions

View File

@ -1,6 +1,6 @@
name: semantic
version: 0.4.0
synopsis: Initial project template from stack
synopsis: Framework and service for analyzing and diffing untrusted code.
description: Please see README.md
homepage: http://github.com/github/semantic#readme
author: Rob Rix, Josh Vera
@ -78,6 +78,7 @@ library
, Data.AST
, Data.Blob
, Data.Diff
, Data.Duration
, Data.Error
, Data.Functor.Both
, Data.Functor.Classes.Generic
@ -191,6 +192,7 @@ library
, Semantic.Telemetry.Haystack
, Semantic.Telemetry.Log
, Semantic.Telemetry.Stat
, Semantic.Timeout
, Semantic.Util
, Semantic.Util.Rewriting
, Semantic.Version

34
src/Data/Duration.hs Normal file
View File

@ -0,0 +1,34 @@
module Data.Duration
( Duration(..)
, fromSeconds
, fromMilliseconds
, fromMicroseconds
, fromNanoseconds
, toMicroseconds
) where
-- A duration suitable for timeouts stored as an int of milliseconds.
newtype Duration = Milliseconds Int
deriving (Eq, Ord)
instance Show Duration where
showsPrec _ (Milliseconds n) = shows n <> showString "ms"
fromSeconds :: Int -> Duration
fromSeconds n = fromMilliseconds (n * 1000)
-- milli = 10E-3 seconds
fromMilliseconds :: Int -> Duration
fromMilliseconds n | n <= 0 = Milliseconds 0
| otherwise = Milliseconds n
-- micro = 10E-6 seconds
fromMicroseconds :: Int -> Duration
fromMicroseconds n = fromMilliseconds (n `div` 1000)
-- nano = 10E-9 seconds
fromNanoseconds :: Int -> Duration
fromNanoseconds n = fromMicroseconds (n `div` 1000)
toMicroseconds :: Duration -> Int
toMicroseconds (Milliseconds n) = n * 1000

View File

@ -1,6 +1,6 @@
{-# LANGUAGE DataKinds, GADTs, ScopedTypeVariables, TypeOperators #-}
module Parsing.TreeSitter
( Timeout (..)
( Duration(..)
, parseToAST
) where
@ -16,22 +16,21 @@ import Data.ByteString.Unsafe (unsafeUseAsCStringLen)
import Foreign
import Foreign.C.Types (CBool (..))
import Foreign.Marshal.Array (allocaArray)
import System.Timeout
import Data.AST (AST, Node (Node))
import Data.Blob
import Data.Duration
import Data.Range
import Data.Source
import Data.Span
import Data.Term
import Semantic.Timeout
import qualified TreeSitter.Language as TS
import qualified TreeSitter.Node as TS
import qualified TreeSitter.Parser as TS
import qualified TreeSitter.Tree as TS
newtype Timeout = Milliseconds Int
data Result grammar
= Failed
| Succeeded (AST [] grammar)
@ -58,10 +57,8 @@ runParser parser blobSource = unsafeUseAsCStringLen (sourceBytes blobSource) $
-- | Parse 'Source' with the given 'TS.Language' and return its AST.
-- Returns Nothing if the operation timed out.
parseToAST :: (Bounded grammar, Enum grammar, Member (Lift IO) effects, Member Trace effects, PureEffects effects) => Timeout -> Ptr TS.Language -> Blob -> Eff effects (Maybe (AST [] grammar))
parseToAST (Milliseconds s) language Blob{..} = bracket TS.ts_parser_new TS.ts_parser_delete $ \ parser -> do
let parserTimeout = s * 1000
parseToAST :: (Bounded grammar, Enum grammar, Member (Lift IO) effects, Member Timeout effects, Member Trace effects, PureEffects effects) => Duration -> Ptr TS.Language -> Blob -> Eff effects (Maybe (AST [] grammar))
parseToAST parseTimeout language Blob{..} = bracket TS.ts_parser_new TS.ts_parser_delete $ \ parser -> do
liftIO $ do
TS.ts_parser_halt_on_error parser (CBool 1)
TS.ts_parser_set_language parser language
@ -71,7 +68,7 @@ parseToAST (Milliseconds s) language Blob{..} = bracket TS.ts_parser_new TS.ts_p
parsing <- liftIO . async $ runParser parser blobSource
-- Kick the parser off asynchronously and wait according to the provided timeout.
res <- liftIO . timeout parserTimeout $ wait parsing
res <- timeout parseTimeout $ liftIO (wait parsing)
case res of
Just Failed -> Nothing <$ trace ("tree-sitter: parsing failed " <> blobPath)

View File

@ -14,10 +14,10 @@ module Semantic.Config
, withTelemetry
) where
import Data.Duration
import Network.BSD
import Network.HTTP.Client.TLS
import Network.URI
import Parsing.TreeSitter (Timeout (..))
import Prologue
import Semantic.Env
import Semantic.Telemetry
@ -38,7 +38,7 @@ data Config
, configStatsHost :: Stat.Host -- ^ Host of statsd/datadog (default: "127.0.0.1")
, configStatsPort :: Stat.Port -- ^ Port of statsd/datadog (default: "28125")
, configTreeSitterParseTimeout :: Timeout -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 10000).
, configTreeSitterParseTimeout :: Duration -- ^ Timeout in milliseconds before canceling tree-sitter parsing (default: 10000).
, configMaxTelemetyQueueSize :: Int -- ^ Max size of telemetry queues before messages are dropped (default: 1000).
, configIsTerminal :: Bool -- ^ Whether a terminal is attached (set automaticaly at runtime).
, configLogPrintSource :: Bool -- ^ Whether to print the source reference when logging errors (set automatically at runtime).
@ -78,7 +78,7 @@ defaultConfig options@Options{..} = do
, configStatsHost = statsHost
, configStatsPort = statsPort
, configTreeSitterParseTimeout = Milliseconds parseTimeout
, configTreeSitterParseTimeout = fromMilliseconds parseTimeout
, configMaxTelemetyQueueSize = size
, configIsTerminal = isTerminal
, configLogPrintSource = isTerminal

View File

@ -34,6 +34,7 @@ import Semantic.IO as IO
import Semantic.Resolution
import Semantic.Task hiding (Error)
import Semantic.Telemetry
import Semantic.Timeout
import Semantic.Telemetry.Log (LogOptions, Message(..), writeLogMessage)
import Semantic.Util
import System.Console.Haskeline
@ -70,7 +71,7 @@ runREPL prefs settings = interpret $ \case
rubyREPL = repl (Proxy @'Language.Ruby) rubyParser
repl proxy parser paths = defaultConfig debugOptions >>= \ config -> runM . runDistribute . runError @_ @_ @SomeException . runTelemetryIgnoringStat (logOptionsFromConfig config) . runTraceInTelemetry . runReader config . IO.runFiles . runResolution . runTaskF $ do
repl proxy parser paths = defaultConfig debugOptions >>= \ config -> runM . runDistribute . runTimeout (runM . runDistribute) . runError @_ @_ @SomeException . runTelemetryIgnoringStat (logOptionsFromConfig config) . runTraceInTelemetry . runReader config . IO.runFiles . runResolution . runTaskF $ do
blobs <- catMaybes <$> traverse IO.readFile (flip File (Language.reflect proxy) <$> paths)
package <- fmap (fmap quieterm) <$> parsePackage parser (Project (takeDirectory (maybe "/" fst (uncons paths))) blobs (Language.reflect proxy) [])
modules <- topologicalSort <$> runImportGraphToModules proxy (snd <$> package)

View File

@ -64,6 +64,7 @@ import Data.Blob
import Data.Bool
import Data.ByteString.Builder
import Data.Diff
import Data.Duration
import qualified Data.Error as Error
import Data.Language (Language)
import Data.Record
@ -79,6 +80,7 @@ import Parsing.TreeSitter
import Prologue hiding (MonadError (..), project)
import Semantic.Config
import Semantic.Distribute
import Semantic.Timeout
import qualified Semantic.IO as IO
import Semantic.Resolution
import Semantic.Telemetry
@ -93,6 +95,7 @@ type TaskEff = Eff '[ Task
, Trace
, Telemetry
, Exc SomeException
, Timeout
, Distribute
, Lift IO
]
@ -146,6 +149,7 @@ runTaskWithConfig options logger statter task = do
run
= runM
. runDistribute
. runTimeout (runM . runDistribute)
. runError
. runTelemetry logger statter
. runTraceInTelemetry
@ -180,7 +184,7 @@ instance Effect Task where
handleState c dist (Request (Serialize format input) k) = Request (Serialize format input) (dist . (<$ c) . k)
-- | Run a 'Task' effect by performing the actions in 'IO'.
runTaskF :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Trace effs, PureEffects effs) => Eff (Task ': effs) a -> Eff effs a
runTaskF :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Timeout effs, Member Trace effs, PureEffects effs) => Eff (Task ': effs) a -> Eff effs a
runTaskF = interpret $ \ task -> case task of
Parse parser blob -> runParser blob parser
Analyze interpret analysis -> pure (interpret analysis)
@ -196,13 +200,13 @@ runTaskF = interpret $ \ task -> case task of
logError :: Member Telemetry effs => Config -> Level -> Blob -> Error.Error String -> [(String, String)] -> Eff effs ()
logError Config{..} level blob err = writeLog level (Error.formatError configLogPrintSource configIsTerminal blob err)
data ParserCancelled = ParserTimedOut FilePath Language
data ParserCancelled = ParserTimedOut FilePath Language | AssignmentTimedOut FilePath Language
deriving (Show, Typeable)
instance Exception ParserCancelled
-- | Parse a 'Blob' in 'IO'.
runParser :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Trace effs, PureEffects effs) => Blob -> Parser term -> Eff effs term
runParser :: (Member (Exc SomeException) effs, Member (Lift IO) effs, Member (Reader Config) effs, Member Telemetry effs, Member Timeout effs, Member Trace effs, PureEffects effs) => Blob -> Parser term -> Eff effs term
runParser blob@Blob{..} parser = case parser of
ASTParser language ->
time "parse.tree_sitter_ast_parse" languageTag $ do
@ -230,6 +234,7 @@ runParser blob@Blob{..} parser = case parser of
, Member (Lift IO) effs
, Member (Reader Config) effs
, Member Telemetry effs
, Member Timeout effs
, Member Trace effs
, PureEffects effs
)
@ -244,7 +249,9 @@ runParser blob@Blob{..} parser = case parser of
writeStat (increment "parse.parse_failures" languageTag)
writeLog Error "failed parsing" (("task", "parse") : blobFields)
throwError (toException err)
time "parse.assign" languageTag $
-- TODO: Could give assignment a dedicated config for it's timeout.
res <- timeout (fromSeconds 3) . time "parse.assign" languageTag $
case assign blobSource assignment ast of
Left err -> do
writeStat (increment "parse.assign_errors" languageTag)
@ -260,3 +267,9 @@ runParser blob@Blob{..} parser = case parser of
logError config Warning blob err (("task", "assign") : blobFields)
when (optionsFailOnWarning (configOptions config)) $ throwError (toException err)
term <$ writeStat (count "parse.nodes" (length term) languageTag)
case res of
Just r -> pure r
Nothing -> do
writeStat (increment "assign.assign_timeouts" languageTag)
writeLog Error "assignment timeout" (("task", "assign") : blobFields)
throwError (SomeException (AssignmentTimedOut blobPath blobLanguage))

34
src/Semantic/Timeout.hs Normal file
View File

@ -0,0 +1,34 @@
{-# LANGUAGE TypeOperators, GADTs, RankNTypes #-}
module Semantic.Timeout
( timeout
, Timeout
, runTimeout
, Duration(..)
) where
import Control.Monad.Effect
import Control.Monad.IO.Class
import Data.Duration
import qualified System.Timeout as System
-- | Run an action with a timeout. Returns 'Nothing' when no result is available
-- within the specified duration. Uses 'System.Timeout.timeout' so all caveats
-- about not operating over FFI boundaries apply.
timeout :: (Member Timeout effs) => Duration -> Eff effs output -> Eff effs (Maybe output)
timeout n = send . Timeout n
-- | 'Timeout' effects run other effects, aborting them if they exceed the
-- specified duration.
data Timeout task output where
Timeout :: Duration -> task output -> Timeout task (Maybe output)
instance PureEffect Timeout
instance Effect Timeout where
handleState c dist (Request (Timeout n task) k) = Request (Timeout n (dist (task <$ c))) (dist . maybe (k Nothing <$ c) (fmap (k . Just)))
-- | Evaulate a 'Timeoute' effect.
runTimeout :: (Member (Lift IO) effects, PureEffects effects)
=> (forall x . Eff effects x -> IO x)
-> Eff (Timeout ': effects) a
-> Eff effects a
runTimeout handler = interpret (\ (Timeout n task) -> liftIO (System.timeout (toMicroseconds n) (handler (runTimeout handler task))))