1
1
mirror of https://github.com/github/semantic.git synced 2024-11-24 17:04:47 +03:00

Merge branch 'master' into abstract-abstract-semantics

This commit is contained in:
Rob Rix 2018-07-04 10:44:35 -04:00
commit 6f3739f4a3
4 changed files with 71 additions and 47 deletions

View File

@ -1,11 +1,11 @@
{-# LANGUAGE FunctionalDependencies, GeneralizedNewtypeDeriving, KindSignatures #-}
{-# LANGUAGE FunctionalDependencies #-}
-- | Deterministic assignment, à la _Deterministic, Error-Correcting Combinator Parsers_, S. Doaitse Swierstra & Luc Duponcheel: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.80.9967&rep=rep1&type=pdf
module Assigning.Assignment.Deterministic
( Assigning(..)
, TermAssigning(..)
, parseError
, Assignment(..)
, assign
, runAssignment
, TermAssignment(..)
, State(..)
) where
@ -26,22 +26,21 @@ class (Alternative f, Ord symbol, Show symbol) => Assigning symbol f | f -> symb
leafNode :: symbol -> f Text
branchNode :: symbol -> f a -> f a
class Assigning symbol f => TermAssigning syntaxes symbol f | f -> symbol, f -> syntaxes where
toTerm :: Element syntax syntaxes
toTerm :: (Element syntax syntaxes, Element Syntax.Error syntaxes)
=> f (syntax (Term (Sum syntaxes) (Record Location)))
-> f (Term (Sum syntaxes) (Record Location))
parseError :: ( Bounded symbol
, Element Syntax.Error syntaxes
, HasCallStack
, TermAssigning syntaxes symbol f
, Assigning symbol f
)
=> f (Term (Sum syntaxes) (Record Location))
parseError = toTerm (leafNode maxBound $> Syntax.Error (Syntax.ErrorStack (Syntax.errorSite <$> getCallStack (freezeCallStack callStack))) [] (Just "ParseError") [])
data Assignment symbol a = Assignment
{ nullable :: Maybe (State symbol -> a)
{ nullable :: Nullable symbol a
, firstSet :: IntSet
, choices :: [(symbol, Cont symbol a)]
}
@ -49,30 +48,31 @@ data Assignment symbol a = Assignment
type Cont symbol a = Source -> State symbol -> [IntSet] -> Either (Error (Either String symbol)) (State symbol, a)
combine :: Maybe a -> IntSet -> IntSet -> IntSet
combine e s1 s2 = if isJust e then s1 <> s2 else lowerBound
combine :: Nullable symbol a -> IntSet -> IntSet -> IntSet
combine (Nullable _) s1 s2 = s1 <> s2
combine _ s1 _ = s1
choose :: Enum symbol
=> Maybe (State symbol -> a)
choose :: (Enum symbol, HasCallStack)
=> Nullable symbol a
-> IntSet
-> IntMap (Cont symbol a)
-> Cont symbol a
choose nullable firstSet table src state follow = case stateInput state of
[] -> case nullable of
Just f -> Right (state, f state)
_ -> Left (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) Nothing)
Nullable f -> Right (state, f state)
_ -> Left (withFrozenCallStack (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) Nothing))
s:_ -> case fromEnum (astSymbol s) `IntMap.lookup` table of
Just k -> k src state follow
_ -> notFound (astSymbol s) state follow
where notFound s state follow = case nullable of
Just f | any (fromEnum s `IntSet.member`) follow -> Right (state, f state)
_ -> Left (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) (Just (Right s)))
Nullable f | any (fromEnum s `IntSet.member`) follow -> Right (state, f state)
_ -> Left (withFrozenCallStack (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) (Just (Right s))))
instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
pure a = Assignment (Just (const a)) lowerBound []
pure a = Assignment (pure a) lowerBound []
{-# INLINABLE pure #-}
Assignment n1 f1 t1 <*> ~(Assignment n2 f2 t2) = Assignment (liftA2 (<*>) n1 n2) (combine n1 f1 f2) (t1 `tseq` t2)
Assignment n1 f1 t1 <*> ~(Assignment n2 f2 t2) = Assignment (n1 <*> n2) (combine n1 f1 f2) (t1 `tseq` t2)
where table2 = IntMap.fromList (map (first fromEnum) t2)
t1 `tseq` t2
= map (fmap (\ p src state follow -> do
@ -81,7 +81,7 @@ instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
let pq = p' q'
pq `seq` pure (state'', pq))) t1
<> case n1 of
Just p -> map (fmap (\ q src state follow -> do
Nullable p -> map (fmap (\ q src state follow -> do
let p' = p state
(state', q') <- p' `seq` q src state follow
let pq = p' q'
@ -90,54 +90,73 @@ instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
{-# INLINABLE (<*>) #-}
instance (Enum symbol, Ord symbol) => Alternative (Assignment symbol) where
empty = Assignment Nothing lowerBound []
empty = Assignment NotNullable lowerBound []
{-# INLINABLE empty #-}
Assignment n1 f1 t1 <|> Assignment n2 f2 t2 = Assignment (n1 <|> n2) (f1 <> f2) (t1 <> t2)
{-# INLINABLE (<|>) #-}
instance (Enum symbol, Ord symbol, Show symbol) => Assigning symbol (Assignment symbol) where
leafNode s = Assignment Nothing (IntSet.singleton (fromEnum s))
leafNode s = Assignment NotNullable (IntSet.singleton (fromEnum s))
[ (s, \ src state _ -> case stateInput state of
[] -> Left (Error (stateSpan state) [Right s] Nothing)
[] -> Left (withFrozenCallStack (Error (stateSpan state) [Right s] Nothing))
s:_ -> case decodeUtf8' (sourceBytes (Source.slice (astRange s) src)) of
Left err -> Left (Error (astSpan s) [Left "valid utf-8"] (Just (Left (show err))))
Left err -> Left (withFrozenCallStack (Error (astSpan s) [Left "valid utf-8"] (Just (Left (show err)))))
Right text -> Right (advanceState state, text))
]
branchNode s a = Assignment Nothing (IntSet.singleton (fromEnum s))
branchNode s a = Assignment NotNullable (IntSet.singleton (fromEnum s))
[ (s, \ src state _ -> case stateInput state of
[] -> Left (Error (stateSpan state) [Right s] Nothing)
[] -> Left (withFrozenCallStack (Error (stateSpan state) [Right s] Nothing))
s:_ -> first (const (advanceState state)) <$> runAssignment a src state { stateInput = astChildren s })
]
toTerm a = Assignment
(case nullable a of
Nullable f -> Nullable (\ state -> termIn (stateLocation state) (inject (f state)))
NotNullable -> NotNullable)
(firstSet a)
(map (fmap (\ match src state follow -> case match src state follow of
Left err
| Just _ <- errorActual err -> Right (advanceState state, termIn (stateLocation state) (inject (Syntax.errorSyntax (either id show <$> err) [])))
| otherwise -> Left err
Right (state', syntax) -> Right (state', termIn (stateLocation state) (inject syntax)))) (choices a))
assign :: (Enum symbol, Show symbol) => Assignment symbol a -> Source -> AST [] symbol -> Either (Error String) a
assign assignment src = bimap (fmap (either id show)) snd . runAssignment assignment src . State 0 lowerBound . pure
runAssignment :: Enum symbol => Assignment symbol a -> Source -> State symbol -> Either (Error (Either String symbol)) (State symbol, a)
runAssignment (Assignment nullable firstSet table) src input
= case choose nullable firstSet (IntMap.fromList (map (first fromEnum) table)) src input lowerBound of
Left err -> Left err
Right (state', a') -> case stateInput state' of
[] -> Right (state', a')
s':_ -> Left (Error (stateSpan state') [] (Just (Right (astSymbol s'))))
s':_ -> Left (withFrozenCallStack (Error (stateSpan state') [] (Just (Right (astSymbol s')))))
newtype TermAssignment (syntaxes :: [* -> *]) symbol a = TermAssignment { runTermAssignment :: Assignment symbol a }
deriving (Alternative, Applicative, Functor, Assigning symbol)
data Nullable symbol a
= NotNullable
| Nullable (State symbol -> a)
deriving (Functor)
instance (Enum symbol, Ord symbol, Show symbol) => TermAssigning syntaxes symbol (TermAssignment syntaxes symbol) where
toTerm (TermAssignment a) = TermAssignment (Assignment
(case nullable a of
Just f -> Just (\ state -> termIn (stateLocation state) (inject (f state)))
Nothing -> Nothing)
(firstSet a)
(map (fmap (\ match src state follow -> case match src state follow of
Left err -> Left err
Right (state', syntax) -> Right (state', termIn (stateLocation state) (inject syntax)))) (choices a)))
instance Applicative (Nullable symbol) where
pure = Nullable . const
Nullable f <*> Nullable a = Nullable (\ state -> f state (a state))
_ <*> _ = NotNullable
instance Alternative (Nullable symbol) where
empty = NotNullable
Nullable a <|> _ = Nullable a
_ <|> b = b
data State s = State
data State symbol = State
{ stateBytes :: {-# UNPACK #-} !Int
, statePos :: {-# UNPACK #-} !Pos
, stateInput :: ![AST [] s]
, stateInput :: ![AST [] symbol]
}
deriving (Eq, Ord, Show)

View File

@ -6,6 +6,7 @@ module Language.JSON.Assignment
where
import Assigning.Assignment.Deterministic hiding (Assignment)
import qualified Assigning.Assignment.Deterministic as Deterministic
import Data.AST
import Data.Record
import Data.Sum
@ -28,7 +29,7 @@ type Syntax =
]
type Term = Term.Term (Sum Syntax) (Record Location)
type Assignment = TermAssignment Syntax Grammar
type Assignment = Deterministic.Assignment Grammar
assignment :: Assignment Term

View File

@ -15,6 +15,7 @@ module Parsing.Parser
, javaParser
, javaASTParser
, jsonParser
, jsonASTParser
, markdownParser
, pythonParser
, rubyParser
@ -104,7 +105,7 @@ data Parser term where
-> Parser (Term (Sum fs) (Record Location)) -- A parser producing 'Term's.
DeterministicParser :: (Enum grammar, Ord grammar, Show grammar, Element Syntax.Error syntaxes, Apply Foldable syntaxes, Apply Functor syntaxes)
=> Parser (AST [] grammar)
-> Deterministic.TermAssignment syntaxes grammar (Term (Sum syntaxes) (Record Location))
-> Deterministic.Assignment grammar (Term (Sum syntaxes) (Record Location))
-> Parser (Term (Sum syntaxes) (Record Location))
-- | A parser for 'Markdown' using cmark.
MarkdownParser :: Parser (Term (TermF [] CMarkGFM.NodeType) (Node Markdown.Grammar))
@ -166,7 +167,10 @@ javaASTParser :: Parser (AST [] Java.Grammar)
javaASTParser = ASTParser tree_sitter_java
jsonParser :: Parser JSON.Term
jsonParser = DeterministicParser (ASTParser tree_sitter_json) JSON.assignment
jsonParser = DeterministicParser jsonASTParser JSON.assignment
jsonASTParser :: Parser (AST [] JSON.Grammar)
jsonASTParser = ASTParser tree_sitter_json
typescriptParser :: Parser TypeScript.Term
typescriptParser = AssignmentParser (ASTParser tree_sitter_typescript) TypeScript.assignment

View File

@ -226,18 +226,18 @@ runParser blob@Blob{..} parser = case parser of
throwError (toException err)
config <- ask
time "parse.assign_deterministic" languageTag $
case Deterministic.runAssignment (Deterministic.runTermAssignment assignment) blobSource (Deterministic.State 0 lowerBound [ast]) of
case Deterministic.assign assignment blobSource ast of
Left err -> do
writeStat (increment "parse.assign_errors" languageTag)
logError config Error blob (either id show <$> err) (("task", "assign") : blobFields)
throwError (toException (either id show <$> err))
Right (_, term) -> do
writeStat (increment "parse.assign_deterministic_errors" languageTag)
logError config Error blob err (("task", "assign") : blobFields)
throwError (toException err)
Right term -> do
for_ (errors term) $ \ err -> case Error.errorActual err of
Just "ParseError" -> do
writeStat (increment "parse.parse_errors" languageTag)
logError config Warning blob err (("task", "parse") : blobFields)
_ -> do
writeStat (increment "parse.assign_warnings" languageTag)
writeStat (increment "parse.assign_deterministic_warnings" languageTag)
logError config Warning blob err (("task", "assign") : blobFields)
when (optionsFailOnWarning (configOptions config)) $ throwError (toException err)
writeStat (count "parse.nodes" (length term) languageTag)