mirror of
https://github.com/github/semantic.git
synced 2024-11-28 10:15:55 +03:00
Merge branch 'master' into abstract-abstract-semantics
This commit is contained in:
commit
6f3739f4a3
@ -1,11 +1,11 @@
|
||||
{-# LANGUAGE FunctionalDependencies, GeneralizedNewtypeDeriving, KindSignatures #-}
|
||||
{-# LANGUAGE FunctionalDependencies #-}
|
||||
-- | Deterministic assignment, à la _Deterministic, Error-Correcting Combinator Parsers_, S. Doaitse Swierstra & Luc Duponcheel: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.80.9967&rep=rep1&type=pdf
|
||||
module Assigning.Assignment.Deterministic
|
||||
( Assigning(..)
|
||||
, TermAssigning(..)
|
||||
, parseError
|
||||
, Assignment(..)
|
||||
, assign
|
||||
, runAssignment
|
||||
, TermAssignment(..)
|
||||
, State(..)
|
||||
) where
|
||||
|
||||
@ -26,22 +26,21 @@ class (Alternative f, Ord symbol, Show symbol) => Assigning symbol f | f -> symb
|
||||
leafNode :: symbol -> f Text
|
||||
branchNode :: symbol -> f a -> f a
|
||||
|
||||
class Assigning symbol f => TermAssigning syntaxes symbol f | f -> symbol, f -> syntaxes where
|
||||
toTerm :: Element syntax syntaxes
|
||||
toTerm :: (Element syntax syntaxes, Element Syntax.Error syntaxes)
|
||||
=> f (syntax (Term (Sum syntaxes) (Record Location)))
|
||||
-> f (Term (Sum syntaxes) (Record Location))
|
||||
|
||||
parseError :: ( Bounded symbol
|
||||
, Element Syntax.Error syntaxes
|
||||
, HasCallStack
|
||||
, TermAssigning syntaxes symbol f
|
||||
, Assigning symbol f
|
||||
)
|
||||
=> f (Term (Sum syntaxes) (Record Location))
|
||||
parseError = toTerm (leafNode maxBound $> Syntax.Error (Syntax.ErrorStack (Syntax.errorSite <$> getCallStack (freezeCallStack callStack))) [] (Just "ParseError") [])
|
||||
|
||||
|
||||
data Assignment symbol a = Assignment
|
||||
{ nullable :: Maybe (State symbol -> a)
|
||||
{ nullable :: Nullable symbol a
|
||||
, firstSet :: IntSet
|
||||
, choices :: [(symbol, Cont symbol a)]
|
||||
}
|
||||
@ -49,30 +48,31 @@ data Assignment symbol a = Assignment
|
||||
|
||||
type Cont symbol a = Source -> State symbol -> [IntSet] -> Either (Error (Either String symbol)) (State symbol, a)
|
||||
|
||||
combine :: Maybe a -> IntSet -> IntSet -> IntSet
|
||||
combine e s1 s2 = if isJust e then s1 <> s2 else lowerBound
|
||||
combine :: Nullable symbol a -> IntSet -> IntSet -> IntSet
|
||||
combine (Nullable _) s1 s2 = s1 <> s2
|
||||
combine _ s1 _ = s1
|
||||
|
||||
choose :: Enum symbol
|
||||
=> Maybe (State symbol -> a)
|
||||
choose :: (Enum symbol, HasCallStack)
|
||||
=> Nullable symbol a
|
||||
-> IntSet
|
||||
-> IntMap (Cont symbol a)
|
||||
-> Cont symbol a
|
||||
choose nullable firstSet table src state follow = case stateInput state of
|
||||
[] -> case nullable of
|
||||
Just f -> Right (state, f state)
|
||||
_ -> Left (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) Nothing)
|
||||
Nullable f -> Right (state, f state)
|
||||
_ -> Left (withFrozenCallStack (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) Nothing))
|
||||
s:_ -> case fromEnum (astSymbol s) `IntMap.lookup` table of
|
||||
Just k -> k src state follow
|
||||
_ -> notFound (astSymbol s) state follow
|
||||
where notFound s state follow = case nullable of
|
||||
Just f | any (fromEnum s `IntSet.member`) follow -> Right (state, f state)
|
||||
_ -> Left (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) (Just (Right s)))
|
||||
Nullable f | any (fromEnum s `IntSet.member`) follow -> Right (state, f state)
|
||||
_ -> Left (withFrozenCallStack (Error (stateSpan state) (Right . toEnum <$> IntSet.toList firstSet) (Just (Right s))))
|
||||
|
||||
instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
|
||||
pure a = Assignment (Just (const a)) lowerBound []
|
||||
pure a = Assignment (pure a) lowerBound []
|
||||
{-# INLINABLE pure #-}
|
||||
|
||||
Assignment n1 f1 t1 <*> ~(Assignment n2 f2 t2) = Assignment (liftA2 (<*>) n1 n2) (combine n1 f1 f2) (t1 `tseq` t2)
|
||||
Assignment n1 f1 t1 <*> ~(Assignment n2 f2 t2) = Assignment (n1 <*> n2) (combine n1 f1 f2) (t1 `tseq` t2)
|
||||
where table2 = IntMap.fromList (map (first fromEnum) t2)
|
||||
t1 `tseq` t2
|
||||
= map (fmap (\ p src state follow -> do
|
||||
@ -81,7 +81,7 @@ instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
|
||||
let pq = p' q'
|
||||
pq `seq` pure (state'', pq))) t1
|
||||
<> case n1 of
|
||||
Just p -> map (fmap (\ q src state follow -> do
|
||||
Nullable p -> map (fmap (\ q src state follow -> do
|
||||
let p' = p state
|
||||
(state', q') <- p' `seq` q src state follow
|
||||
let pq = p' q'
|
||||
@ -90,54 +90,73 @@ instance (Enum symbol, Ord symbol) => Applicative (Assignment symbol) where
|
||||
{-# INLINABLE (<*>) #-}
|
||||
|
||||
instance (Enum symbol, Ord symbol) => Alternative (Assignment symbol) where
|
||||
empty = Assignment Nothing lowerBound []
|
||||
empty = Assignment NotNullable lowerBound []
|
||||
{-# INLINABLE empty #-}
|
||||
|
||||
Assignment n1 f1 t1 <|> Assignment n2 f2 t2 = Assignment (n1 <|> n2) (f1 <> f2) (t1 <> t2)
|
||||
{-# INLINABLE (<|>) #-}
|
||||
|
||||
instance (Enum symbol, Ord symbol, Show symbol) => Assigning symbol (Assignment symbol) where
|
||||
leafNode s = Assignment Nothing (IntSet.singleton (fromEnum s))
|
||||
leafNode s = Assignment NotNullable (IntSet.singleton (fromEnum s))
|
||||
[ (s, \ src state _ -> case stateInput state of
|
||||
[] -> Left (Error (stateSpan state) [Right s] Nothing)
|
||||
[] -> Left (withFrozenCallStack (Error (stateSpan state) [Right s] Nothing))
|
||||
s:_ -> case decodeUtf8' (sourceBytes (Source.slice (astRange s) src)) of
|
||||
Left err -> Left (Error (astSpan s) [Left "valid utf-8"] (Just (Left (show err))))
|
||||
Left err -> Left (withFrozenCallStack (Error (astSpan s) [Left "valid utf-8"] (Just (Left (show err)))))
|
||||
Right text -> Right (advanceState state, text))
|
||||
]
|
||||
|
||||
branchNode s a = Assignment Nothing (IntSet.singleton (fromEnum s))
|
||||
branchNode s a = Assignment NotNullable (IntSet.singleton (fromEnum s))
|
||||
[ (s, \ src state _ -> case stateInput state of
|
||||
[] -> Left (Error (stateSpan state) [Right s] Nothing)
|
||||
[] -> Left (withFrozenCallStack (Error (stateSpan state) [Right s] Nothing))
|
||||
s:_ -> first (const (advanceState state)) <$> runAssignment a src state { stateInput = astChildren s })
|
||||
]
|
||||
|
||||
toTerm a = Assignment
|
||||
(case nullable a of
|
||||
Nullable f -> Nullable (\ state -> termIn (stateLocation state) (inject (f state)))
|
||||
NotNullable -> NotNullable)
|
||||
(firstSet a)
|
||||
(map (fmap (\ match src state follow -> case match src state follow of
|
||||
Left err
|
||||
| Just _ <- errorActual err -> Right (advanceState state, termIn (stateLocation state) (inject (Syntax.errorSyntax (either id show <$> err) [])))
|
||||
| otherwise -> Left err
|
||||
Right (state', syntax) -> Right (state', termIn (stateLocation state) (inject syntax)))) (choices a))
|
||||
|
||||
|
||||
assign :: (Enum symbol, Show symbol) => Assignment symbol a -> Source -> AST [] symbol -> Either (Error String) a
|
||||
assign assignment src = bimap (fmap (either id show)) snd . runAssignment assignment src . State 0 lowerBound . pure
|
||||
|
||||
runAssignment :: Enum symbol => Assignment symbol a -> Source -> State symbol -> Either (Error (Either String symbol)) (State symbol, a)
|
||||
runAssignment (Assignment nullable firstSet table) src input
|
||||
= case choose nullable firstSet (IntMap.fromList (map (first fromEnum) table)) src input lowerBound of
|
||||
Left err -> Left err
|
||||
Right (state', a') -> case stateInput state' of
|
||||
[] -> Right (state', a')
|
||||
s':_ -> Left (Error (stateSpan state') [] (Just (Right (astSymbol s'))))
|
||||
s':_ -> Left (withFrozenCallStack (Error (stateSpan state') [] (Just (Right (astSymbol s')))))
|
||||
|
||||
|
||||
newtype TermAssignment (syntaxes :: [* -> *]) symbol a = TermAssignment { runTermAssignment :: Assignment symbol a }
|
||||
deriving (Alternative, Applicative, Functor, Assigning symbol)
|
||||
data Nullable symbol a
|
||||
= NotNullable
|
||||
| Nullable (State symbol -> a)
|
||||
deriving (Functor)
|
||||
|
||||
instance (Enum symbol, Ord symbol, Show symbol) => TermAssigning syntaxes symbol (TermAssignment syntaxes symbol) where
|
||||
toTerm (TermAssignment a) = TermAssignment (Assignment
|
||||
(case nullable a of
|
||||
Just f -> Just (\ state -> termIn (stateLocation state) (inject (f state)))
|
||||
Nothing -> Nothing)
|
||||
(firstSet a)
|
||||
(map (fmap (\ match src state follow -> case match src state follow of
|
||||
Left err -> Left err
|
||||
Right (state', syntax) -> Right (state', termIn (stateLocation state) (inject syntax)))) (choices a)))
|
||||
instance Applicative (Nullable symbol) where
|
||||
pure = Nullable . const
|
||||
|
||||
Nullable f <*> Nullable a = Nullable (\ state -> f state (a state))
|
||||
_ <*> _ = NotNullable
|
||||
|
||||
instance Alternative (Nullable symbol) where
|
||||
empty = NotNullable
|
||||
|
||||
Nullable a <|> _ = Nullable a
|
||||
_ <|> b = b
|
||||
|
||||
|
||||
data State s = State
|
||||
data State symbol = State
|
||||
{ stateBytes :: {-# UNPACK #-} !Int
|
||||
, statePos :: {-# UNPACK #-} !Pos
|
||||
, stateInput :: ![AST [] s]
|
||||
, stateInput :: ![AST [] symbol]
|
||||
}
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
@ -6,6 +6,7 @@ module Language.JSON.Assignment
|
||||
where
|
||||
|
||||
import Assigning.Assignment.Deterministic hiding (Assignment)
|
||||
import qualified Assigning.Assignment.Deterministic as Deterministic
|
||||
import Data.AST
|
||||
import Data.Record
|
||||
import Data.Sum
|
||||
@ -28,7 +29,7 @@ type Syntax =
|
||||
]
|
||||
|
||||
type Term = Term.Term (Sum Syntax) (Record Location)
|
||||
type Assignment = TermAssignment Syntax Grammar
|
||||
type Assignment = Deterministic.Assignment Grammar
|
||||
|
||||
|
||||
assignment :: Assignment Term
|
||||
|
@ -15,6 +15,7 @@ module Parsing.Parser
|
||||
, javaParser
|
||||
, javaASTParser
|
||||
, jsonParser
|
||||
, jsonASTParser
|
||||
, markdownParser
|
||||
, pythonParser
|
||||
, rubyParser
|
||||
@ -104,7 +105,7 @@ data Parser term where
|
||||
-> Parser (Term (Sum fs) (Record Location)) -- A parser producing 'Term's.
|
||||
DeterministicParser :: (Enum grammar, Ord grammar, Show grammar, Element Syntax.Error syntaxes, Apply Foldable syntaxes, Apply Functor syntaxes)
|
||||
=> Parser (AST [] grammar)
|
||||
-> Deterministic.TermAssignment syntaxes grammar (Term (Sum syntaxes) (Record Location))
|
||||
-> Deterministic.Assignment grammar (Term (Sum syntaxes) (Record Location))
|
||||
-> Parser (Term (Sum syntaxes) (Record Location))
|
||||
-- | A parser for 'Markdown' using cmark.
|
||||
MarkdownParser :: Parser (Term (TermF [] CMarkGFM.NodeType) (Node Markdown.Grammar))
|
||||
@ -166,7 +167,10 @@ javaASTParser :: Parser (AST [] Java.Grammar)
|
||||
javaASTParser = ASTParser tree_sitter_java
|
||||
|
||||
jsonParser :: Parser JSON.Term
|
||||
jsonParser = DeterministicParser (ASTParser tree_sitter_json) JSON.assignment
|
||||
jsonParser = DeterministicParser jsonASTParser JSON.assignment
|
||||
|
||||
jsonASTParser :: Parser (AST [] JSON.Grammar)
|
||||
jsonASTParser = ASTParser tree_sitter_json
|
||||
|
||||
typescriptParser :: Parser TypeScript.Term
|
||||
typescriptParser = AssignmentParser (ASTParser tree_sitter_typescript) TypeScript.assignment
|
||||
|
@ -226,18 +226,18 @@ runParser blob@Blob{..} parser = case parser of
|
||||
throwError (toException err)
|
||||
config <- ask
|
||||
time "parse.assign_deterministic" languageTag $
|
||||
case Deterministic.runAssignment (Deterministic.runTermAssignment assignment) blobSource (Deterministic.State 0 lowerBound [ast]) of
|
||||
case Deterministic.assign assignment blobSource ast of
|
||||
Left err -> do
|
||||
writeStat (increment "parse.assign_errors" languageTag)
|
||||
logError config Error blob (either id show <$> err) (("task", "assign") : blobFields)
|
||||
throwError (toException (either id show <$> err))
|
||||
Right (_, term) -> do
|
||||
writeStat (increment "parse.assign_deterministic_errors" languageTag)
|
||||
logError config Error blob err (("task", "assign") : blobFields)
|
||||
throwError (toException err)
|
||||
Right term -> do
|
||||
for_ (errors term) $ \ err -> case Error.errorActual err of
|
||||
Just "ParseError" -> do
|
||||
writeStat (increment "parse.parse_errors" languageTag)
|
||||
logError config Warning blob err (("task", "parse") : blobFields)
|
||||
_ -> do
|
||||
writeStat (increment "parse.assign_warnings" languageTag)
|
||||
writeStat (increment "parse.assign_deterministic_warnings" languageTag)
|
||||
logError config Warning blob err (("task", "assign") : blobFields)
|
||||
when (optionsFailOnWarning (configOptions config)) $ throwError (toException err)
|
||||
writeStat (count "parse.nodes" (length term) languageTag)
|
||||
|
Loading…
Reference in New Issue
Block a user