diff --git a/.gitmodules b/.gitmodules index 410930286..0e9fd3c07 100644 --- a/.gitmodules +++ b/.gitmodules @@ -34,3 +34,6 @@ [submodule "languages/python/vendor/tree-sitter-python"] path = languages/python/vendor/tree-sitter-python url = https://github.com/tree-sitter/tree-sitter-python.git +[submodule "languages/json/vendor/tree-sitter-json"] + path = languages/json/vendor/tree-sitter-json + url = https://github.com/tree-sitter/tree-sitter-json diff --git a/languages/json/Setup.hs b/languages/json/Setup.hs new file mode 100644 index 000000000..9a994af67 --- /dev/null +++ b/languages/json/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/languages/json/json.cabal b/languages/json/json.cabal new file mode 100644 index 000000000..0b8e025c3 --- /dev/null +++ b/languages/json/json.cabal @@ -0,0 +1,27 @@ +name: json +version: 0.1.0 +synopsis: tree-sitter json language bindings +description: Please see README.md +homepage: https://github.com/github/semantic-diff#readme +author: semantic-code +maintainer: vera@github.com +copyright: 2017 GitHub +category: Web +build-type: Simple +-- extra-source-files: +cabal-version: >=1.10 + +library + hs-source-dirs: src + exposed-modules: Text.Parser.TreeSitter.JSON + build-depends: base >= 4.7 && < 5 + , haskell-tree-sitter + default-language: Haskell2010 + default-extensions: FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards + c-sources: vendor/tree-sitter-json/src/parser.c + cc-options: -std=c99 -Os + + +source-repository head + type: git + location: https://github.com/github/semantic-diff diff --git a/languages/json/src/Text/Parser/TreeSitter/JSON.hs b/languages/json/src/Text/Parser/TreeSitter/JSON.hs new file mode 100644 index 000000000..92dec946e --- /dev/null +++ b/languages/json/src/Text/Parser/TreeSitter/JSON.hs @@ -0,0 +1,6 @@ +module Text.Parser.TreeSitter.JSON where + +import Foreign.Ptr +import Text.Parser.TreeSitter + +foreign import ccall unsafe "vendor/tree-sitter-json/src/parser.c tree_sitter_json" tree_sitter_json :: Ptr Language diff --git a/languages/json/vendor/tree-sitter-json b/languages/json/vendor/tree-sitter-json new file mode 160000 index 000000000..5c21eb644 --- /dev/null +++ b/languages/json/vendor/tree-sitter-json @@ -0,0 +1 @@ +Subproject commit 5c21eb6446ca3e0a49fe710fb948d8efb8a5b6da diff --git a/semantic-diff.cabal b/semantic-diff.cabal index a683994a4..df043eb8b 100644 --- a/semantic-diff.cabal +++ b/semantic-diff.cabal @@ -50,6 +50,8 @@ library , Language.Markdown.Syntax , Language.Go , Language.Go.Syntax + , Language.JSON.Grammar + , Language.JSON.Syntax , Language.Ruby , Language.Ruby.Grammar , Language.Ruby.Syntax @@ -117,6 +119,7 @@ library , ruby , typescript , python + , json default-language: Haskell2010 default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, StrictData ghc-options: -Wall -fno-warn-name-shadowing -O -j diff --git a/src/Language.hs b/src/Language.hs index 5a0f22c23..e819e4e21 100644 --- a/src/Language.hs +++ b/src/Language.hs @@ -14,6 +14,7 @@ data Language = C | Go | JavaScript + | JSON | Markdown | Python | Ruby @@ -25,6 +26,7 @@ languageForType :: String -> Maybe Language languageForType mediaType = case mediaType of ".h" -> Just C ".c" -> Just C + ".json" -> Just JSON ".md" -> Just Markdown ".rb" -> Just Ruby ".go" -> Just Language.Go diff --git a/src/Language/JSON/Grammar.hs b/src/Language/JSON/Grammar.hs new file mode 100644 index 000000000..2439fe472 --- /dev/null +++ b/src/Language/JSON/Grammar.hs @@ -0,0 +1,10 @@ +{-# LANGUAGE TemplateHaskell #-} +module Language.JSON.Grammar where + +import Language.Haskell.TH +import Text.Parser.TreeSitter.Language +import Text.Parser.TreeSitter.JSON + +-- | Statically-known rules corresponding to symbols in the grammar. +-- v1 - bump this to regenerate +mkSymbolDatatype (mkName "Grammar") tree_sitter_json diff --git a/src/Language/JSON/Syntax.hs b/src/Language/JSON/Syntax.hs new file mode 100644 index 000000000..ebc8800de --- /dev/null +++ b/src/Language/JSON/Syntax.hs @@ -0,0 +1,68 @@ +{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-} +module Language.JSON.Syntax + ( assignment + , Syntax + , Grammar + , Term) + where + +import qualified Data.Syntax as Syntax +import qualified Data.Syntax.Literal as Literal +import Data.Syntax.Assignment hiding (Assignment, Error) +import qualified Data.Syntax.Assignment as Assignment +import Language.JSON.Grammar as Grammar +import qualified Term +import Data.Record +import Data.Union +import GHC.Stack +import Prologue hiding (Location) + +type Syntax = + [ Literal.Array + , Literal.Boolean + , Literal.Hash + , Literal.Float + , Literal.KeyValue + , Literal.Null + , Literal.String + , Literal.TextElement + , Syntax.Error + , [] + ] + +type Term = Term.Term (Union Syntax) (Record Location) +type Assignment = HasCallStack => Assignment.Assignment (AST Grammar) Grammar Term + + +makeTerm :: (HasCallStack, f :< fs) => a -> f (Term.Term (Union fs) a) -> Term.Term (Union fs) a +makeTerm a f = cofree (a :< inj f) + +parseError :: Assignment +parseError = makeTerm <$> symbol ParseError <*> (Syntax.Error [] <$ source) + +assignment :: Assignment +assignment = object <|> array <|> parseError + +value :: Assignment +value = object <|> array <|> number <|> string <|> boolean <|> none <|> parseError + +object :: Assignment +object = makeTerm <$> symbol Object <*> children (Literal.Hash <$> many pairs) + where pairs = makeTerm <$> symbol Pair <*> children (Literal.KeyValue <$> (number <|> string) <*> value) + +array :: Assignment +array = makeTerm <$> symbol Array <*> children (Literal.Array <$> many value) + +number :: Assignment +number = makeTerm <$> symbol Number <*> (Literal.Float <$> source) + +string :: Assignment +string = makeTerm <$> symbol String <*> (Literal.TextElement <$> source) + +boolean :: Assignment +boolean = makeTerm <$> symbol Grammar.True <*> (Literal.true <$ source) + <|> makeTerm <$> symbol Grammar.False <*> (Literal.false <$ source) + +none :: Assignment +none = makeTerm <$> symbol Null <*> (Literal.Null <$ source) + diff --git a/src/Parser.hs b/src/Parser.hs index 5a4b7baee..658958b03 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -5,6 +5,7 @@ module Parser -- Syntax parsers , parserForLanguage -- À la carte parsers +, jsonParser , markdownParser , pythonParser , rubyParser @@ -21,6 +22,7 @@ import Data.Union import Info hiding (Empty, Go) import Language import Language.Markdown +import qualified Language.JSON.Syntax as JSON import qualified Language.Markdown.Syntax as Markdown import qualified Language.Python.Syntax as Python import qualified Language.Ruby.Syntax as Ruby @@ -34,6 +36,7 @@ import Text.Parser.TreeSitter.Go import Text.Parser.TreeSitter.Python import Text.Parser.TreeSitter.Ruby import Text.Parser.TreeSitter.TypeScript +import Text.Parser.TreeSitter.JSON import TreeSitter -- | A parser from 'Source' onto some term type. @@ -59,6 +62,7 @@ parserForLanguage Nothing = LineByLineParser parserForLanguage (Just language) = case language of C -> TreeSitterParser C tree_sitter_c Go -> TreeSitterParser Go tree_sitter_go + JSON -> TreeSitterParser JSON tree_sitter_json JavaScript -> TreeSitterParser TypeScript tree_sitter_typescript Ruby -> TreeSitterParser Ruby tree_sitter_ruby TypeScript -> TreeSitterParser TypeScript tree_sitter_typescript @@ -70,6 +74,9 @@ rubyParser = AssignmentParser (ASTParser tree_sitter_ruby) headF Ruby.assignment pythonParser :: Parser Python.Term pythonParser = AssignmentParser (ASTParser tree_sitter_python) headF Python.assignment +jsonParser :: Parser JSON.Term +jsonParser = AssignmentParser (ASTParser tree_sitter_json) headF JSON.assignment + markdownParser :: Parser Markdown.Term markdownParser = AssignmentParser MarkdownParser (\ (node@Node{..} :< _) -> node { nodeSymbol = toGrammar nodeSymbol }) Markdown.assignment diff --git a/src/Semantic.hs b/src/Semantic.hs index cf08178fe..867cd461f 100644 --- a/src/Semantic.hs +++ b/src/Semantic.hs @@ -47,12 +47,15 @@ parseBlob renderer blob@Blob{..} = case (renderer, blobLanguage) of (ToCTermRenderer, _) -> parse syntaxParser blob >>= decorate (syntaxDeclarationAlgebra blob) >>= render (renderToCTerm blob) (JSONTermRenderer, Just Language.Markdown) -> parse markdownParser blob >>= decorate constructorLabel >>= render (renderJSONTerm blob) (JSONTermRenderer, Just Language.Python) -> parse pythonParser blob >>= decorate constructorLabel >>= render (renderJSONTerm blob) + (JSONTermRenderer, Just Language.JSON) -> parse jsonParser blob >>= decorate constructorLabel >>= render (renderJSONTerm blob) (JSONTermRenderer, _) -> parse syntaxParser blob >>= decorate identifierAlgebra >>= render (renderJSONTerm blob) (SExpressionTermRenderer, Just Language.Markdown) -> parse markdownParser blob >>= decorate constructorLabel >>= render renderSExpressionTerm . fmap keepConstructorLabel (SExpressionTermRenderer, Just Language.Python) -> parse pythonParser blob >>= decorate constructorLabel >>= render renderSExpressionTerm . fmap keepConstructorLabel + (SExpressionTermRenderer, Just Language.JSON) -> parse jsonParser blob >>= decorate constructorLabel >>= render renderSExpressionTerm . fmap keepConstructorLabel (SExpressionTermRenderer, _) -> parse syntaxParser blob >>= render renderSExpressionTerm . fmap keepCategory (IdentityTermRenderer, Just Language.Markdown) -> pure Nothing (IdentityTermRenderer, Just Language.Python) -> pure Nothing + (IdentityTermRenderer, Just Language.JSON) -> pure Nothing (IdentityTermRenderer, _) -> Just <$> parse syntaxParser blob where syntaxParser = parserForLanguage blobLanguage @@ -69,12 +72,15 @@ diffBlobPair renderer blobs = case (renderer, effectiveLanguage) of (ToCDiffRenderer, _) -> run (\ blob -> parse syntaxParser blob >>= decorate (syntaxDeclarationAlgebra blob)) diffTerms (renderToCDiff blobs) (JSONDiffRenderer, Just Language.Markdown) -> run (parse markdownParser) diffLinearly (renderJSONDiff blobs) (JSONDiffRenderer, Just Language.Python) -> run (parse pythonParser) diffLinearly (renderJSONDiff blobs) + (JSONDiffRenderer, Just Language.JSON) -> run (parse jsonParser) diffLinearly (renderJSONDiff blobs) (JSONDiffRenderer, _) -> run (decorate identifierAlgebra <=< parse syntaxParser) diffTerms (renderJSONDiff blobs) (PatchDiffRenderer, Just Language.Markdown) -> run (parse markdownParser) diffLinearly (renderPatch blobs) (PatchDiffRenderer, Just Language.Python) -> run (parse pythonParser) diffLinearly (renderPatch blobs) + (PatchDiffRenderer, Just Language.JSON) -> run (parse jsonParser) diffLinearly (renderPatch blobs) (PatchDiffRenderer, _) -> run (parse syntaxParser) diffTerms (renderPatch blobs) (SExpressionDiffRenderer, Just Language.Markdown) -> run (decorate constructorLabel <=< parse markdownParser) diffLinearly (renderSExpressionDiff . mapAnnotations keepConstructorLabel) (SExpressionDiffRenderer, Just Language.Python) -> run (decorate constructorLabel <=< parse pythonParser) diffLinearly (renderSExpressionDiff . mapAnnotations keepConstructorLabel) + (SExpressionDiffRenderer, Just Language.JSON) -> run (decorate constructorLabel <=< parse jsonParser) diffLinearly (renderSExpressionDiff . mapAnnotations keepConstructorLabel) (SExpressionDiffRenderer, _) -> run (parse syntaxParser) diffTerms (renderSExpressionDiff . mapAnnotations keepCategory) (IdentityDiffRenderer, _) -> run (\ blob -> parse syntaxParser blob >>= decorate (syntaxDeclarationAlgebra blob)) diffTerms Just where effectiveLanguage = runBothWith (<|>) (blobLanguage <$> blobs)