1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 06:41:45 +03:00

Merge branch 'master' into gitmon-support

This commit is contained in:
Rick Winfrey 2017-02-10 14:17:47 -08:00
commit e44c0d122a
439 changed files with 273175 additions and 1685 deletions

View File

@ -1,15 +1,11 @@
# Build configuration for https://atom.io/packages/build
cmd: stack build semantic-diff
cmd: stack build
name: semantic-diff
env:
PATH: ~/.local/bin:~/Developer/Tools:~/Library/Haskell/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin
targets:
test:
cmd: stack build :semantic-diff-test
cmd: stack build :integration-test
keymap: cmd-u
semantic-difftool:
cmd: stack build :semantic-difftool
semantic-git-diff:
cmd: stack build :semantic-git-diff
errorMatch:
- \n(?<file>/[^:]+):(?<line>\d+):((?<col>\d+):)?

1
.gitattributes vendored
View File

@ -2,4 +2,5 @@ test/diffs linguist-vendored
test/diffs-todo linguist-vendored
test/crashers linguist-vendored
test/crashers-todo linguist-vendored
test/repos linguist-vendored
vendor linguist-vendored

45
.gitmodules vendored
View File

@ -1,9 +1,48 @@
[submodule "vendor/tree-sitter-parsers"]
path = vendor/tree-sitter-parsers
url = git@github.com:github/tree-sitter-parsers.git
[submodule "vendor/text-icu"]
path = vendor/text-icu
url = https://github.com/joshvera/text-icu
[submodule "vendor/gitlib"]
path = vendor/gitlib
url = https://github.com/joshvera/gitlib
[submodule "test/repos/jquery"]
path = test/repos/jquery
url = https://github.com/jquery/jquery
[submodule "test/repos/js-test"]
path = test/repos/js-test
url = https://github.com/rewinfrey/js-test.git
[submodule "test/repos/backbone"]
path = test/repos/backbone
url = https://github.com/jashkenas/backbone
[submodule "test/corpus/repos/javascript"]
path = test/corpus/repos/javascript
url = https://github.com/diff-fixtures/javascript
[submodule "vendor/hspec-expectations-pretty-diff"]
path = vendor/hspec-expectations-pretty-diff
url = https://github.com/rewinfrey/hspec-expectations-pretty-diff
[submodule "test/corpus/repos/go"]
path = test/corpus/repos/go
url = https://github.com/diff-fixtures/go.git
[submodule "test/corpus/repos/ruby"]
path = test/corpus/repos/ruby
url = https://github.com/diff-fixtures/ruby.git
[submodule "vendor/effects"]
path = vendor/effects
url = https://github.com/joshvera/effects.git
[submodule "languages/ruby/vendor/tree-sitter-ruby"]
path = languages/ruby/vendor/tree-sitter-ruby
url = https://github.com/tree-sitter/tree-sitter-ruby.git
[submodule "languages/c/vendor/tree-sitter-c"]
path = languages/c/vendor/tree-sitter-c
url = https://github.com/tree-sitter/tree-sitter-c.git
[submodule "languages/go/vendor/tree-sitter-go"]
path = languages/go/vendor/tree-sitter-go
url = https://github.com/tree-sitter/tree-sitter-go.git
[submodule "languages/javascript/vendor/tree-sitter-javascript"]
path = languages/javascript/vendor/tree-sitter-javascript
url = https://github.com/tree-sitter/tree-sitter-javascript.git
[submodule "vendor/haskell-tree-sitter"]
path = vendor/haskell-tree-sitter
url = https://github.com/tree-sitter/haskell-tree-sitter.git
[submodule "test/corpus/profile"]
path = test/corpus/profile
url = https://github.com/diff-fixtures/profile.git

19
HLint.hs Normal file
View File

@ -0,0 +1,19 @@
import "hint" HLint.Default
import "hint" HLint.Dollar
import "hint" HLint.Generalise
ignore "Use mappend"
error "generalize ++" = (++) ==> (<>)
-- AMP fallout
error "generalize mapM" = mapM ==> traverse
error "generalize mapM_" = mapM_ ==> traverse_
error "generalize forM" = forM ==> for
error "generalize forM_" = forM_ ==> for_
error "Avoid return" =
return ==> pure
where note = "return is obsolete as of GHC 7.10"
error "use pure" = free . Pure ==> pure
error "use wrap" = free . Free ==> wrap
error "use extract" = headF . runCofree ==> extract

View File

@ -1,17 +1,50 @@
# Semantic diff roadmap
# Roadmap
## Q1 2016
This is the long form version of our [roadmap project][].
1. [Staff ship & limited beta of semantic diffing on .com](https://github.com/github/semantic-diff/milestones/Staff%20Ship). This will be an opt-in, limited release of semantic diffs for a very small set of languages. UI in general will be unchanged; well simply start showing better diffs for the languages in question. The goal is to ease ourselves into deployment of the system, and benchmark under real loads.
## Things we are currently doing:
2. [Semantic diffing on .com](https://github.com/github/semantic-diff/milestones/Dot%20Calm). General release of semantic diffs for the supported languages.
1. [Diff summaries][] for C & JavaScript. Q3 2016 or so.
- Modelling the abstract semantics of the supported languages. Good summaries require us to know what different parts of the syntax represent.
- Performance/responsiveness. We need to be able to produce diffs more quickly, and without unicorns. Some of this will involve front-end work (e.g. requesting summaries out-of-band).
## Q2Q4 2016
2. [Semantic diffs][] on .com for C & JavaScript. Q4 2016 or so.
We will discuss future milestones at the **@github/network-intelligence** minisummit mid-Q1 2016, and document them here at that point.
- Performance, as above.
- Resilience. A fault in `semantic-diff` should not break anything else.
- Metrics. We need to know how its behaving in the wild to know what to do about it. This also includes operational metrics such as health checks.
## Ongoing
## Follow-up things:
- Creation, curation, and cultivation of grammars for semantic diffs.
1. Add support for more languages: [Ruby][], etc.
2. [Detecting & rendering moves][moves].
3. [Merging][].
4. Refining the diff summaries we produce.
## Things we would like to do:
1. [Interactively refining diffs][interactive].
2. [Filtering][] diffs.
3. Diff [table of contents][].
4. [Jump to symbol definition][].
5. Eliminate conflicts from renaming [variables][].
## Things we would like to do modulo interest/support from other teams:
1. APIs/tooling for data science & engineering teams.
2. Collect data on our heuristics &c. and refine them via e.g. ML.
3. Diffs as a [service][].
[roadmap project]: https://github.com/github/semantic-diff/projects/5
[Diff summaries]: https://github.com/github/semantic-diff/milestones/Summer%20Eyes
[Semantic diffs]: https://github.com/github/semantic-diff/milestones/Dot%20Calm
[Ruby]: https://github.com/github/semantic-diff/issues/282
[moves]: https://github.com/github/semantic-diff/issues/389
[Merging]: https://github.com/github/semantic-diff/issues/431
[interactive]: https://github.com/github/semantic-diff/issues/130
[Filtering]: https://github.com/github/semantic-diff/issues/428
[table of contents]: https://github.com/github/semantic-diff/issues/16
[Jump to symbol definition]: https://github.com/github/semantic-diff/issues/6
[variables]: https://github.com/github/semantic-diff/issues/91
[service]: https://github.com/github/platform/blob/master/services/README.md

1
UI/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.html

376
app/GenerateTestCases.hs Normal file
View File

@ -0,0 +1,376 @@
{-# LANGUAGE LambdaCase, GADTs, DataKinds #-}
module Main where
import Arguments
import Control.Exception
import Data.Aeson
import Data.Aeson.Encode.Pretty
import Data.Map.Strict as Map
import qualified Data.ByteString.Lazy as DL
import qualified Data.ByteString.Char8 as DC
import Data.String
import qualified Data.Text as DT
import JSONTestCase
import qualified Prelude
import Prologue
import SemanticDiff (fetchDiffs)
import System.FilePath.Glob
import System.Process
import qualified Data.String.Utils as DSUtils
import Options.Applicative hiding ((<>))
import qualified Options.Applicative as O
import qualified Renderer as R
import Control.Monad.Effect
import Control.Monad.Effect.Internal
data GenerateFormat =
GenerateSummaries
| GenerateJSON
deriving (Show)
data GeneratorArgs = GeneratorArgs { generateFormat :: GenerateFormat } deriving (Show)
generatorArgs :: Parser GeneratorArgs
generatorArgs = GeneratorArgs
<$> (flag' GenerateSummaries (long "generate-summaries" O.<> short 's' O.<> help "Generates summary results for new JSON test cases")
<|> flag' GenerateJSON (long "generate-json" O.<> short 'j' O.<> help "Generate JSON output for new JSON test cases"))
options :: ParserInfo GeneratorArgs
options = info (helper <*> generatorArgs) (fullDesc O.<> progDesc "Auto-generate JSON test cases" O.<> header "JSON Test Case Generator")
main :: IO ()
main = do
opts <- execParser options
generatorFilePaths <- runFetchGeneratorFiles
metaRepos <- traverse DL.readFile generatorFilePaths
for_ (decodeMetaRepos metaRepos) (handle opts generatorFilePaths)
where decodeMetaRepos :: [DL.ByteString] -> [Either String [JSONMetaRepo]]
decodeMetaRepos metaRepos = eitherDecode <$> metaRepos
handle :: GeneratorArgs -> [FilePath] -> Either String [JSONMetaRepo] -> IO ()
handle opts generatorFilePaths decodedMetaRepos =
case decodedMetaRepos of
Left err -> Prelude.putStrLn $ "An error occurred: " <> err
Right metaRepos -> do
traverse_ (runGenerator opts) metaRepos
traverse_ runMoveGeneratorFile generatorFilePaths
-- | Finds all JSON files within the generators directory.
runFetchGeneratorFiles :: IO [FilePath]
runFetchGeneratorFiles = globDir1 (compile "*.json") "test/corpus/generators"
-- | First initialize the git submodule repository where commits will be made for the given metaRepo and its syntaxes.
-- | Second generate the commits for each syntax and generate the associated JSONTestCase objects.
-- | Finally push the generated commits to the submodule's remote repository.
runGenerator :: GeneratorArgs -> JSONMetaRepo -> IO ()
runGenerator opts metaRepo@JSONMetaRepo{..} = do
runSetupGitRepo repoUrl $ repoPath language
runCommitsAndTestCasesGeneration opts metaRepo
runPullGitRemote repoUrl $ repoPath language
runPushGitRemote $ repoPath language
-- | Defines the repoPath based on the convention that a repository is based on its language name for a defaut location.
repoPath :: String -> FilePath
repoPath language = "test/corpus/repos/" <> language
-- | Upon successful test case generation for a generator file, move the file to the generated directory.
-- | This prevents subsequence runs of the test generator from duplicating test cases and adding extraneous
-- | commits to the git submodule.
runMoveGeneratorFile :: FilePath -> IO ()
runMoveGeneratorFile filePath = do
let updatedPath = DT.unpack $ DT.replace (DT.pack "generators") (DT.pack "generated") (DT.pack filePath)
Prelude.putStrLn updatedPath
_ <- readCreateProcess (shell $ "mv " <> filePath <> " " <> updatedPath) ""
return ()
-- | Initializes a new git repository and adds it as a submodule to the semantic-diff git index.
-- | This repository contains the commits associated with the given JSONMetaRepo's syntax examples.
runSetupGitRepo :: String -> FilePath -> IO ()
runSetupGitRepo repoUrl repoPath = do
runInitializeRepo repoUrl repoPath
runAddSubmodule repoUrl repoPath
-- | Performs the system calls for initializing the git repository.
-- | If the git repository already exists, the operation will result in an error,
-- | but will not prevent successful completion of the test case generation.
runInitializeRepo :: String -> FilePath -> IO ()
runInitializeRepo repoUrl repoPath = do
result <- try $ readCreateProcess (shell $ mkDirCommand repoPath) ""
case (result :: Either Prelude.IOError String) of
Left error -> Prelude.putStrLn $ "Creating the repository directory at " <> repoPath <> " failed with: " <> show error <> ". " <> "Possible reason: repository already initialized. \nProceeding to the next step."
Right _ -> do
_ <- executeCommand repoPath (initializeRepoCommand repoUrl)
Prelude.putStrLn $ "Repository directory successfully initialized for " <> repoPath <> "."
-- | Git repositories generated as a side-effect of generating tests cases are
-- | added to semantic-diff's git index as submodules. If the submodule initialization
-- | fails (usually because the submodule was already initialized), operations will
-- | continue.
runAddSubmodule :: String -> FilePath -> IO ()
runAddSubmodule repoUrl repoPath = do
result <- try $ readCreateProcess (shell $ addSubmoduleCommand repoUrl repoPath) ""
case (result :: Either Prelude.IOError String) of
Left error -> Prelude.putStrLn $ "Initializing the submodule repository at " <> repoPath <> " failed with: " <> show error <> ". " <> "Possible reason: submodule already initialized. \nProceeding to the next step."
_ -> Prelude.putStrLn $ "Submodule successfully initialized for " <> repoPath <> "."
-- | Performs the system calls for generating the commits and test cases.
-- | Also appends the JSONTestCases generated to the test case file defined by
-- | the syntaxes.
runCommitsAndTestCasesGeneration :: GeneratorArgs -> JSONMetaRepo -> IO ()
runCommitsAndTestCasesGeneration opts metaRepo@JSONMetaRepo{..} =
for_ syntaxes generate
where
generate :: JSONMetaSyntax -> IO ()
generate metaSyntax = do
_ <- runInitialCommitForSyntax metaRepo metaSyntax
let testCaseFilePath' = testCaseFilePath language opts metaSyntax
runSetupTestCaseFile testCaseFilePath'
runCommitAndTestCaseGeneration opts metaRepo metaSyntax testCaseFilePath'
runCloseTestCaseFile testCaseFilePath'
testCaseFilePath :: String -> GeneratorArgs -> JSONMetaSyntax -> FilePath
testCaseFilePath language GeneratorArgs{..} JSONMetaSyntax{..} = case generateFormat of
GenerateSummaries -> "test/corpus/diff-summaries/" <> language <> "/" <> syntax <> ".json"
GenerateJSON -> "test/corpus/json/" <> language <> "/" <> syntax <> ".json"
-- | For a syntax, we want the initial commit to be an empty file.
-- | This function performs a touch and commits the empty file.
runInitialCommitForSyntax :: JSONMetaRepo -> JSONMetaSyntax -> IO ()
runInitialCommitForSyntax metaRepo@JSONMetaRepo{..} metaSyntax@JSONMetaSyntax{..} = do
Prelude.putStrLn $ "Generating initial commit for " <> syntax <> " syntax."
let repoFilePath' = repoFilePath metaRepo metaSyntax
result <- try . executeCommand (repoPath language) $ touchCommand repoFilePath' <> commitCommand syntax "Initial commit"
case ( result :: Either Prelude.IOError String) of
Left error -> Prelude.putStrLn $ "Initializing the " <> repoFilePath metaRepo metaSyntax <> " failed with: " <> show error <> ". " <> "Possible reason: file already initialized. \nProceeding to the next step."
Right _ -> runAddTemplateForSyntax metaRepo metaSyntax
runAddTemplateForSyntax :: JSONMetaRepo -> JSONMetaSyntax -> IO ()
runAddTemplateForSyntax metaRepo@JSONMetaRepo{..} metaSyntax@JSONMetaSyntax{..} = case templateText of
Just templateText -> do
let repoFilePath' = repoFilePath metaRepo metaSyntax
_ <- executeCommand (repoPath language) $ fileWriteCommand repoFilePath' templateText <> commitCommand syntax ("Add " <> repoFilePath' <> " template text.")
pure ()
Nothing -> pure ()
-- | Initializes the test case file where JSONTestCase examples are written to.
-- | This manually inserts a "[" to open a JSON array.
runSetupTestCaseFile :: FilePath -> IO ()
runSetupTestCaseFile testCaseFilePath = do
Prelude.putStrLn $ "Opening " <> testCaseFilePath
DL.writeFile testCaseFilePath "["
-- | For each command constructed for a given metaSyntax, execute the system commands.
runCommitAndTestCaseGeneration :: GeneratorArgs -> JSONMetaRepo -> JSONMetaSyntax -> FilePath -> IO ()
runCommitAndTestCaseGeneration opts metaRepo metaSyntax testCaseFilePath =
traverse_ (runGenerateCommitAndTestCase opts metaRepo testCaseFilePath) (commands metaRepo metaSyntax)
-- | Converts a list of Output to a list of Renderer.Summary Map values
maybeMapSummary :: [R.Output] -> [Maybe (Map Text (Map Text [Value]))]
maybeMapSummary = fmap $ \case
R.SummaryOutput output -> Just output
_ -> Nothing
-- | Converst a list of Output to a list of Renderer.JSON values
maybeMapJSON :: [R.Output] -> [Maybe (Map Text Value)]
maybeMapJSON = fmap $ \case
R.JSONOutput output -> Just output
_ -> Nothing
-- | This function represents the heart of the test case generation. It keeps track of
-- | the git shas prior to running a command, fetches the git sha after a command, so that
-- | JSONTestCase objects can be created. Finally, it appends the created JSONTestCase
-- | object to the test case file.
runGenerateCommitAndTestCase :: GeneratorArgs -> JSONMetaRepo -> FilePath -> (JSONMetaSyntax, String, String, String) -> IO ()
runGenerateCommitAndTestCase opts JSONMetaRepo{..} testCaseFilePath (JSONMetaSyntax{..}, description, seperator, command) = do
Prelude.putStrLn $ "Executing " <> syntax <> " " <> description <> " commit."
beforeSha <- executeCommand (repoPath language) getLastCommitShaCommand
_ <- executeCommand (repoPath language) command
afterSha <- executeCommand (repoPath language) getLastCommitShaCommand
patch <- executeCommand (repoPath language) (gitDiffCommand beforeSha afterSha)
expectedResult' <- runExpectedResult (repoPath language) beforeSha afterSha (syntax <> fileExt) opts
let jsonTestCase = encodePretty JSONTestCase {
gitDir = extractGitDir (repoPath language),
testCaseDescription = language <> "-" <> syntax <> "-" <> description <> "-" <> "test",
filePaths = [syntax <> fileExt],
shas = beforeSha <> ".." <> afterSha,
patch = lines patch,
expectedResult = expectedResult'
}
Prelude.putStrLn $ "Generating test case for " <> language <> ": " <> syntax <> " " <> description <> "."
DL.appendFile testCaseFilePath $ jsonTestCase <> DL.fromStrict (DC.pack seperator)
where extractGitDir :: String -> String
extractGitDir fullRepoPath = DC.unpack $ snd $ DC.breakSubstring (DC.pack "test") (DC.pack fullRepoPath)
-- | This constructs an Eff and runs it to return the appropriate IO ExpectedResult.
runExpectedResult :: FilePath -> String -> String -> FilePath -> GeneratorArgs -> IO ExpectedResult
runExpectedResult repoPath beforeSha afterSha repoFilePath GeneratorArgs{..} =
case generateFormat of
GenerateSummaries -> Main.run $ constructSummariesEff repoPath beforeSha afterSha repoFilePath
GenerateJSON -> Main.run $ constructJSONEff repoPath beforeSha afterSha repoFilePath
data GenerateEff a where
GenerateSummaries' :: Arguments -> GenerateEff ExpectedResult
GenerateJSON' :: Arguments -> GenerateEff ExpectedResult
-- | Construct an Eff whose queue includes only GenerateEff effects.
constructSummariesEff :: FilePath -> String -> String -> FilePath -> Eff '[GenerateEff] ExpectedResult
constructSummariesEff repoPath beforeSha afterSha repoFilePath = send $ GenerateSummaries' (args repoPath beforeSha afterSha [repoFilePath] R.Summary)
-- | Construct an Eff whose queue includes only GenerateEff effects.
constructJSONEff :: FilePath -> String -> String -> FilePath -> Eff '[GenerateEff] ExpectedResult
constructJSONEff repoPath beforeSha afterSha repoFilePath = send $ GenerateJSON' (args repoPath beforeSha afterSha [repoFilePath] R.JSON)
-- | Evaluate the Effs and return the IO ExpectedResult.
run :: Eff '[GenerateEff] ExpectedResult -> IO ExpectedResult
run (Val x) = pure x
run (E u queue) = case decompose u of
(Right (GenerateSummaries' args)) -> generateSummaries args >>= \s -> Main.run (apply queue s)
(Right (GenerateJSON' args)) -> generateJSON args >>= \s -> Main.run (apply queue s)
(Left _) -> pure $ SummaryResult ( Map.fromList [ ("changes", Map.singleton mempty mempty), ("errors", Map.singleton mempty mempty) ] )
-- | Produces DiffSummary results for the given Arguments.
generateSummaries :: Arguments -> IO ExpectedResult
generateSummaries args@Arguments{..} = do
diffs <- fetchDiffs args
let headResult = Prelude.head $ maybeMapSummary diffs
let changes = fromMaybe (fromList [("changes", mempty)]) headResult ! "changes"
let errors = fromMaybe (fromList [("errors", mempty)]) headResult ! "errors"
pure $ SummaryResult ( Map.fromList [ ("changes", changes), ("errors", errors) ] )
-- | Produces JSON output for the given Arguments.
generateJSON :: Arguments -> IO ExpectedResult
generateJSON args = do
diffs <- fetchDiffs args
let headResult = Prelude.head $ maybeMapJSON diffs
let oids = fromMaybe (fromList [("oids", "")]) headResult ! "oids"
let paths = fromMaybe (fromList [("output", "")]) headResult ! "paths"
let rows = fromMaybe (fromList [("rows", "")]) headResult ! "rows"
pure $ JSONResult ( Map.fromList [ ("oids", oids), ("paths", paths), ("rows", rows) ] )
repoFilePath :: JSONMetaRepo -> JSONMetaSyntax -> String
repoFilePath metaRepo metaSyntax = syntax metaSyntax <> fileExt metaRepo
-- | Commands represent the various combination of patches (insert, delete, replacement)
-- | for a given syntax.
commands :: JSONMetaRepo -> JSONMetaSyntax -> [(JSONMetaSyntax, String, String, String)]
commands JSONMetaRepo{..} metaSyntax@JSONMetaSyntax{..} = case template of
(Just _) -> [ (metaSyntax, "setup", commaSeperator, fileWriteCommand repoFilePath (withTemplate "") <> commitCommand syntax "setup")
, (metaSyntax, "insert", commaSeperator, fileWriteCommand repoFilePath (withTemplate insert) <> commitCommand syntax "insert")
, (metaSyntax, "replacement", commaSeperator, fileWriteCommand repoFilePath (withTemplate replacement) <> commitCommand syntax "replacement")
, (metaSyntax, "delete-replacement", commaSeperator, fileWriteCommand repoFilePath (withTemplate insert) <> commitCommand syntax "delete replacement")
, (metaSyntax, "delete-insert", commaSeperator, fileWriteCommand repoFilePath (withTemplate "") <> commitCommand syntax "delete insert")
, (metaSyntax, "teardown", spaceSeperator, removeCommand repoFilePath <> touchCommand repoFilePath <> commitCommand syntax "teardown")
]
Nothing -> [ (metaSyntax, "insert", commaSeperator, fileWriteCommand repoFilePath insert <> commitCommand syntax "insert")
, (metaSyntax, "replacement-insert", commaSeperator, fileWriteCommand repoFilePath (Prologue.intercalate "\n" [replacement, insert, insert]) <> commitCommand syntax "replacement + insert + insert")
, (metaSyntax, "delete-insert", commaSeperator, fileWriteCommand repoFilePath (Prologue.intercalate "\n" [insert, insert, insert]) <> commitCommand syntax "delete + insert")
, (metaSyntax, "replacement", commaSeperator, fileWriteCommand repoFilePath (Prologue.intercalate "\n" [replacement, insert, insert]) <> commitCommand syntax "replacement")
, (metaSyntax, "delete-replacement", commaSeperator, fileWriteCommand repoFilePath (Prologue.intercalate "\n" [insert, replacement]) <> commitCommand syntax "delete + replacement")
, (metaSyntax, "delete", commaSeperator, fileWriteCommand repoFilePath replacement <> commitCommand syntax "delete")
, (metaSyntax, "delete-rest", spaceSeperator, removeCommand repoFilePath <> touchCommand repoFilePath <> commitCommand syntax "delete rest")
]
where commaSeperator = "\n,"
spaceSeperator = ""
repoFilePath = syntax <> fileExt
withTemplate = contentsWithTemplate template
contentsWithTemplate :: Maybe String -> String -> String
contentsWithTemplate (Just template) contents = DT.unpack $ DT.replace "{0}" (toS contents) (toS template)
contentsWithTemplate Nothing contents = contents
-- | Attempts to pull from the git repository's remote repository.
-- | If the pull fails, the exception is caught and continues to the next step.
runPullGitRemote :: String -> FilePath -> IO ()
runPullGitRemote repoUrl repoPath = do
Prelude.putStrLn "Attempting to fetch from the remote repository."
_ <- executeCommand repoPath checkoutMasterCommand
result <- attempt
handle result next errorMessage
where attempt :: IO (Either Prelude.IOError String)
attempt = try $ executeCommand repoPath pullFromRemoteCommand
handle :: Either Prelude.IOError String -> IO () -> (Prelude.IOError -> IO ()) -> IO ()
handle result success err = case (result :: Either Prelude.IOError String) of
Left error -> err error
Right _ -> success
next :: IO ()
next = Prelude.putStrLn "Remote repository successfully fetched.\n"
errorMessage :: Prelude.IOError -> IO ()
errorMessage err = Prelude.putStrLn $ "Pulling from the remote repository at " <> repoUrl <> " failed with: " <> show err <> ". Proceeding to the next step.\n"
-- | Pushes git commits to the submodule repository's remote.
runPushGitRemote :: FilePath -> IO ()
runPushGitRemote repoPath = do
Prelude.putStrLn "Updating git remote."
result <- try $ executeCommand repoPath pushToGitRemoteCommand
case (result :: Either Prelude.IOError String) of
Left err -> die $ "Failed to push to remote repository: " <> show err
Right _ -> Prelude.putStrLn "Successfully updated git remote."
-- | Closes the JSON array and closes the test case file.
runCloseTestCaseFile :: FilePath -> IO ()
runCloseTestCaseFile testCaseFilePath = do
Prelude.putStrLn $ "Closing " <> testCaseFilePath
DL.appendFile testCaseFilePath "]\n"
initializeRepoCommand :: String -> String
initializeRepoCommand repoUrl = "rm -rf *; rm -rf .git; git init .; git remote add origin " <> repoUrl <> ";"
addSubmoduleCommand :: String -> FilePath -> String
addSubmoduleCommand repoUrl repoPath = "git submodule add " <> repoUrl <> " " <> " ./" <> repoPath <> ";"
getLastCommitShaCommand :: String
getLastCommitShaCommand = "git log --pretty=format:\"%H\" -n 1;"
gitDiffCommand :: String -> String -> String
gitDiffCommand sha1 sha2 = "git diff " <> sha1 <> ".." <> sha2 <> ";"
checkoutMasterCommand :: String
checkoutMasterCommand = "git checkout master;"
pullFromRemoteCommand :: String
pullFromRemoteCommand = "git pull origin master;"
touchCommand :: FilePath -> String
touchCommand repoFilePath = "touch " <> repoFilePath <> ";"
-- | In order to correctly record syntax examples that include backticks (like JavaScript template strings)
-- | we must first escape them for bash (due to the use of the `echo` system command). Additionally,
-- | we must also escape the escape character `\` in Haskell, hence the double `\\`.
fileWriteCommand :: FilePath -> String -> String
fileWriteCommand repoFilePath contents = "echo \"" <> (escapeBackticks . escapeDoubleQuotes) contents <> "\" > " <> repoFilePath <> ";"
where
escapeBackticks = DSUtils.replace "`" "\\`"
escapeDoubleQuotes = DSUtils.replace "\"" "\\\""
fileAppendCommand :: FilePath -> String -> String
fileAppendCommand repoFilePath contents = "echo \"" <> (escapeBackticks . escapeDoubleQuotes) contents <> "\" >> " <> repoFilePath <> ";"
where
escapeBackticks = DSUtils.replace "`" "\\`"
escapeDoubleQuotes = DSUtils.replace "\"" "\\\""
commitCommand :: String -> String -> String
commitCommand syntax commitMessage = "git add .; git commit -m \"" <> syntax <> ": " <> commitMessage <> "\"" <> ";"
removeCommand :: FilePath -> String
removeCommand repoFilePath = "rm " <> repoFilePath <> ";"
pushToGitRemoteCommand :: String
pushToGitRemoteCommand = "git push origin HEAD;"
mkDirCommand :: FilePath -> String
mkDirCommand repoPath = "mkdir " <> repoPath <> ";"
executeCommand :: FilePath -> String -> IO String
executeCommand repoPath command = readCreateProcess (shell command) { cwd = Just repoPath } ""

3
app/Main.hs Normal file
View File

@ -0,0 +1,3 @@
module Main (main)
where
import SemanticDiff (main)

67
bench/Main.hs Normal file
View File

@ -0,0 +1,67 @@
{-# LANGUAGE DeriveAnyClass, FlexibleInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Main where
import Arguments
import Criterion.Main
import Data.Function
import Data.List (genericLength)
import Data.String
import Patch
import Prologue
import qualified Renderer as R
import SemanticDiff (fetchDiffs)
import qualified SemanticDiffPar
import SES
import System.Directory (makeAbsolute)
main :: IO ()
main = defaultMain
[ bgroup "ses"
[ bench "0,0" (nf (uncurry benchmarkSES) ([], []))
, bench "1,1, =" (nf (uncurry benchmarkSES) ([lower], [lower]))
, bench "1,1, ≠" (nf (uncurry benchmarkSES) ([lower], [upper]))
, bench "10,10, =" (nf (uncurry benchmarkSES) (replicate 10 lower, replicate 10 lower))
, bench "10,10, ≠" (nf (uncurry benchmarkSES) (replicate 10 lower, replicate 10 upper))
, bench "100,100, =" (nf (uncurry benchmarkSES) (replicate 100 lower, replicate 100 lower))
, bench "100,100, ≠" (nf (uncurry benchmarkSES) (replicate 100 lower, replicate 100 upper))
]
, syncAsyncBenchmark
]
where lower = ['a'..'z']
upper = ['A'..'Z']
benchmarkSES :: [String] -> [String] -> [Either String (Patch String)]
benchmarkSES = ses compare cost
where compare a b = if a == b then Just (Left a) else Nothing
cost = either (const 0) (sum . fmap genericLength)
instance NFData a => NFData (Patch a)
syncAsyncBenchmark :: Benchmark
syncAsyncBenchmark =
bgroup "async vs par" [
bench "async" . whnfIO $ SemanticDiff.fetchDiffs =<< theArgs,
bench "par" . whnfIO $ SemanticDiffPar.fetchDiffs =<< theArgs
]
theArgs :: IO Arguments
theArgs = do
jqueryPath <- makeAbsolute "test/repos/jquery"
pure $ args jqueryPath sha1 sha2 files R.Patch
where
sha1 = "70526981916945dc4093e116a3de61b1777d4718"
sha2 = "e5ffcb0838c894e26f4ff32dfec162cf624d8d7d"
files = [
"src/manipulation/getAll.js",
"src/manipulation/support.js",
"src/manipulation/wrapMap.js",
"src/offset.js",
"test/unit/css.js",
"test/unit/deferred.js",
"test/unit/deprecated.js",
"test/unit/effects.js",
"test/unit/event.js",
"test/unit/offset.js",
"test/unit/wrap.js"
]

12
bench/SemanticDiffPar.hs Normal file
View File

@ -0,0 +1,12 @@
module SemanticDiffPar where
import Arguments
import qualified Control.Monad.Par.IO as ParIO
import Control.Monad.Reader
import qualified Data.Text as T
import Prologue
import qualified Renderer as R
import SemanticDiff
fetchDiffs :: Arguments -> IO [T.Text]
fetchDiffs args@Arguments{..} = pure . pure . R.concatOutputs =<< (ParIO.runParIO . liftIO $ for filePaths (fetchDiff args))

2
languages/c/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

24
languages/c/c.cabal Normal file
View File

@ -0,0 +1,24 @@
name: c
version: 0.1.0
synopsis: tree-sitter c language bindings
description: Please see README.md
homepage: https://github.com/github/semantic-diff#readme
author: semantic-code
maintainer: tclem@github.com
copyright: 2017 GitHub
category: Web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: Text.Parser.TreeSitter.C
build-depends: base >= 4.7 && < 5
, haskell-tree-sitter
default-language: Haskell2010
c-sources: vendor/tree-sitter-c/src/parser.c
source-repository head
type: git
location: https://github.com/github/semantic-diff

View File

@ -0,0 +1,6 @@
module Text.Parser.TreeSitter.C where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-c/src/parser.c tree_sitter_c" tree_sitter_c :: Ptr Language

1
languages/c/vendor/tree-sitter-c vendored Submodule

@ -0,0 +1 @@
Subproject commit 1e46713a228508ae83e2513b194647f6c508a17c

2
languages/go/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

24
languages/go/go.cabal Normal file
View File

@ -0,0 +1,24 @@
name: go
version: 0.1.0
synopsis: tree-sitter go language bindings
description: Please see README.md
homepage: https://github.com/github/semantic-diff#readme
author: semantic-code
maintainer: tclem@github.com
copyright: 2017 GitHub
category: Web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: Text.Parser.TreeSitter.Go
build-depends: base >= 4.7 && < 5
, haskell-tree-sitter
default-language: Haskell2010
c-sources: vendor/tree-sitter-go/src/parser.c
source-repository head
type: git
location: https://github.com/github/semantic-diff

View File

@ -0,0 +1,6 @@
module Text.Parser.TreeSitter.Go where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-go/src/parser.c tree_sitter_go" tree_sitter_go :: Ptr Language

1
languages/go/vendor/tree-sitter-go vendored Submodule

@ -0,0 +1 @@
Subproject commit ca3d2de4bdeebba0a408fc5936883045981880cf

View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,24 @@
name: javascript
version: 0.1.0
synopsis: tree-sitter javascript language bindings
description: Please see README.md
homepage: https://github.com/github/semantic-diff#readme
author: semantic-code
maintainer: tclem@github.com
copyright: 2017 GitHub
category: Web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: Text.Parser.TreeSitter.JavaScript
build-depends: base >= 4.7 && < 5
, haskell-tree-sitter
default-language: Haskell2010
c-sources: vendor/tree-sitter-javascript/src/parser.c
source-repository head
type: git
location: https://github.com/github/semantic-diff

View File

@ -0,0 +1,6 @@
module Text.Parser.TreeSitter.JavaScript where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-javascript/src/parser.c tree_sitter_javascript" tree_sitter_javascript :: Ptr Language

@ -0,0 +1 @@
Subproject commit 4a819fc084092db1ea75978efa5371fe39312aab

2
languages/ruby/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

26
languages/ruby/ruby.cabal Normal file
View File

@ -0,0 +1,26 @@
name: ruby
version: 0.1.0
synopsis: tree-sitter ruby language bindings
description: Please see README.md
homepage: https://github.com/github/semantic-diff#readme
author: semantic-code
maintainer: tclem@github.com
copyright: 2017 GitHub
category: Web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules: Text.Parser.TreeSitter.Ruby
build-depends: base >= 4.7 && < 5
, haskell-tree-sitter
default-language: Haskell2010
c-sources: vendor/tree-sitter-ruby/src/parser.c
, vendor/tree-sitter-ruby/src/scanner.cc
extra-libraries: stdc++
source-repository head
type: git
location: https://github.com/github/semantic-diff

View File

@ -0,0 +1,6 @@
module Text.Parser.TreeSitter.Ruby where
import Text.Parser.TreeSitter
import Foreign.Ptr
foreign import ccall "vendor/tree-sitter-ruby/src/parser.c tree_sitter_ruby" tree_sitter_ruby :: Ptr Language

@ -0,0 +1 @@
Subproject commit b2ca35ffc5b1e3eec5ee41fc3d0420788dffa04a

View File

@ -1,5 +1,5 @@
name: semantic-diff
version: 0.1.0.0
version: 0.2.0
synopsis: Initial project template from stack
description: Please see README.md
homepage: http://github.com/github/semantic-diff#readme
@ -15,90 +15,205 @@ library
hs-source-dirs: src
exposed-modules: Algorithm
, Alignment
, Arguments
, Category
, Control.Comonad.Cofree
, Control.Monad.Free
, Data.Adjoined
, Data.Align
, Data.Bifunctor.These
, Data.Coalescent
, Data.Copointed
, Data.Align.Generic
, Data.Functor.Both
, Data.Option
, Data.OrderedMap
, Data.Functor.Listable
, Data.Mergeable
, Data.Mergeable.Generic
, Data.RandomWalkSimilarity
, Data.Record
, Data.Text.Listable
, Diff
, Diffing
, DiffOutput
, DiffSummary
, Info
, Interpreter
, Language
, Line
, Operation
, Language.C
, Language.JavaScript
, Language.Markdown
, Language.Go
, Language.Ruby
, Parse
, Parser
, Patch
, Paths_semantic_diff
, Prologue
, Range
, Renderer
, Renderer.JSON
, Renderer.Patch
, Renderer.Split
, Renderer.Summary
, Renderer.SExpression
, Renderer.TOC
, SemanticDiff
, SES
, Source
, SourceSpan
, SplitDiff
, Syntax
, Term
, TreeSitter
build-depends: aeson
, base >= 4.8 && < 5
, FDoc.Term
, FDoc.RecursionSchemes
, FDoc.NatExample
build-depends: base >= 4.8 && < 5
, aeson
, aeson-pretty
, array
, async-pool
, bifunctors
, blaze-html
, blaze-markup
, bytestring
, cmark
, comonad
, containers
, directory
, dlist
, filepath
, free
, gitlib
, gitlib-libgit2
, gitrev
, hashable
, kdt
, leancheck
, mersenne-random-pure64
, MonadRandom
, mtl
, optparse-applicative
, pointed
, protolude
, recursion-schemes
, regex-compat
, semigroups
, text >= 1.2.1.3
, text-icu
, tree-sitter-parsers
, these
, haskell-tree-sitter
, vector
, wl-pprint-text
, c
, go
, ruby
, javascript
default-language: Haskell2010
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, OverloadedStrings
ghc-options: -Wall -fno-warn-name-shadowing -O2 -threaded -fprof-auto "-with-rtsopts=-N -p -s -h -i0.1" -j
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, LambdaCase, StrictData
ghc-options: -Wall -fno-warn-name-shadowing -O2 -j
ghc-prof-options: -fprof-auto
test-suite semantic-diff-test
executable semantic-diff
hs-source-dirs: app
main-is: Main.hs
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m" -static -j -O2 -pgml=script/g++
cc-options: -DU_STATIC_IMPLEMENTATION=1
cpp-options: -DU_STATIC_IMPLEMENTATION=1
build-depends: base
, semantic-diff
default-language: Haskell2010
default-extensions: FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards
executable generate-test-cases
hs-source-dirs: app, test
main-is: GenerateTestCases.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N -static -j -pgml=script/g++
cc-options: -DU_STATIC_IMPLEMENTATION=1
cpp-options: -DU_STATIC_IMPLEMENTATION=1
other-modules: JSONTestCase
build-depends: base
, aeson
, aeson-pretty
, bytestring
, containers
, Glob
, MissingH
, optparse-applicative
, process
, semantic-diff
, text >= 1.2.1.3
, effects
, unordered-containers
default-language: Haskell2010
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, LambdaCase
benchmark semantic-diff-bench
type: exitcode-stdio-1.0
main-is: Main.hs
hs-source-dirs: bench
other-modules: SemanticDiffPar
build-depends: base
, criterion
, directory
, leancheck
, monad-par
, mtl
, semantic-diff
, text >= 1.2.1.3
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -O2 -static -pgml=script/g++
default-language: Haskell2010
default-extensions: FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards
test-suite test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Spec.hs
other-modules: AlignmentSpec
, ArbitraryTerm
, CorpusSpec
, Data.Adjoined.Spec
, Data.Functor.Both.Spec
, Data.Mergeable.Spec
, Data.RandomWalkSimilarity.Spec
, Diff.Spec
, DiffSummarySpec
, InterpreterSpec
, OrderedMapSpec
, PatchOutputSpec
, RangeSpec
, Source.Spec
, TermSpec
build-depends: base
, bytestring
, containers
, Test.Hspec.LeanCheck
build-depends: array
, base
, bifunctors
, deepseq
, filepath
, Glob
, hspec >= 2.1.10
, QuickCheck >= 2.8.1
, quickcheck-text
, hspec-core
, hspec-expectations-pretty-diff
, leancheck
, mtl
, protolude
, recursion-schemes >= 4.1
, semantic-diff
, text >= 1.2.1.3
if os(darwin)
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j
else
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++
, these
, vector
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++
default-language: Haskell2010
default-extensions: DeriveFunctor, DeriveGeneric, OverloadedStrings
if os(darwin)
extra-libraries: stdc++ icuuc icudata icui18n
if os(darwin)
extra-lib-dirs: /usr/local/opt/icu4c/lib
if os(darwin)
include-dirs: /usr/local/opt/icu4c/include
default-extensions: DeriveFunctor, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards
test-suite integration-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: SpecIntegration.hs
other-modules: IntegrationFormatSpec
, JSONTestCase
build-depends: base
, aeson
, bytestring
, containers
, Glob
, hspec >= 2.1.10
, hspec-expectations-pretty-diff
, semantic-diff
, split
, MissingH
, unordered-containers
ghc-options: -threaded -rtsopts -with-rtsopts=-N -j -pgml=script/g++
default-language: Haskell2010
default-extensions: DeriveGeneric, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards
source-repository head
type: git

View File

@ -1,7 +1,42 @@
module Algorithm where
import Control.Monad.Free
import Operation
import Control.Applicative.Free
import Prologue hiding (Pure)
-- | A lazily-produced AST for diffing.
type Algorithm a annotation = Free (Operation a annotation)
-- | A single step in a diffing algorithm.
--
-- 'term' is the type of terms.
-- 'diff' is the type of diffs.
-- 'f' represents the continuation after diffing. Often 'Algorithm'.
data AlgorithmF term diff f
-- | Recursively diff two terms and pass the result to the continuation.
= Recursive term term (diff -> f)
-- | Diff two lists by each elements position, and pass the resulting list of diffs to the continuation.
| ByIndex [term] [term] ([diff] -> f)
-- | Diff two lists by each elements similarity and pass the resulting list of diffs to the continuation.
| BySimilarity [term] [term] ([diff] -> f)
deriving Functor
-- | The free applicative for 'AlgorithmF'. This enables us to construct diff values using <$> and <*> notation.
type Algorithm term diff = Ap (AlgorithmF term diff)
-- | Tear down an Ap by iteration.
iterAp :: Functor g => (g a -> a) -> Ap g a -> a
iterAp algebra = go
where go (Pure a) = a
go (Ap underlying apply) = algebra (go . (apply <*>) . pure <$> underlying)
-- DSL
-- | Constructs a 'Recursive' diff of two terms.
recursively :: term -> term -> Algorithm term diff diff
recursively a b = liftAp (Recursive a b identity)
-- | Constructs a 'ByIndex' diff of two lists of terms.
byIndex :: [term] -> [term] -> Algorithm term diff [diff]
byIndex a b = liftAp (ByIndex a b identity)
-- | Constructs a 'BySimilarity' diff of two lists of terms.
bySimilarity :: [term] -> [term] -> Algorithm term diff [diff]
bySimilarity a b = liftAp (BySimilarity a b identity)

View File

@ -1,106 +1,152 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
module Alignment
( hasChanges
, linesInRangeOfSource
, numberedRows
, splitAbstractedTerm
, splitDiffByLines
, Row
, alignDiff
, alignBranch
, applyThese
, modifyJoin
) where
import Control.Arrow
import Control.Comonad.Cofree
import Control.Monad
import Control.Monad.Free
import Data.Adjoined
import Prologue hiding (fst, snd)
import Data.Align
import Data.Bifunctor.These
import Data.Coalescent
import Data.Copointed
import Data.Foldable
import Data.Functor.Both as Both
import Data.Functor.Identity
import Data.Maybe
import Data.Monoid
import qualified Data.OrderedMap as Map
import qualified Data.Text as T
import Data.Bifunctor.Join
import Data.Functor.Both
import Data.List (partition)
import Data.Maybe (fromJust)
import Data.Record
import Data.These
import Diff
import Line
import Info
import Patch
import Prelude hiding (fst, snd)
import qualified Prelude
import Range
import Source hiding (fromList, uncons)
import Source hiding (break, fromList, uncons)
import SplitDiff
import Syntax
import Term
-- | Assign line numbers to the lines on each side of a list of rows.
numberedRows :: [Row a] -> [Both (Int, Line a)]
numberedRows = countUp (pure 1)
where countUp from (row : rows) = ((,) <$> from <*> row) : countUp ((+) <$> from <*> (lineIncrement <$> row)) rows
countUp _ [] = []
numberedRows :: [Join These a] -> [Join These (Int, a)]
numberedRows = countUp (both 1 1)
where countUp _ [] = []
countUp from (row : rows) = numberedLine from row : countUp (nextLineNumbers from row) rows
numberedLine from row = fromJust ((,) <$> modifyJoin (uncurry These) from `applyThese` row)
nextLineNumbers from row = modifyJoin (fromThese identity identity) (succ <$ row) <*> from
-- | Determine whether a line contains any patches.
hasChanges :: Line (SplitDiff leaf Info) -> Bool
hasChanges = or . fmap (or . (True <$))
hasChanges :: (Foldable f, Functor f) => SplitDiff f annotation -> Bool
hasChanges = or . (True <$)
-- | Split a diff, which may span multiple lines, into rows of split diffs paired with the Range of characters spanned by that Row on each side of the diff.
splitDiffByLines :: Both (Source Char) -> Diff leaf Info -> [Row (SplitDiff leaf Info, Range)]
splitDiffByLines sources = toList . iter (\ (Annotated infos syntax) -> splitAbstractedTerm ((Free .) . Annotated) sources infos syntax) . fmap (splitPatchByLines sources)
-- | Align a Diff into a list of Join These SplitDiffs representing the (possibly blank) lines on either side.
alignDiff :: HasField fields Range => Both (Source Char) -> SyntaxDiff leaf fields -> [Join These (SplitSyntaxDiff leaf fields)]
alignDiff sources diff = iter (alignSyntax (runBothWith ((Join .) . These)) wrap getRange sources) (alignPatch sources <$> diff)
-- | Split a patch, which may span multiple lines, into rows of split diffs.
splitPatchByLines :: Both (Source Char) -> Patch (Term leaf Info) -> Adjoined (Both (Line (SplitDiff leaf Info, Range)))
splitPatchByLines sources patch = wrapTermInPatch <$> splitAndFoldTerm (unPatch patch)
where splitAndFoldTerm (This deleted) = tsequenceL mempty $ both (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ fst sources)) (Identity <$> deleted)) nil
splitAndFoldTerm (That inserted) = tsequenceL mempty $ both nil (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ snd sources)) (Identity <$> inserted))
splitAndFoldTerm (These deleted inserted) = tsequenceL mempty $ both (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ fst sources)) (Identity <$> deleted)) (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ snd sources)) (Identity <$> inserted))
wrapTermInPatch = fmap (fmap (first (Pure . constructor patch)))
constructor (Replace _ _) = SplitReplace
constructor (Insert _) = SplitInsert
constructor (Delete _) = SplitDelete
-- | Align the contents of a patch into a list of lines on the corresponding side(s) of the diff.
alignPatch :: forall fields leaf. HasField fields Range => Both (Source Char) -> Patch (SyntaxTerm leaf fields) -> [Join These (SplitSyntaxDiff leaf fields)]
alignPatch sources patch = case patch of
Delete term -> fmap (pure . SplitDelete) <$> alignSyntax' this (fst sources) term
Insert term -> fmap (pure . SplitInsert) <$> alignSyntax' that (snd sources) term
Replace term1 term2 -> fmap (pure . SplitReplace) <$> alignWith (fmap (these identity identity const . runJoin) . Join)
(alignSyntax' this (fst sources) term1)
(alignSyntax' that (snd sources) term2)
where getRange = characterRange . extract
alignSyntax' :: (forall a. Identity a -> Join These a) -> Source Char -> SyntaxTerm leaf fields -> [Join These (SyntaxTerm leaf fields)]
alignSyntax' side source term = hylo (alignSyntax side cofree getRange (Identity source)) runCofree (Identity <$> term)
this = Join . This . runIdentity
that = Join . That . runIdentity
-- | Split a term comprised of an Info & Syntax up into one `outTerm` (abstracted by an alignment function & constructor) per line in `Source`.
splitAbstractedTerm :: (Applicative f, Coalescent (f (Line (Maybe (Identity outTerm), Range))), Coalescent (f (Line (Maybe (T.Text, outTerm), Range))), Foldable f, TotalCrosswalk f) => (Info -> Syntax leaf outTerm -> outTerm) -> f (Source Char) -> f Info -> Syntax leaf (Adjoined (f (Line (outTerm, Range)))) -> Adjoined (f (Line (outTerm, Range)))
splitAbstractedTerm makeTerm sources infos syntax = case syntax of
Leaf a -> tsequenceL (pure mempty) $ fmap <$> ((\ categories -> fmap (\ range -> (makeTerm (Info range categories) (Leaf a), range))) <$> (Diff.categories <$> infos)) <*> (linesInRangeOfSource <$> (characterRange <$> infos) <*> sources)
Indexed children -> adjoinChildren sources infos (constructor (Indexed . fmap runIdentity)) (Identity <$> children)
Fixed children -> adjoinChildren sources infos (constructor (Fixed . fmap runIdentity)) (Identity <$> children)
Keyed children -> adjoinChildren sources infos (constructor (Keyed . Map.fromList)) (Map.toList children)
where constructor with info = makeTerm info . with
-- | The Applicative instance f is either Identity or Both. Identity is for Terms in Patches, Both is for Diffs in unchanged portions of the diff.
alignSyntax :: (Applicative f, HasField fields Range) => (forall a. f a -> Join These a) -> (SyntaxTermF leaf fields term -> term) -> (term -> Range) -> f (Source Char) -> TermF (Syntax leaf) (f (Record fields)) [Join These term] -> [Join These term]
alignSyntax toJoinThese toNode getRange sources (infos :< syntax) = catMaybes $ case syntax of
Leaf s -> wrapInBranch (const (Leaf s)) <$> alignBranch getRange [] bothRanges
Syntax.Comment a -> wrapInBranch (const (Syntax.Comment a)) <$> alignBranch getRange [] bothRanges
Fixed children -> wrapInBranch Fixed <$> alignBranch getRange (join children) bothRanges
_ -> wrapInBranch Indexed <$> alignBranch getRange (join (toList syntax)) bothRanges
where bothRanges = modifyJoin (fromThese [] []) lineRanges
lineRanges = toJoinThese $ actualLineRanges <$> (characterRange <$> infos) <*> sources
wrapInBranch constructor = applyThese $ toJoinThese ((\ info (range, children) -> toNode (setCharacterRange info range :< constructor children)) <$> infos)
-- | Adjoin a branch terms lines, wrapping children & context in branch nodes using a constructor.
adjoinChildren :: (Copointed c, Functor c, Applicative f, Coalescent (f (Line (Maybe (c a), Range))), Foldable f, TotalCrosswalk f) => f (Source Char) -> f Info -> (Info -> [c a] -> outTerm) -> [c (Adjoined (f (Line (a, Range))))] -> Adjoined (f (Line (outTerm, Range)))
adjoinChildren sources infos constructor children = wrap <$> leadingContext <> lines
where (lines, next) = foldr (childLines sources) (mempty, end <$> ranges) children
ranges = characterRange <$> infos
categories = Diff.categories <$> infos
leadingContext = tsequenceL (pure mempty) $ makeContextLines <$> (linesInRangeOfSource <$> (Range <$> (start <$> ranges) <*> next) <*> sources)
wrap = (wrapLineContents <$> (makeBranchTerm constructor <$> categories <*> next) <*>)
makeBranchTerm constructor categories next children = let range = unionRangesFrom (rangeAt next) $ Prelude.snd <$> children in
(constructor (Info range categories) . catMaybes . toList $ Prelude.fst <$> children, range)
-- | Given a function to get the range, a list of already-aligned children, and the lists of ranges spanned by a branch, return the aligned lines.
alignBranch :: (term -> Range) -> [Join These term] -> Both [Range] -> [Join These (Range, [term])]
-- There are no more ranges, so were done.
alignBranch _ _ (Join ([], [])) = []
-- There are no more children, so we can just zip the remaining ranges together.
alignBranch _ [] ranges = runBothWith (alignWith Join) (fmap (flip (,) []) <$> ranges)
-- There are both children and ranges, so we need to proceed line by line
alignBranch getRange children ranges = case intersectingChildren of
-- No child intersects the current ranges on either side, so advance.
[] -> (flip (,) [] <$> headRanges) : alignBranch getRange children (drop 1 <$> ranges)
-- At least one child intersects on at least one side.
_ -> case intersectionsWithHeadRanges <$> listToMaybe symmetricalChildren of
-- At least one child intersects on both sides, so align symmetrically.
Just (True, True) -> let (line, remaining) = lineAndRemaining intersectingChildren (Just headRanges) in
line $ alignBranch getRange (remaining <> nonIntersectingChildren) (drop 1 <$> ranges)
-- A symmetrical child intersects on the right, so align asymmetrically on the left.
Just (False, True) -> alignAsymmetrically leftRange first
-- A symmetrical child intersects on the left, so align asymmetrically on the right.
Just (True, False) -> alignAsymmetrically rightRange second
-- No symmetrical child intersects, so align asymmetrically, picking the left side first to match the deletion/insertion order convention in diffs.
_ -> if any (isThis . runJoin) asymmetricalChildren
then alignAsymmetrically leftRange first
else alignAsymmetrically rightRange second
where (intersectingChildren, nonIntersectingChildren) = partition (or . intersects getRange headRanges) children
(symmetricalChildren, asymmetricalChildren) = partition (isThese . runJoin) intersectingChildren
intersectionsWithHeadRanges = fromThese True True . runJoin . intersects getRange headRanges
Just headRanges = Join <$> bisequenceL (runJoin (listToMaybe <$> Join (runBothWith These ranges)))
(leftRange, rightRange) = splitThese headRanges
alignAsymmetrically range advanceBy = let (line, remaining) = lineAndRemaining asymmetricalChildren range in
line $ alignBranch getRange (remaining <> symmetricalChildren <> nonIntersectingChildren) (modifyJoin (advanceBy (drop 1)) ranges)
lineAndRemaining _ Nothing = (identity, [])
lineAndRemaining children (Just ranges) = let (intersections, remaining) = alignChildren getRange children ranges in
((:) $ (,) <$> ranges `applyToBoth` (sortBy (compare `on` getRange) <$> intersections), remaining)
-- | Accumulate the lines of and between a branch terms children.
childLines :: (Copointed c, Functor c, Applicative f, Coalescent (f (Line (Maybe (c a), Range))), Foldable f, TotalCrosswalk f) => f (Source Char) -> c (Adjoined (f (Line (a, Range)))) -> (Adjoined (f (Line (Maybe (c a), Range))), f Int) -> (Adjoined (f (Line (Maybe (c a), Range))), f Int)
-- We depend on source ranges increasing monotonically. If a child invalidates that, e.g. if its a move in a Keyed node, we dont output rows for it in this iteration. (It will still show up in the diff as context rows.) This works around https://github.com/github/semantic-diff/issues/488.
childLines sources child (nextLines, next) | or ((>) . end <$> childRanges <*> next) = (nextLines, next)
| otherwise = ((makeChildLines <$> copoint child)
<> tsequenceL (pure mempty) (makeContextLines <$> trailingContextLines)
<> nextLines, start <$> childRanges)
where makeChildLines = fmap (fmap (first (Just . (<$ child))))
trailingContextLines = linesInRangeOfSource <$> (Range <$> (end <$> childRanges) <*> next) <*> sources
childRanges = unionRangesFrom <$> (rangeAt <$> next) <*> (concat . fmap (fmap Prelude.snd . unLine) <$> sequenceA (copoint child))
-- | Given a list of aligned children, produce lists of their intersecting first lines, and a list of the remaining lines/nonintersecting first lines.
alignChildren :: (term -> Range) -> [Join These term] -> Join These Range -> (Both [term], [Join These term])
alignChildren _ [] _ = (both [] [], [])
alignChildren getRange (first:rest) headRanges
| ~(l, r) <- splitThese first
= case intersectionsWithHeadRanges first of
-- It intersects on both sides, so we can just take the first line whole.
(True, True) -> ((<>) <$> toTerms first <*> firstRemaining, restRemaining)
-- It only intersects on the left, so split it up.
(True, False) -> ((<>) <$> toTerms (fromJust l) <*> firstRemaining, maybe identity (:) r restRemaining)
-- It only intersects on the right, so split it up.
(False, True) -> ((<>) <$> toTerms (fromJust r) <*> firstRemaining, maybe identity (:) l restRemaining)
-- It doesnt intersect at all, so skip it and move along.
(False, False) -> (firstRemaining, first:restRemaining)
| otherwise = alignChildren getRange rest headRanges
where (firstRemaining, restRemaining) = alignChildren getRange rest headRanges
toTerms line = modifyJoin (fromThese [] []) (pure <$> line)
intersectionsWithHeadRanges = fromThese False False . runJoin . intersects getRange headRanges
makeContextLines :: Adjoined (Line Range) -> Adjoined (Line (Maybe a, Range))
makeContextLines = fmap (fmap ((,) Nothing))
-- | Test ranges and terms for intersection on either or both sides.
intersects :: (term -> Range) -> Join These Range -> Join These term -> Join These Bool
intersects getRange ranges line = intersectsRange <$> ranges `applyToBoth` modifyJoin (fromThese (Range (-1) (-1)) (Range (-1) (-1))) (getRange <$> line)
-- | Produce open/closed lines for the portion of the source spanned by a range.
linesInRangeOfSource :: Range -> Source Char -> Adjoined (Line Range)
linesInRangeOfSource range source = fromList $ pureBy (openRange source) <$> actualLineRanges range source
-- | Split a These value up into independent These values representing the left and right sides, if any.
splitThese :: Join These a -> (Maybe (Join These a), Maybe (Join These a))
splitThese these = fromThese Nothing Nothing $ bimap (Just . Join . This) (Just . Join . That) (runJoin these)
-- | Does this Range in this Source end with a newline?
openRange :: Source Char -> Range -> Bool
openRange source range = (at source <$> maybeLastIndex range) /= Just '\n'
infixl 4 `applyThese`
-- | A row in a split diff, composed of a before line and an after line.
type Row a = Both (Line a)
-- | Like `<*>`, but it returns its result in `Maybe` since the result is the intersection of the shapes of the inputs.
applyThese :: Join These (a -> b) -> Join These a -> Maybe (Join These b)
applyThese (Join fg) (Join ab) = fmap Join . uncurry maybeThese $ uncurry (***) (bimap (<*>) (<*>) (unpack fg)) (unpack ab)
where unpack = fromThese Nothing Nothing . bimap Just Just
infixl 4 `applyToBoth`
-- | Like `<*>`, but it takes a `Both` on the right to ensure that it can always return a value.
applyToBoth :: Join These (a -> b) -> Both a -> Join These b
applyToBoth (Join fg) (Join (a, b)) = Join $ these (This . ($ a)) (That . ($ b)) (\ f g -> These (f a) (g b)) fg
-- Map over the bifunctor inside a Join, producing another Join.
modifyJoin :: (p a a -> q b b) -> Join p a -> Join q b
modifyJoin f = Join . f . runJoin
-- | Given a pair of Maybes, produce a These containing Just their values, or Nothing if they havent any.
maybeThese :: Maybe a -> Maybe b -> Maybe (These a b)
maybeThese (Just a) (Just b) = Just (These a b)
maybeThese (Just a) _ = Just (This a)
maybeThese _ (Just b) = Just (That b)
maybeThese _ _ = Nothing

111
src/Arguments.hs Normal file
View File

@ -0,0 +1,111 @@
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Arguments (Arguments(..), CmdLineOptions(..), DiffMode(..), ExtraArg(..), RunMode(..), programArguments, args) where
import Data.Functor.Both
import Data.Maybe
import Data.Text
import Prologue hiding ((<>))
import Prelude
import System.Environment
import System.Directory
import System.IO.Error (IOError)
import qualified Renderer as R
data ExtraArg = ShaPair (Both (Maybe String))
| FileArg FilePath
deriving (Show)
data DiffMode = PathDiff (Both FilePath)
| CommitDiff
deriving (Show)
data RunMode = Diff
| Parse
deriving (Show)
-- | The command line options to the application (arguments for optparse-applicative).
data CmdLineOptions = CmdLineOptions
{ outputFormat :: R.Format
, maybeTimeout :: Maybe Float
, outputFilePath :: Maybe FilePath
, noIndex :: Bool
, extraArgs :: [ExtraArg]
, developmentMode' :: Bool
, runMode' :: RunMode
}
-- | Arguments for the program (includes command line, environment, and defaults).
data Arguments = Arguments
{ gitDir :: FilePath
, alternateObjectDirs :: [Text]
, format :: R.Format
, timeoutInMicroseconds :: Int
, output :: Maybe FilePath
, diffMode :: DiffMode
, runMode :: RunMode
, shaRange :: Both (Maybe String)
, filePaths :: [FilePath]
, developmentMode :: Bool
} deriving (Show)
-- | Returns Arguments for the program from parsed command line arguments.
programArguments :: CmdLineOptions -> IO Arguments
programArguments CmdLineOptions{..} = do
pwd <- getCurrentDirectory
gitDir <- fromMaybe pwd <$> lookupEnv "GIT_DIR"
eitherObjectDirs <- try $ parseObjectDirs . toS <$> getEnv "GIT_ALTERNATE_OBJECT_DIRECTORIES"
let alternateObjectDirs = case (eitherObjectDirs :: Either IOError [Text]) of
(Left _) -> []
(Right objectDirs) -> objectDirs
let filePaths = fetchPaths extraArgs
pure Arguments
{ gitDir = gitDir
, alternateObjectDirs = alternateObjectDirs
, format = outputFormat
, timeoutInMicroseconds = maybe defaultTimeout toMicroseconds maybeTimeout
, output = outputFilePath
, diffMode = case (noIndex, filePaths) of
(True, [fileA, fileB]) -> PathDiff (both fileA fileB)
(_, _) -> CommitDiff
, runMode = runMode'
, shaRange = fetchShas extraArgs
, filePaths = filePaths
, developmentMode = developmentMode'
}
where
fetchPaths :: [ExtraArg] -> [FilePath]
fetchPaths [] = []
fetchPaths (FileArg x:xs) = x : fetchPaths xs
fetchPaths (_:xs) = fetchPaths xs
fetchShas :: [ExtraArg] -> Both (Maybe String)
fetchShas [] = both Nothing Nothing
fetchShas (ShaPair x:_) = x
fetchShas (_:xs) = fetchShas xs
-- | Quickly assemble an Arguments data record with defaults.
args :: FilePath -> String -> String -> [String] -> R.Format -> Arguments
args gitDir sha1 sha2 filePaths format = Arguments
{ gitDir = gitDir
, alternateObjectDirs = []
, format = format
, timeoutInMicroseconds = defaultTimeout
, output = Nothing
, diffMode = CommitDiff
, runMode = Diff
, shaRange = Just <$> both sha1 sha2
, filePaths = filePaths
, developmentMode = False
}
-- | 7 seconds
defaultTimeout :: Int
defaultTimeout = 7 * 1000000
toMicroseconds :: Float -> Int
toMicroseconds num = floor $ num * 1000000
parseObjectDirs :: Text -> [Text]
parseObjectDirs = split (== ':')

View File

@ -1,43 +1,356 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Category where
import Term
import Control.Comonad.Cofree
import Data.Set
import Prologue
import Data.Functor.Listable
import Data.Text (pack)
import Data.Text.Listable
-- | A standardized category of AST node. Used to determine the semantics for
-- | semantic diffing and define comparability of nodes.
data Category =
-- | An operator with 2 operands.
BinaryOperator
data Category
-- | The top-level branch node.
= Program
-- | A node indicating syntax errors.
| ParseError
-- | A boolean expression.
| Boolean
-- | A bitwise operator.
| BitwiseOperator
-- | A boolean operator (e.g. ||, &&).
| BooleanOperator
-- | A literal key-value data structure.
| DictionaryLiteral
-- | A pair, e.g. of a key & value
| Pair
-- | A call to a function.
| FunctionCall
-- | A function declaration.
| Function
-- | An identifier.
| Identifier
-- | A function's parameters.
| Params
-- | A function's expression statements.
| ExpressionStatements
-- | A method call on an object.
| MethodCall
-- | A method's arguments.
| Args
-- | A string literal.
| StringLiteral
-- | An integer literal.
| IntegerLiteral
-- | A regex literal.
| Regex
-- | A return statement.
| Return
-- | A symbol literal.
| SymbolLiteral
-- | A template string literal.
| TemplateString
-- | An array literal.
| ArrayLiteral
-- | An assignment expression.
| Assignment
-- | A math assignment expression.
| MathAssignment
-- | A member access expression.
| MemberAccess
-- | A subscript access expression.
| SubscriptAccess
-- | A variable assignment within a variable declaration.
| VarAssignment
-- | A variable declaration.
| VarDecl
-- | A switch expression.
| Switch
-- | A if/else expression.
| If
-- | A for expression.
| For
-- | A while expression.
| While
-- | A do/while expression.
| DoWhile
-- | A ternary expression.
| Ternary
-- | A case expression.
| Case
-- | An expression with an operator.
| Operator
-- | An comma operator expression
| CommaOperator
-- | An object/dictionary/hash literal.
| Object
-- | A throw statement.
| Throw
-- | A constructor statement, e.g. new Foo;
| Constructor
-- | A try statement.
| Try
-- | A catch statement.
| Catch
-- | A finally statement.
| Finally
-- | A class declaration.
| Class
-- | A class method declaration.
| Method
-- | A comment.
| Comment
-- | A non-standard category, which can be used for comparability.
| Other String
deriving (Eq, Show, Ord)
| Other Text
-- | A relational operator (e.g. < or >=)
| RelationalOperator
-- | An empty statement. (e.g. ; in JavaScript)
| Empty
-- | A number literal.
| NumberLiteral
-- | A mathematical operator (e.g. +, -, *, /).
| MathOperator
-- | A module
| Module
-- | An import
| Import
-- | An export
| Export
-- | An anonymous function.
| AnonymousFunction
-- | An interpolation (e.g. "#{bar}" in Ruby)
| Interpolation
-- | A subshell command (e.g. `ls -la` in Ruby)
| Subshell
-- | Operator assignment, e.g. a ||= b, a += 1 in Ruby.
| OperatorAssignment
-- | A yield statement.
| Yield
-- | An until expression.
| Until
-- | A unless/else expression.
| Unless
| Begin
| Else
| Elsif
| Ensure
| Rescue
-- | Formerly used for Rubys @x rescue y@ modifier syntax. Deprecated. Use @Modifier Rescue@ instead. Left in place to preserve hashing & RWS results.
| RescueModifier
| RescuedException
| RescueArgs
| When
| Negate
-- | A select expression in Go.
| Select
| Defer
| Go
| Slice
| TypeAssertion
| TypeConversion
-- | An argument pair, e.g. foo(run: true) or foo(:run => true) in Ruby.
| ArgumentPair
-- | A keyword parameter, e.g. def foo(name:) or def foo(name:false) in Ruby.
| KeywordParameter
-- | An optional/default parameter, e.g. def foo(name = nil) in Ruby.
| OptionalParameter
-- | A splat parameter, e.g. def foo(*array) in Ruby.
| SplatParameter
-- | A hash splat parameter, e.g. def foo(**option) in Ruby.
| HashSplatParameter
-- | A block parameter, e.g. def foo(&block) in Ruby.
| BlockParameter
-- | A float literal.
| FloatLiteral
-- | An array type declaration, e.g. [2]string in Go.
| ArrayTy
-- | A dictionary type declaration, e.g. map[string] in Go.
| DictionaryTy
-- | A Struct type declaration, struct Foo {..} in Go.
| StructTy
-- | A Struct constructor, e.g. foo = Foo {..} in Go.
| Struct
-- | A break statement, e.g. break; in JavaScript.
| Break
-- | A continue statement, e.g. continue; in JavaScript.
| Continue
-- | A binary statement, e.g. a | b in Ruby.
| Binary
-- | A unary statement, e.g. !a in Ruby.
| Unary
-- | A constant, e.g `Foo::Bar` in Ruby.
| Constant
-- | A superclass, e.g `< Foo` in Ruby.
| Superclass
-- | A singleton class declaration, e.g. `class << self;end` in Ruby
| SingletonClass
-- | A range expression, e.g. `1..10` in Ruby.
| RangeExpression
-- | A scope resolution operator, e.g. `Foo::bar` in Ruby.
| ScopeOperator
-- | A BEGIN {} block of statements.
| BeginBlock
-- | An END {} block of statements.
| EndBlock
| ParameterDecl
-- | A default case in a switch statement.
| DefaultCase
-- | A type declaration.
| TypeDecl
| PointerTy
-- | A field declaration.
| FieldDecl
-- | A slice type, e.g. []string{"hello"} in Go.
| SliceTy
-- | An element of a slice literal.
| Element
-- | A literal value.
| Literal
-- | A channel type in Go.
| ChannelTy
-- | A send statement in Go.
| Send
-- | An Index expression, e.g. x[1] in Go.
| IndexExpression
-- | A function type.
| FunctionTy
-- | An increment statement, e.g. i++ in Go.
| IncrementStatement
-- | A decrement statement, e.g. i-- in Go.
| DecrementStatement
-- | A qualified identifier, e.g. Module.function in Go.
| QualifiedIdentifier
| FieldDeclarations
-- | A Go rune literal.
| RuneLiteral
-- | A modifier version of another Category, e.g. Rubys trailing @if@, @while@, etc. terms, whose subterms are swapped relative to regular @if@, @while@, etc. terms.
| Modifier Category
deriving (Eq, Generic, Ord, Show)
-- | The class of types that have categories.
class Categorizable a where
categories :: a -> Set Category
{-# DEPRECATED RescueModifier "Deprecated; use Modifier Rescue instead." #-}
instance Categorizable annotation => Categorizable (Term a annotation) where
categories (annotation :< _) = categories annotation
-- | Test whether the categories from the categorizables intersect.
comparable :: Categorizable a => a -> a -> Bool
comparable a b = catsA == catsB || (not . Data.Set.null $ intersection catsA catsB)
where
catsA = categories a
catsB = categories b
-- Instances
instance Hashable Category
instance (StringConv Category Text) where
strConv _ = pack . show
instance Listable Category where
tiers
= cons0 Program
\/ cons0 ParseError
\/ cons0 Boolean
\/ cons0 BooleanOperator
\/ cons0 MathOperator
\/ cons0 DictionaryLiteral
\/ cons0 Pair
\/ cons0 FunctionCall
\/ cons0 Function
\/ cons0 Identifier
\/ cons0 Params
\/ cons0 ExpressionStatements
\/ cons0 MethodCall
\/ cons0 Args
\/ cons0 StringLiteral
\/ cons0 IntegerLiteral
\/ cons0 NumberLiteral
\/ cons0 Regex
\/ cons0 Return
\/ cons0 SymbolLiteral
\/ cons0 TemplateString
\/ cons0 ArrayLiteral
\/ cons0 Assignment
\/ cons0 MathAssignment
\/ cons0 MemberAccess
\/ cons0 SubscriptAccess
\/ cons0 VarAssignment
\/ cons0 VarDecl
\/ cons0 For
\/ cons0 DoWhile
\/ cons0 While
\/ cons0 Switch
\/ cons0 If
\/ cons0 Ternary
\/ cons0 Case
\/ cons0 Operator
\/ cons0 CommaOperator
\/ cons0 Object
\/ cons0 Throw
\/ cons0 Constructor
\/ cons0 Try
\/ cons0 Catch
\/ cons0 Finally
\/ cons0 Class
\/ cons0 Method
\/ cons0 Comment
\/ cons0 RelationalOperator
\/ cons0 Empty
\/ cons0 Module
\/ cons0 Import
\/ cons0 Export
\/ cons0 AnonymousFunction
\/ cons0 Interpolation
\/ cons0 Subshell
\/ cons0 OperatorAssignment
\/ cons0 Yield
\/ cons0 Until
\/ cons0 Unless
\/ cons0 Begin
\/ cons0 Else
\/ cons0 Elsif
\/ cons0 Ensure
\/ cons0 Rescue
\/ cons0 RescueModifier
\/ cons0 RescuedException
\/ cons0 RescueArgs
\/ cons0 When
\/ cons0 Negate
\/ cons0 Select
\/ cons0 Defer
\/ cons0 Go
\/ cons0 Slice
\/ cons0 TypeAssertion
\/ cons0 TypeConversion
\/ cons0 ArgumentPair
\/ cons0 KeywordParameter
\/ cons0 OptionalParameter
\/ cons0 SplatParameter
\/ cons0 HashSplatParameter
\/ cons0 BlockParameter
\/ cons0 FloatLiteral
\/ cons0 ArrayTy
\/ cons0 DictionaryTy
\/ cons0 StructTy
\/ cons0 Struct
\/ cons0 Break
\/ cons0 Continue
\/ cons0 Binary
\/ cons0 Unary
\/ cons0 Constant
\/ cons0 Superclass
\/ cons0 SingletonClass
\/ cons0 RangeExpression
\/ cons0 ScopeOperator
\/ cons0 BeginBlock
\/ cons0 EndBlock
\/ cons0 ParameterDecl
\/ cons0 DefaultCase
\/ cons0 TypeDecl
\/ cons0 PointerTy
\/ cons0 FieldDecl
\/ cons0 SliceTy
\/ cons0 Element
\/ cons0 Literal
\/ cons0 ChannelTy
\/ cons0 Send
\/ cons0 IndexExpression
\/ cons0 FunctionTy
\/ cons0 IncrementStatement
\/ cons0 DecrementStatement
\/ cons0 QualifiedIdentifier
\/ cons0 FieldDeclarations
\/ cons0 RuneLiteral
\/ cons1 (Other . unListableText)
\/ cons1 Modifier

View File

@ -1,22 +0,0 @@
{-# LANGUAGE UndecidableInstances #-}
module Control.Comonad.Cofree where
import Data.Copointed
data Cofree functor annotation = annotation :< (functor (Cofree functor annotation))
deriving (Functor, Foldable, Traversable)
instance (Eq annotation, Eq (functor (Cofree functor annotation))) => Eq (Cofree functor annotation) where
a :< f == b :< g = a == b && f == g
instance (Show annotation, Show (functor (Cofree functor annotation))) => Show (Cofree functor annotation) where
showsPrec n (a :< f) = showsPrec n a . (" :< " ++) . showsPrec n f
unwrap :: Cofree functor annotation -> functor (Cofree functor annotation)
unwrap (_ :< f) = f
unfold :: Functor functor => (seed -> (annotation, functor seed)) -> seed -> Cofree functor annotation
unfold grow seed = case grow seed of (annotation, functor) -> annotation :< (unfold grow <$> functor)
instance Copointed (Cofree functor) where
copoint (annotation :< _) = annotation

View File

@ -1,18 +0,0 @@
{-# LANGUAGE UndecidableInstances #-}
module Control.Monad.Free where
data Free functor pure = Free (functor (Free functor pure)) | Pure pure
deriving (Functor, Foldable, Traversable)
instance (Eq pure, Eq (functor (Free functor pure))) => Eq (Free functor pure) where
Pure a == Pure b = a == b
Free f == Free g = f == g
_ == _ = False
instance (Show pure, Show (functor (Free functor pure))) => Show (Free functor pure) where
showsPrec n (Pure a) = ("Pure " ++) . showsPrec n a
showsPrec n (Free f) = ("Free " ++) . showsPrec n f
iter :: Functor functor => (functor pure -> pure) -> Free functor pure -> pure
iter _ (Pure a) = a
iter f (Free g) = f (iter f <$> g)

View File

@ -1,68 +0,0 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Adjoined where
import Control.Applicative
import Control.Monad
import Data.Align
import Data.Bifunctor.These
import Data.Coalescent
import Data.Sequence as Seq hiding (null)
-- | A collection of elements which can be adjoined onto other such collections associatively. There are two big wins with Data.Adjoined:
-- |
-- | 1. Efficient adjoining of lines and concatenation, thanks to its use of Data.Sequences `Seq` type.
-- | 2. The Monoid instance guarantees that adjoining cannot touch any lines other than the outermost.
-- |
-- | Since aligning diffs proceeds through the diff tree depth-first, adjoining child nodes and context from right to left, the former is crucial for efficiency, and the latter is crucial for correctness. Prior to using Data.Adjoined, repeatedly adjoining the last line in a node into its parent, and then its grandparent, and so forth, would sometimes cause blank lines to “travel” downwards, ultimately shifting blank lines at the end of nodes down proportionately to the depth in the tree at which they were introduced.
newtype Adjoined a = Adjoined { unAdjoined :: Seq a }
deriving (Eq, Foldable, Functor, Show, Traversable)
-- | Construct an Adjoined from a list.
fromList :: [a] -> Adjoined a
fromList = Adjoined . Seq.fromList
-- | Construct Adjoined by adding an element at the left.
cons :: a -> Adjoined a -> Adjoined a
cons a (Adjoined as) = Adjoined (a <| as)
-- | Destructure a non-empty Adjoined into Just the leftmost element and the rightward remainder of the Adjoined, or Nothing otherwise.
uncons :: Adjoined a -> Maybe (a, Adjoined a)
uncons (Adjoined v) | a :< as <- viewl v = Just (a, Adjoined as)
| otherwise = Nothing
-- | Construct Adjoined by adding an element at the right.
snoc :: Adjoined a -> a -> Adjoined a
snoc (Adjoined as) a = Adjoined (as |> a)
-- | Destructure a non-empty Adjoined into Just the rightmost element and the leftward remainder of the Adjoined, or Nothing otherwise.
unsnoc :: Adjoined a -> Maybe (Adjoined a, a)
unsnoc (Adjoined v) | as :> a <- viewr v = Just (Adjoined as, a)
| otherwise = Nothing
instance Applicative Adjoined where
pure = return
(<*>) = ap
instance Alternative Adjoined where
empty = Adjoined Seq.empty
Adjoined a <|> Adjoined b = Adjoined (a >< b)
instance Monad Adjoined where
return = Adjoined . return
a >>= f | Just (a, as) <- uncons a = f a <|> (as >>= f)
| otherwise = Adjoined Seq.empty
instance Coalescent a => Monoid (Adjoined a) where
mempty = Adjoined Seq.empty
a `mappend` b | Just (as, a) <- unsnoc a,
Just (b, bs) <- uncons b
= as <|> coalesce a b <|> bs
| otherwise = Adjoined (unAdjoined a >< unAdjoined b)
instance Align Adjoined where
nil = Adjoined Seq.empty
align as bs | Just (as, a) <- unsnoc as,
Just (bs, b) <- unsnoc bs = align as bs `snoc` These a b
| null bs = This <$> as
| null as = That <$> bs
| otherwise = nil

View File

@ -1,37 +0,0 @@
module Data.Align where
import Data.Bifunctor.These
import Data.Functor.Identity
-- | A functor which can be aligned, essentially the union of (potentially) asymmetrical values.
-- |
-- | For example, this allows a zip over lists which pads out the shorter side with a default value.
class Functor f => Align f where
-- | The empty value. The identity value for `align` (modulo the `This` or `That` constructor wrapping the results).
nil :: f a
-- | Combine two structures into a structure of `These` holding pairs of values in `These` where they overlap, and individual values in `This` and `That` elsewhere.
-- |
-- | Analogous with `zip`.
align :: f a -> f b -> f (These a b)
align = alignWith id
-- | Combine two structures into a structure by applying a function to pairs of values in `These` where they overlap, and individual values in `This` and `That` elsewhere.
-- |
-- | Analogous with `zipWith`.
alignWith :: (These a b -> c) -> f a -> f b -> f c
alignWith f a b = f <$> align a b
-- | A functor which can be traversed through an `Align`able functor, inverting the nesting of one in the other, given some default value.
-- |
-- | Analogous with `zip`, in that it can e.g. turn a tuple of lists into a list of tuples.
class Functor t => TotalCrosswalk t where
-- | Given some default value, embed a structure into an `Align`able functor by mapping its elements into that functor and convoluting (inverting the embedding).
tcrosswalk :: Align f => t b -> (a -> f b) -> t a -> f (t b)
tcrosswalk d f = tsequenceL d . fmap f
-- | Given some default value, convolute (invert the embedding of) a structure over an `Align`able functor.
tsequenceL :: Align f => t a -> t (f a) -> f (t a)
tsequenceL d = tcrosswalk d id
instance TotalCrosswalk Identity where
tcrosswalk _ f = fmap Identity . f . runIdentity

68
src/Data/Align/Generic.hs Normal file
View File

@ -0,0 +1,68 @@
{-# LANGUAGE DefaultSignatures, TypeOperators #-}
module Data.Align.Generic where
import Control.Monad
import Data.Align
import Data.These
import GHC.Generics
import Prologue
import Syntax
-- | Functors which can be aligned (structure-unioning-ly zipped). The default implementation will operate generically over the constructors in the aligning type.
class Functor f => GAlign f where
galign :: f a -> f b -> Maybe (f (These a b))
default galign :: (Generic1 f, GAlign (Rep1 f)) => f a -> f b -> Maybe (f (These a b))
galign a b = to1 <$> galign (from1 a) (from1 b)
-- Generically-derived instances
instance Eq a => GAlign (Syntax a)
-- 'Data.Align.Align' instances
instance GAlign [] where galign = galignAlign
instance GAlign Maybe where galign = galignAlign
-- | Implements a function suitable for use as the definition of 'galign' for 'Align'able functors.
galignAlign :: Align f => f a -> f b -> Maybe (f (These a b))
galignAlign a = Just . align a
-- Generics
-- | 'GAlign' over unit constructors.
instance GAlign U1 where
galign _ _ = Just U1
-- | 'GAlign' over parameters.
instance GAlign Par1 where
galign (Par1 a) (Par1 b) = Just (Par1 (These a b))
-- | 'GAlign' over non-parameter fields. Only equal values are aligned.
instance Eq c => GAlign (K1 i c) where
galign (K1 a) (K1 b) = guard (a == b) >> Just (K1 b)
-- | 'GAlign' over applications over parameters.
instance GAlign f => GAlign (Rec1 f) where
galign (Rec1 a) (Rec1 b) = Rec1 <$> galign a b
-- | 'GAlign' over metainformation (constructor names, etc).
instance GAlign f => GAlign (M1 i c f) where
galign (M1 a) (M1 b) = M1 <$> galign a b
-- | 'GAlign' over sums. Returns 'Nothing' for disjoint constructors.
instance (GAlign f, GAlign g) => GAlign (f :+: g) where
galign a b = case (a, b) of
(L1 a, L1 b) -> L1 <$> galign a b
(R1 a, R1 b) -> R1 <$> galign a b
_ -> Nothing
-- | 'GAlign' over products.
instance (GAlign f, GAlign g) => GAlign (f :*: g) where
galign (a1 :*: b1) (a2 :*: b2) = (:*:) <$> galign a1 a2 <*> galign b1 b2
-- | 'GAlign' over type compositions.
instance (Traversable f, Applicative f, GAlign g) => GAlign (f :.: g) where
galign (Comp1 a) (Comp1 b) = Comp1 <$> sequenceA (galign <$> a <*> b)

View File

@ -1,28 +0,0 @@
module Data.Bifunctor.These where
import Data.Bifunctor
data These a b = This a | That b | These a b
deriving (Eq, Show)
-- | Eliminate These by case analysis.
these :: (a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these f _ _ (This this) = f this
these _ f _ (That that) = f that
these _ _ f (These this that) = f this that
-- | Return Just the value in This, or the first value in These, if any.
maybeFirst :: These a b -> Maybe a
maybeFirst = these Just (const Nothing) ((Just .) . const)
-- | Return Just the value in That, or the second value in These, if any.
maybeSecond :: These a b -> Maybe b
maybeSecond = these (const Nothing) Just ((Just .) . flip const)
-- Instances
instance Bifunctor These where
bimap f _ (This a) = This (f a)
bimap _ g (That b) = That (g b)
bimap f g (These a b) = These (f a) (g b)

View File

@ -1,13 +0,0 @@
module Data.Coalescent where
import Control.Applicative
import Data.Align
import Data.Functor.Identity
-- | The class of types which can optionally be coalesced together.
class Coalescent a where
-- | Returns the result of coalescing the operands together in an Alternative context. If they cannot be coalesced, they should each be produced individually.
coalesce :: (Align f, Alternative f) => a -> a -> f a
instance Coalescent a => Coalescent (Identity a) where
a `coalesce` b = sequenceA (coalesce <$> a <*> b)

View File

@ -1,13 +0,0 @@
module Data.Copointed where
import Data.Functor.Identity
-- | A value that can return its content.
class Copointed c where
copoint :: c a -> a
instance Copointed ((,) a) where
copoint = snd
instance Copointed Identity where
copoint = runIdentity

View File

@ -1,53 +1,33 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Functor.Both where
{-# OPTIONS_GHC -fno-warn-orphans -funbox-strict-fields #-}
module Data.Functor.Both (Both,both, runBothWith, fst, snd, module X) where
import Data.Align
import Data.Bifunctor
import Data.Bifunctor.These
import Data.Maybe
import Prelude hiding (zipWith, fst, snd)
import qualified Prelude
import Data.Bifunctor.Join as X
import Prologue hiding (fst, snd)
import qualified Prologue
-- | A computation over both sides of a pair.
newtype Both a = Both { runBoth :: (a, a) }
deriving (Eq, Foldable, Functor, Ord, Show, Traversable)
type Both a = Join (,) a
-- | Given two operands returns a functor operating on `Both`. This is a curried synonym for Both.
both :: a -> a -> Both a
both = curry Both
-- | Construct Both with These values & defaults.
bothOfThese :: Both a -> These a a -> Both a
bothOfThese a = these (`both` snd a) (both (fst a)) both
-- | Construct Both (Maybe) with These values, defaulting to Nothing.
maybeBothOfThese :: These a a -> Both (Maybe a)
maybeBothOfThese = bothOfThese (pure Nothing) . bimap Just Just
both = curry Join
-- | Apply a function to `Both` sides of a computation.
runBothWith :: (a -> a -> b) -> Both a -> b
runBothWith f = uncurry f . runBoth
runBothWith f = uncurry f . runJoin
-- | Runs the left side of a `Both`.
fst :: Both a -> a
fst = Prelude.fst . runBoth
fst = Prologue.fst . runJoin
-- | Runs the right side of a `Both`.
snd :: Both a -> a
snd = Prelude.snd . runBoth
snd = Prologue.snd . runJoin
unzip :: [Both a] -> Both [a]
unzip = foldr pair (pure [])
where pair (Both (a, b)) (Both (as, bs)) = Both (a : as, b : bs)
instance Applicative Both where
pure a = Both (a, a)
Both (f, g) <*> Both (a, b) = Both (f a, g b)
instance Monoid a => Monoid (Both a) where
instance (Semigroup a, Monoid a) => Monoid (Join (,) a) where
mempty = pure mempty
mappend a b = mappend <$> a <*> b
mappend = (<>)
instance TotalCrosswalk Both where
tsequenceL d = runBothWith (alignWith (\ these -> fromMaybe <$> d <*> maybeBothOfThese these))
instance (Semigroup a) => Semigroup (Join (,) a) where
a <> b = Join $ runJoin a <> runJoin b

View File

@ -0,0 +1,135 @@
module Data.Functor.Listable
( Listable(..)
, mapT
, cons0
, cons1
, cons2
, cons3
, cons4
, cons5
, cons6
, (\/)
, Tier
, Listable1(..)
, tiers1
, Listable2(..)
, tiers2
, liftCons1
, liftCons2
, liftCons3
, liftCons4
, liftCons5
, ListableF(..)
) where
import Data.Bifunctor.Join
import Data.These
import Prologue
import Test.LeanCheck
type Tier a = [a]
-- | Lifting of 'Listable' to @* -> *@.
class Listable1 l where
-- | The tiers for @l :: * -> *@, parameterized by the tiers for @a :: *@.
liftTiers :: [Tier a] -> [Tier (l a)]
-- | A suitable definition of 'tiers' for 'Listable1' type constructors parameterized by 'Listable' types.
tiers1 :: (Listable a, Listable1 l) => [Tier (l a)]
tiers1 = liftTiers tiers
-- | Lifting of 'Listable' to @* -> * -> *@.
class Listable2 l where
-- | The tiers for @l :: * -> * -> *@, parameterized by the tiers for @a :: *@ & @b :: *@.
liftTiers2 :: [Tier a] -> [Tier b] -> [Tier (l a b)]
-- | A suitable definition of 'tiers' for 'Listable2' type constructors parameterized by 'Listable' types.
tiers2 :: (Listable a, Listable b, Listable2 l) => [Tier (l a b)]
tiers2 = liftTiers2 tiers tiers
-- | Lifts a unary constructor to a list of tiers, given a list of tiers for its argument.
--
-- Commonly used in the definition of 'Listable1' and 'Listable2' instances.
liftCons1 :: [Tier a] -> (a -> b) -> [Tier b]
liftCons1 tiers f = mapT f tiers `addWeight` 1
-- | Lifts a binary constructor to a list of tiers, given lists of tiers for its arguments.
--
-- Commonly used in the definition of 'Listable1' and 'Listable2' instances.
liftCons2 :: [Tier a] -> [Tier b] -> (a -> b -> c) -> [Tier c]
liftCons2 tiers1 tiers2 f = mapT (uncurry f) (productWith (,) tiers1 tiers2) `addWeight` 1
-- | Lifts a ternary constructor to a list of tiers, given lists of tiers for its arguments.
--
-- Commonly used in the definition of 'Listable1' and 'Listable2' instances.
liftCons3 :: [Tier a] -> [Tier b] -> [Tier c] -> (a -> b -> c -> d) -> [Tier d]
liftCons3 tiers1 tiers2 tiers3 f = mapT (uncurry3 f) (productWith (\ x (y, z) -> (x, y, z)) tiers1 (liftCons2 tiers2 tiers3 (,)) ) `addWeight` 1
where uncurry3 f (a, b, c) = f a b c
-- | Lifts a quaternary constructor to a list of tiers, given lists of tiers for its arguments.
--
-- Commonly used in the definition of 'Listable1' and 'Listable2' instances.
liftCons4 :: [Tier a] -> [Tier b] -> [Tier c] -> [Tier d] -> (a -> b -> c -> d -> e) -> [Tier e]
liftCons4 tiers1 tiers2 tiers3 tiers4 f = mapT (uncurry4 f) (productWith (\ x (y, z, w) -> (x, y, z, w)) tiers1 (liftCons3 tiers2 tiers3 tiers4 (,,)) ) `addWeight` 1
where uncurry4 f (a, b, c, d) = f a b c d
-- | Lifts a quinary constructor to a list of tiers, given lists of tiers for its arguments.
--
-- Commonly used in the definition of 'Listable1' and 'Listable2' instances.
liftCons5 :: [Tier a] -> [Tier b] -> [Tier c] -> [Tier d] -> [Tier e] -> (a -> b -> c -> d -> e -> f) -> [Tier f]
liftCons5 tiers1 tiers2 tiers3 tiers4 tiers5 f = mapT (uncurry5 f) (productWith (\ x (y, z, w, u) -> (x, y, z, w, u)) tiers1 (liftCons4 tiers2 tiers3 tiers4 tiers5 (,,,)) ) `addWeight` 1
where uncurry5 f (a, b, c, d, e) = f a b c d e
-- | Convenient wrapper for 'Listable1' type constructors and 'Listable' types, where a 'Listable' instance would necessarily be orphaned.
newtype ListableF f a = ListableF { unListableF :: f a }
deriving Show
-- Instances
instance Listable1 Maybe where
liftTiers tiers = cons0 Nothing \/ liftCons1 tiers Just
instance Listable2 (,) where
liftTiers2 = productWith (,)
instance Listable2 Either where
liftTiers2 leftTiers rightTiers = liftCons1 leftTiers Left \/ liftCons1 rightTiers Right
instance Listable a => Listable1 ((,) a) where
liftTiers = liftTiers2 tiers
instance Listable1 [] where
liftTiers tiers = go
where go = cons0 [] \/ liftCons2 tiers go (:)
instance Listable2 p => Listable1 (Join p) where
liftTiers tiers = liftCons1 (liftTiers2 tiers tiers) Join
instance Listable2 These where
liftTiers2 this that = liftCons1 this This \/ liftCons1 that That \/ liftCons2 this that These
instance Listable1 f => Listable2 (CofreeF f) where
liftTiers2 annotationTiers recurTiers = liftCons2 annotationTiers (liftTiers recurTiers) (:<)
instance (Listable1 f, Listable a) => Listable1 (CofreeF f a) where
liftTiers = liftTiers2 tiers
instance Listable1 f => Listable1 (Cofree f) where
liftTiers annotationTiers = go
where go = liftCons1 (liftTiers2 annotationTiers go) cofree
instance Listable1 f => Listable2 (FreeF f) where
liftTiers2 pureTiers recurTiers = liftCons1 pureTiers Pure \/ liftCons1 (liftTiers recurTiers) Free
instance (Listable1 f, Listable a) => Listable1 (FreeF f a) where
liftTiers = liftTiers2 tiers
instance Listable1 f => Listable1 (Free f) where
liftTiers pureTiers = go
where go = liftCons1 (liftTiers2 pureTiers go) free
instance (Listable1 f, Listable a) => Listable (ListableF f a) where
tiers = ListableF `mapT` tiers1

37
src/Data/Mergeable.hs Normal file
View File

@ -0,0 +1,37 @@
{-# LANGUAGE DefaultSignatures #-}
module Data.Mergeable where
import Data.Functor.Identity
import Data.Mergeable.Generic
import GHC.Generics
import Prologue
-- Classes
-- | A 'Mergeable' functor is one which supports pushing itself through an 'Alternative' functor. Note the similarities with 'Traversable' & 'Crosswalk'.
--
-- This is a kind of distributive law which produces (at least) the union of the two functors shapes; i.e. unlike 'Traversable', an 'empty' value in the inner functor does not produce an 'empty' result, and unlike 'Crosswalk', an 'empty' value in the outer functor does not produce an 'empty' result.
--
-- For example, we can use 'merge' to select one side or the other of a diff node in 'Syntax', while correctly handling the fact that some patches dont have any content for that side:
--
-- @
-- let before = iter (\ (a :< s) -> cofree . (fst a :<) <$> sequenceAlt syntax) . fmap (maybeFst . unPatch)
-- @
class Functor t => Mergeable t where
-- | Merge a functor by mapping its elements into an 'Alternative' functor, combining them, and pushing the 'Mergeable' functor inside.
merge :: Alternative f => (a -> f b) -> t a -> f (t b)
default merge :: (Generic1 t, GMergeable (Rep1 t), Alternative f) => (a -> f b) -> t a -> f (t b)
merge = genericMerge
-- | Sequnce a 'Mergeable' functor by 'merge'ing the 'Alternative' values.
sequenceAlt :: Alternative f => t (f a) -> f (t a)
sequenceAlt = merge identity
-- Instances
instance Mergeable [] where merge = gmerge
instance Mergeable Maybe
instance Mergeable Identity where merge f = fmap Identity . f . runIdentity

View File

@ -0,0 +1,46 @@
{-# LANGUAGE TypeOperators #-}
module Data.Mergeable.Generic where
import GHC.Generics
import Prologue
-- Classes
class GMergeable t where
gmerge :: Alternative f => (a -> f b) -> t a -> f (t b)
genericMerge :: (Generic1 t, GMergeable (Rep1 t), Alternative f) => (a -> f b) -> t a -> f (t b)
genericMerge f = fmap to1 . gmerge f . from1
-- Instances
instance GMergeable U1 where
gmerge _ _ = pure U1
instance GMergeable Par1 where
gmerge f (Par1 a) = Par1 <$> f a
instance GMergeable (K1 i c) where
gmerge _ (K1 a) = pure (K1 a)
instance GMergeable f => GMergeable (Rec1 f) where
gmerge f (Rec1 a) = Rec1 <$> gmerge f a
instance GMergeable f => GMergeable (M1 i c f) where
gmerge f (M1 a) = M1 <$> gmerge f a
instance (GMergeable f, GMergeable g) => GMergeable (f :+: g) where
gmerge f (L1 a) = L1 <$> gmerge f a
gmerge f (R1 b) = R1 <$> gmerge f b
instance (GMergeable f, GMergeable g) => GMergeable (f :*: g) where
gmerge f (a :*: b) = (:*:) <$> gmerge f a <*> gmerge f b
instance GMergeable [] where
gmerge f (x:xs) = ((:) <$> f x <|> pure identity) <*> gmerge f xs
gmerge _ [] = pure []
instance GMergeable Maybe where
gmerge f (Just a) = Just <$> f a
gmerge _ Nothing = pure empty

View File

@ -1,11 +0,0 @@
{-# LANGUAGE FlexibleContexts #-}
module Data.Option where
newtype Option a = Option { getOption :: Maybe a }
option :: b -> (a -> b) -> Option a -> b
option b f = maybe b f . getOption
-- | Return Just the concatenation of any elements in a Foldable, or Nothing if it is empty.
maybeConcat :: (Foldable f, Monoid (Option a)) => f a -> Maybe a
maybeConcat = getOption . foldMap (Option. Just)

View File

@ -1,68 +0,0 @@
module Data.OrderedMap (
OrderedMap
, fromList
, toList
, keys
, (!)
, Data.OrderedMap.lookup
, size
, empty
, union
, unions
, intersectionWith
, difference
) where
import qualified Data.Maybe as Maybe
-- | An ordered map of keys and values.
data OrderedMap key value = OrderedMap { toList :: [(key, value)] }
deriving (Show, Eq, Functor, Foldable, Traversable)
instance Eq key => Monoid (OrderedMap key value) where
mempty = fromList []
mappend = union
-- | Construct an ordered map from a list of pairs of keys and values.
fromList :: [(key, value)] -> OrderedMap key value
fromList = OrderedMap
-- | Return a list of keys from the map.
keys :: OrderedMap key value -> [key]
keys (OrderedMap pairs) = fst <$> pairs
infixl 9 !
-- | Look up a value in the map by key, erroring if it doesn't exist.
(!) :: Eq key => OrderedMap key value -> key -> value
map ! key = Maybe.fromMaybe (error "no value found for key") $ Data.OrderedMap.lookup key map
-- | Look up a value in the map by key, returning Nothing if it doesn't exist.
lookup :: Eq key => key -> OrderedMap key value -> Maybe value
lookup key = Prelude.lookup key . toList
-- | Return the number of pairs in the map.
size :: OrderedMap key value -> Int
size = length . toList
-- | An empty ordered map.
empty :: OrderedMap key value
empty = OrderedMap []
-- | Combine `a` and `b`, picking the values from `a` when keys overlap.
union :: Eq key => OrderedMap key value -> OrderedMap key value -> OrderedMap key value
union a b = OrderedMap $ toList a ++ toList (difference b a)
-- | Union a list of ordered maps.
unions :: Eq key => [OrderedMap key value] -> OrderedMap key value
unions = foldl union empty
-- | Return an ordered map by combining the values from `a` and `b` that have
-- | the same key, dropping any values that are only in one of the maps.
intersectionWith :: Eq key => (a -> b -> c) -> OrderedMap key a -> OrderedMap key b -> OrderedMap key c
intersectionWith combine (OrderedMap a) (OrderedMap b) = OrderedMap $ a >>= (\ (key, value) -> maybe [] (pure . (,) key . combine value) $ Prelude.lookup key b)
-- | Return an ordered map with the pairs from `a` whose key isn't in `b`.
difference :: Eq key => OrderedMap key a -> OrderedMap key b -> OrderedMap key a
difference (OrderedMap a) (OrderedMap b) = OrderedMap $ filter ((`notElem` extant) . fst) a
where extant = fst <$> b

View File

@ -0,0 +1,314 @@
{-# LANGUAGE DataKinds, GADTs, RankNTypes, ScopedTypeVariables, TypeOperators #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Data.RandomWalkSimilarity
( rws
, pqGramDecorator
, defaultFeatureVectorDecorator
, featureVectorDecorator
, editDistanceUpTo
, defaultD
, defaultP
, defaultQ
, stripDiff
, stripTerm
, Gram(..)
, Label
, FeatureVector
) where
import Control.Applicative
import Control.Monad.Random
import Control.Monad.State
import Data.Align.Generic
import Data.Array
import Data.Functor.Listable
import Data.Hashable
import qualified Data.IntMap as IntMap
import qualified Data.KdTree.Static as KdTree
import Data.Record
import Data.Semigroup (Min(..), Option(..))
import Data.These
import Diff
import Info
import Patch
import Prologue as P
import qualified SES
import System.Random.Mersenne.Pure64
import Term (termSize, zipTerms, Term, TermF)
type Label f fields label = forall b. TermF f (Record fields) b -> label
type DiffTerms f fields = Term f (Record fields) -> Term f (Record fields) -> Maybe (Diff f (Record fields))
-- | Given a function comparing two terms recursively,
-- a function to compute a Hashable label from an unpacked term, and two lists of terms,
-- compute the diff of a pair of lists of terms using a random walk similarity metric,
-- which completes in log-linear time.
--
-- This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: forall f fields.
(GAlign f, Traversable f, Eq (f (Term f Category)), HasField fields Category, HasField fields (Maybe FeatureVector))
=> DiffTerms f fields -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
-> [Term f (Record fields)] -- ^ The list of old terms.
-> [Term f (Record fields)] -- ^ The list of new terms.
-> [Diff f (Record fields)] -- ^ The resulting list of similarity-matched diffs.
rws compare as bs
| null as, null bs = []
| null as = inserting . eraseFeatureVector <$> bs
| null bs = deleting . eraseFeatureVector <$> as
| otherwise =
-- Construct a State who's final value is a list of (Int, Diff leaf (Record fields))
-- and who's final state is (Int, IntMap UmappedTerm, IntMap UmappedTerm)
traverse findNearestNeighbourToDiff allDiffs &
fmap catMaybes &
-- Run the state with an initial state
(`runState` (minimumTermIndex featurizedAs, toMap featurizedAs, toMap featurizedBs)) &
uncurry deleteRemaining &
insertMapped countersAndDiffs &
fmap snd
where
minimumTermIndex = pred . maybe 0 getMin . getOption . foldMap (Option . Just . Min . termIndex)
sesDiffs = SES.ses replaceIfEqual cost as bs
(featurizedAs, featurizedBs, _, _, countersAndDiffs, allDiffs) =
foldl' (\(as, bs, counterA, counterB, diffs, allDiffs) diff -> case runFree diff of
Pure (Delete term) ->
(as <> pure (featurize counterA term), bs, succ counterA, counterB, diffs, allDiffs <> pure None)
Pure (Insert term) ->
(as, bs <> pure (featurize counterB term), counterA, succ counterB, diffs, allDiffs <> pure (Term (featurize counterB term)))
_ ->
(as, bs, succ counterA, succ counterB, diffs <> pure (These counterA counterB, diff), allDiffs <> pure (Index counterA))
) ([], [], 0, 0, [], []) sesDiffs
findNearestNeighbourToDiff :: TermOrIndexOrNone (UnmappedTerm f fields)
-> State (Int, UnmappedTerms f fields, UnmappedTerms f fields)
(Maybe (These Int Int, Diff f (Record fields)))
findNearestNeighbourToDiff termThing = case termThing of
None -> pure Nothing
Term term -> Just <$> findNearestNeighbourTo term
Index i -> do
(_, unA, unB) <- get
put (i, unA, unB)
pure Nothing
-- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches.
findNearestNeighbourTo :: UnmappedTerm f fields
-> State (Int, UnmappedTerms f fields, UnmappedTerms f fields)
(These Int Int, Diff f (Record fields))
findNearestNeighbourTo term@(UnmappedTerm j _ b) = do
(previous, unmappedA, unmappedB) <- get
fromMaybe (insertion previous unmappedA unmappedB term) $ do
-- Look up the nearest unmapped term in `unmappedA`.
foundA@(UnmappedTerm i _ a) <- nearestUnmapped (IntMap.filterWithKey (\ k _ ->
isInMoveBounds previous k)
unmappedA) kdas term
-- Look up the nearest `foundA` in `unmappedB`
UnmappedTerm j' _ _ <- nearestUnmapped unmappedB kdbs foundA
-- Return Nothing if their indices don't match
guard (j == j')
compared <- compare a b
pure $! do
put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB)
pure (These i j, compared)
-- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff),
-- given a previous index, two sets of umapped terms, and an unmapped term to insert.
insertion :: Int
-> UnmappedTerms f fields
-> UnmappedTerms f fields
-> UnmappedTerm f fields
-> State (Int, UnmappedTerms f fields, UnmappedTerms f fields)
(These Int Int, Diff f (Record fields))
insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do
put (previous, unmappedA, IntMap.delete j unmappedB)
pure (That j, inserting b)
-- | Finds the most-similar unmapped term to the passed-in term, if any.
--
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
--
-- cf §4.2 of RWS-Diff
nearestUnmapped
:: UnmappedTerms f fields -- ^ A set of terms eligible for matching against.
-> KdTree.KdTree Double (UnmappedTerm f fields) -- ^ The k-d tree to look up nearest neighbours within.
-> UnmappedTerm f fields -- ^ The term to find the nearest neighbour to.
-> Maybe (UnmappedTerm f fields) -- ^ The most similar unmapped term, if any.
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (editDistanceUpTo defaultM) . compare (term key) . term) (toList (IntMap.intersection unmapped (toMap (KdTree.kNearest tree defaultL key)))))
insertMapped diffs into = foldl' (\into (i, mappedTerm) ->
insertDiff (i, mappedTerm) into)
into
diffs
-- Given a list of diffs, and unmapped terms, deletes any terms that remain in unmappedA.
deleteRemaining diffs (_, unmappedA, _) = foldl' (\into (i, deletion) ->
insertDiff (This i, deletion) into)
diffs
((termIndex &&& deleting . term) <$> unmappedA)
-- Possibly replace terms in a diff.
replaceIfEqual :: Term f (Record fields) -> Term f (Record fields) -> Maybe (Diff f (Record fields))
replaceIfEqual a b
| (category <$> a) == (category <$> b) = hylo wrap runCofree <$> zipTerms (eraseFeatureVector a) (eraseFeatureVector b)
| otherwise = Nothing
cost = iter (const 0) . (1 <$)
kdas = KdTree.build (elems . feature) featurizedAs
kdbs = KdTree.build (elems . feature) featurizedBs
featurize :: Int -> Term f (Record fields) -> UnmappedTerm f fields
featurize index term = UnmappedTerm index (let Just v = getField (extract term) in v) (eraseFeatureVector term)
eraseFeatureVector :: Term f (Record fields) -> Term f (Record fields)
eraseFeatureVector term = let record :< functor = runCofree term in
cofree (setFeatureVector record Nothing :< functor)
setFeatureVector :: Record fields -> Maybe FeatureVector -> Record fields
setFeatureVector = setField
toMap = IntMap.fromList . fmap (termIndex &&& identity)
-- | Determines whether an index is in-bounds for a move given the most recently matched index.
isInMoveBounds previous i = previous < i && i < previous + defaultMoveBound
-- | Inserts an index and diff pair into a list of indices and diffs.
insertDiff :: (These Int Int, diff) -> [(These Int Int, diff)] -> [(These Int Int, diff)]
insertDiff inserted [] = [ inserted ]
insertDiff a@(ij1, _) (b@(ij2, _):rest) = case (ij1, ij2) of
(These i1 i2, These j1 j2) -> if i1 <= j1 && i2 <= j2 then a : b : rest else b : insertDiff a rest
(This i, This j) -> if i <= j then a : b : rest else b : insertDiff a rest
(That i, That j) -> if i <= j then a : b : rest else b : insertDiff a rest
(This i, These j _) -> if i <= j then a : b : rest else b : insertDiff a rest
(That i, These _ j) -> if i <= j then a : b : rest else b : insertDiff a rest
(This _, That _) -> b : insertDiff a rest
(That _, This _) -> b : insertDiff a rest
(These i1 i2, _) -> case break (isThese . fst) rest of
(rest, tail) -> let (before, after) = foldr' (combine i1 i2) ([], []) (b : rest) in
case after of
[] -> before <> insertDiff a tail
_ -> before <> (a : after) <> tail
where
combine i1 i2 each (before, after) = case fst each of
This j1 -> if i1 <= j1 then (before, each : after) else (each : before, after)
That j2 -> if i2 <= j2 then (before, each : after) else (each : before, after)
These _ _ -> (before, after)
-- | Return an edit distance as the sum of it's term sizes, given an cutoff and a syntax of terms 'f a'.
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
editDistanceUpTo :: (Foldable f, Functor f) => Integer -> Diff f annotation -> Int
editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m
where diffSum patchCost = sum . fmap (maybe 0 patchCost)
defaultD, defaultL, defaultP, defaultQ, defaultMoveBound :: Int
defaultD = 15
-- | How many of the most similar terms to consider, to rule out false positives.
defaultL = 2
defaultP = 2
defaultQ = 3
defaultMoveBound = 2
-- | How many nodes to consider for our constant-time approximation to tree edit distance.
defaultM :: Integer
defaultM = 10
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
data UnmappedTerm f fields = UnmappedTerm {
termIndex :: Int -- ^ The index of the term within its root term.
, feature :: FeatureVector -- ^ Feature vector
, term :: Term f (Record fields) -- ^ The unmapped term
}
-- | Either a `term`, an index of a matched term, or nil.
data TermOrIndexOrNone term = Term term | Index Int | None
-- | An IntMap of unmapped terms keyed by their position in a list of terms.
type UnmappedTerms f fields = IntMap (UnmappedTerm f fields)
type FeatureVector = Array Int Double
-- | A `Gram` is a fixed-size view of some portion of a tree, consisting of a `stem` of _p_ labels for parent nodes, and a `base` of _q_ labels of sibling nodes. Collectively, the bag of `Gram`s for each node of a tree (e.g. as computed by `pqGrams`) form a summary of the tree.
data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] }
deriving (Eq, Show)
-- | Annotates a term with a feature vector at each node, using the default values for the p, q, and d parameters.
defaultFeatureVectorDecorator
:: (Hashable label, Traversable f)
=> Label f fields label
-> Term f (Record fields)
-> Term f (Record (Maybe FeatureVector ': fields))
defaultFeatureVectorDecorator getLabel = featureVectorDecorator getLabel defaultP defaultQ defaultD
-- | Annotates a term with a feature vector at each node, parameterized by stem length, base width, and feature vector dimensions.
featureVectorDecorator :: (Hashable label, Traversable f) => Label f fields label -> Int -> Int -> Int -> Term f (Record fields) -> Term f (Record (Maybe FeatureVector ': fields))
featureVectorDecorator getLabel p q d
= cata collect
. pqGramDecorator getLabel p q
where collect ((gram :. rest) :< functor) = cofree ((foldl' addSubtermVector (Just (unitVector d (hash gram))) functor :. rest) :< functor)
addSubtermVector :: Functor f => Maybe FeatureVector -> Term f (Record (Maybe FeatureVector ': fields)) -> Maybe FeatureVector
addSubtermVector v term = addVectors <$> v <*> rhead (extract term)
addVectors :: Num a => Array Int a -> Array Int a -> Array Int a
addVectors as bs = listArray (0, d - 1) (fmap (\ i -> as ! i + bs ! i) [0..(d - 1)])
-- | Annotates a term with the corresponding p,q-gram at each node.
pqGramDecorator
:: Traversable f
=> Label f fields label -- ^ A function computing the label from an arbitrary unpacked term. This function can use the annotation and functors constructor, but not any recursive values inside the functor (since theyre held parametric in 'b').
-> Int -- ^ 'p'; the desired stem length for the grams.
-> Int -- ^ 'q'; the desired base length for the grams.
-> Term f (Record fields) -- ^ The term to decorate.
-> Term f (Record (Gram label ': fields)) -- ^ The decorated term.
pqGramDecorator getLabel p q = cata algebra
where
algebra term = let label = getLabel term in
cofree ((gram label :. headF term) :< assignParentAndSiblingLabels (tailF term) label)
gram label = Gram (padToSize p []) (padToSize q (pure (Just label)))
assignParentAndSiblingLabels functor label = (`evalState` (replicate (q `div` 2) Nothing <> siblingLabels functor)) (for functor (assignLabels label))
assignLabels :: label
-> Term f (Record (Gram label ': fields))
-> State [Maybe label] (Term f (Record (Gram label ': fields)))
assignLabels label a = case runCofree a of
(gram :. rest) :< functor -> do
labels <- get
put (drop 1 labels)
pure $! cofree ((gram { stem = padToSize p (Just label : stem gram), base = padToSize q labels } :. rest) :< functor)
siblingLabels :: Traversable f => f (Term f (Record (Gram label ': fields))) -> [Maybe label]
siblingLabels = foldMap (base . rhead . extract)
padToSize n list = take n (list <> repeat empty)
-- | Computes a unit vector of the specified dimension from a hash.
unitVector :: Int -> Int -> FeatureVector
unitVector d hash = fmap (/ magnitude) uniform
where
uniform = listArray (0, d - 1) (evalRand components (pureMT (fromIntegral hash)))
magnitude = sqrtDouble (sum (fmap (** 2) uniform))
components = sequenceA (replicate d (liftRand randomDouble))
-- | Strips the head annotation off a term annotated with non-empty records.
stripTerm :: Functor f => Term f (Record (h ': t)) -> Term f (Record t)
stripTerm = fmap rtail
-- | Strips the head annotation off a diff annotated with non-empty records.
stripDiff
:: (Functor f, Functor g)
=> Free (TermF f (g (Record (h ': t)))) (Patch (Term f (Record (h ': t))))
-> Free (TermF f (g (Record t))) (Patch (Term f (Record t)))
stripDiff = mapAnnotations rtail
-- Instances
instance Hashable label => Hashable (Gram label) where
hashWithSalt _ = hash
hash gram = hash (stem gram <> base gram)
instance Listable1 Gram where
liftTiers tiers = liftCons2 (liftTiers (liftTiers tiers)) (liftTiers (liftTiers tiers)) Gram
instance Listable a => Listable (Gram a) where
tiers = tiers1

96
src/Data/Record.hs Normal file
View File

@ -0,0 +1,96 @@
{-# LANGUAGE DataKinds, GADTs, KindSignatures, MultiParamTypeClasses, TypeOperators, ConstraintKinds #-}
module Data.Record where
import Category
import Data.Aeson
import Data.Aeson.Types
import Data.Functor.Listable
import GHC.Show
import Prologue
import Range
import SourceSpan
-- | A type alias for HasField constraints commonly used throughout semantic-diff.
type DefaultFields fields = (HasField fields Category, HasField fields Range, HasField fields SourceSpan)
-- | A type-safe, extensible record structure.
-- |
-- | This is heavily inspired by Aaron Levins [Extensible Effects in the van Laarhoven Free Monad](http://aaronlevin.ca/post/136494428283/extensible-effects-in-the-van-laarhoven-free-monad).
data Record :: [*] -> * where
Nil :: Record '[]
(:.) :: h -> Record t -> Record (h ': t)
infixr 0 :.
-- | Get the first element of a non-empty record.
rhead :: Record (head ': tail) -> head
rhead (head :. _) = head
-- | Get the first element of a non-empty record.
rtail :: Record (head ': tail) -> Record tail
rtail (_ :. tail) = tail
-- Classes
-- | HasField enables indexing a Record by (phantom) type tags.
class HasField (fields :: [*]) (field :: *) where
getField :: Record fields -> field
setField :: Record fields -> field -> Record fields
-- Instances
-- OVERLAPPABLE is required for the HasField instances so that we can handle the two cases: either the head of the non-empty h-list is the requested field, or it isnt. The third possible case (the h-list is empty) is rejected at compile-time.
instance {-# OVERLAPPABLE #-} HasField fields field => HasField (notIt ': fields) field where
getField (_ :. t) = getField t
setField (h :. t) f = h :. setField t f
instance {-# OVERLAPPABLE #-} HasField (field ': fields) field where
getField (h :. _) = h
setField (_ :. t) f = f :. t
instance (Show h, Show (Record t)) => Show (Record (h ': t)) where
showsPrec n (h :. t) = showParen (n > 0) $ showsPrec 1 h . (" :. " <>) . shows t
instance Show (Record '[]) where
showsPrec n Nil = showParen (n > 0) ("Nil" <>)
instance (ToJSON h, ToJSONList (Record t)) => ToJSON (Record (h ': t)) where
toJSON r = toJSONList (toJSONValues r)
instance ToJSON (Record '[]) where
toJSON _ = emptyArray
class ToJSONList t where
toJSONValues :: t -> [Value]
instance (ToJSON h, ToJSONList (Record t)) => ToJSONList (Record (h ': t)) where
toJSONValues (h :. t) = toJSON h : toJSONValues t
instance ToJSONList (Record '[]) where
toJSONValues _ = []
instance (Eq h, Eq (Record t)) => Eq (Record (h ': t)) where
(h1 :. t1) == (h2 :. t2) = h1 == h2 && t1 == t2
instance Eq (Record '[]) where
_ == _ = True
instance (Ord h, Ord (Record t)) => Ord (Record (h ': t)) where
(h1 :. t1) `compare` (h2 :. t2) = let h = h1 `compare` h2 in
if h == EQ then t1 `compare` t2 else h
instance Ord (Record '[]) where
_ `compare` _ = EQ
instance (Listable head, Listable (Record tail)) => Listable (Record (head ': tail)) where
tiers = cons2 (:.)
instance Listable (Record '[]) where
tiers = cons0 Nil

10
src/Data/Text/Listable.hs Normal file
View File

@ -0,0 +1,10 @@
module Data.Text.Listable where
import Data.Functor.Listable
import Data.Text
import Prologue
newtype ListableText = ListableText { unListableText :: Text }
instance Listable ListableText where
tiers = cons1 (ListableText . pack)

View File

@ -1,34 +1,60 @@
{-# LANGUAGE TypeFamilies, TypeSynonymInstances, ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Diff where
import Category
import Control.Monad.Free
import Data.Functor.Both
import Data.Set
import Prologue
import Data.Functor.Foldable as F
import Data.Functor.Both as Both
import Data.Mergeable
import Data.Record
import Patch
import Range
import Syntax
import Term
-- | An annotated syntax in a diff tree.
data Annotated a annotation f = Annotated { annotation :: !annotation, syntax :: !(Syntax a f) }
deriving (Functor, Eq, Show, Foldable)
-- | An annotation for a source file, including the source range and semantic
-- | categories.
data Info = Info { characterRange :: !Range, categories :: !(Set Category) }
deriving (Eq, Show)
instance Categorizable Info where
categories = Diff.categories
-- | An annotated series of patches of terms.
type Diff a annotation = Free (Annotated a (Both annotation)) (Patch (Term a annotation))
type DiffF f annotation = FreeF (TermF f (Both annotation)) (Patch (Term f annotation))
type Diff f annotation = Free (TermF f (Both annotation)) (Patch (Term f annotation))
-- | Sum the result of a transform applied to all the patches in the diff.
diffSum :: (Patch (Term a annotation) -> Integer) -> Diff a annotation -> Integer
type SyntaxDiff leaf fields = Diff (Syntax leaf) (Record fields)
type instance Base (Free f a) = FreeF f a
instance Functor f => Recursive (Free f a) where project = runFree
instance Functor f => Corecursive (Free f a) where embed = free
diffSum :: (Foldable f, Functor f) => (Patch (Term f annotation) -> Int) -> Diff f annotation -> Int
diffSum patchCost diff = sum $ fmap patchCost diff
-- | The total cost of the diff.
-- | This is the number of all leaves in all terms in all patches of the diff.
diffCost :: Diff a annotation -> Integer
-- | The sum of the node count of the diffs patches.
diffCost :: (Foldable f, Functor f) => Diff f annotation -> Int
diffCost = diffSum $ patchSum termSize
-- | Merge a diff using a function to provide the Term (in Maybe, to simplify recovery of the before/after state) for every Patch.
mergeMaybe :: forall f annotation. Mergeable f => (Patch (Term f annotation) -> Maybe (Term f annotation)) -> (Both annotation -> annotation) -> Diff f annotation -> Maybe (Term f annotation)
mergeMaybe transform extractAnnotation = iter algebra . fmap transform
where algebra :: TermF f (Both annotation) (Maybe (Term f annotation)) -> Maybe (Term f annotation)
algebra (annotations :< syntax) = cofree . (extractAnnotation annotations :<) <$> sequenceAlt syntax
-- | Recover the before state of a diff.
beforeTerm :: Mergeable f => Diff f annotation -> Maybe (Term f annotation)
beforeTerm = mergeMaybe before Both.fst
-- | Recover the after state of a diff.
afterTerm :: Mergeable f => Diff f annotation -> Maybe (Term f annotation)
afterTerm = mergeMaybe after Both.snd
-- | Map a function over the annotations in a diff, whether in diff or term nodes.
--
-- Typed using Free so as to accommodate Free structures derived from diffs that dont fit into the Diff type synonym.
mapAnnotations :: (Functor f, Functor g)
=> (annotation -> annotation')
-> Free (TermF f (g annotation)) (Patch (Term f annotation))
-> Free (TermF f (g annotation')) (Patch (Term f annotation'))
mapAnnotations f = iter (\ (h :< functor) -> wrap (fmap f h :< functor)) . fmap (pure . fmap (fmap f))
-- | Map a function over the annotations of a single diff node, if it is in Free.
modifyAnnotations :: (Functor f, Functor g) => (annotation -> annotation) -> Free (TermF f (g annotation)) a -> Free (TermF f (g annotation)) a
modifyAnnotations f r = case runFree r of
Free (ga :< functor) -> wrap (fmap f ga :< functor)
_ -> r

View File

@ -1,34 +0,0 @@
module DiffOutput where
import qualified Data.ByteString.Lazy as B
import qualified Data.Text.Lazy.IO as TextIO
import Data.Functor.Both
import Diffing
import Parser
import qualified Renderer.JSON as J
import qualified Renderer.Patch as P
import Renderer.Split
import Source
import System.Directory
import System.FilePath
import qualified System.IO as IO
-- | The available types of diff rendering.
data Format = Split | Patch | JSON
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath, outputPath :: FilePath }
-- | Return a renderer from the command-line arguments that will print the diff.
printDiff :: Parser -> DiffArguments -> Both SourceBlob -> IO ()
printDiff parser arguments sources = case format arguments of
Split -> put (output arguments) =<< diffFiles parser split sources
where
put Nothing rendered = TextIO.putStr rendered
put (Just path) rendered = do
isDir <- doesDirectoryExist path
let outputPath = if isDir
then path </> (takeFileName outputPath -<.> ".html")
else path
IO.withFile outputPath IO.WriteMode (`TextIO.hPutStr` rendered)
Patch -> putStr =<< diffFiles parser P.patch sources
JSON -> B.putStr =<< diffFiles parser J.json sources

504
src/DiffSummary.hs Normal file
View File

@ -0,0 +1,504 @@
{-# LANGUAGE DataKinds, TypeFamilies, ScopedTypeVariables, DeriveAnyClass #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
{-# OPTIONS_GHC -Wno-deprecations #-}
-- Disabling deprecation warnings due to pattern match against RescueModifier.
module DiffSummary (diffSummaries, DiffSummary(..), DiffInfo(..), diffToDiffSummaries, isBranchInfo, isErrorSummary, JSONSummary(..)) where
import Prologue
import Diff
import Patch
import Term
import Info (category, characterRange)
import Range
import Syntax as S
import Category as C
import Data.Functor.Both hiding (fst, snd)
import qualified Data.Functor.Both as Both
import Data.Functor.Listable
import Data.List.NonEmpty (nonEmpty)
import qualified Data.Text as Text
import Data.Text.Listable
import Data.Record
import Data.These
import Text.PrettyPrint.Leijen.Text ((<+>), squotes, space, string, Doc, punctuate, pretty, hsep)
import qualified Text.PrettyPrint.Leijen.Text as P
import SourceSpan
import Source
import Data.Aeson as A
data Annotatable a = Annotatable a | Unannotatable a
annotatable :: SyntaxTerm leaf fields -> Annotatable (SyntaxTerm leaf fields)
annotatable term = isAnnotatable (unwrap term) term
where isAnnotatable = \case
S.Class{} -> Annotatable
S.Method{} -> Annotatable
S.Function{} -> Annotatable
S.Module{} -> Annotatable
_ -> Unannotatable
data Identifiable a = Identifiable a | Unidentifiable a
identifiable :: SyntaxTerm leaf fields -> Identifiable (SyntaxTerm leaf fields)
identifiable term = isIdentifiable (unwrap term) term
where isIdentifiable = \case
S.FunctionCall{} -> Identifiable
S.MethodCall{} -> Identifiable
S.Function{} -> Identifiable
S.Assignment{} -> Identifiable
S.OperatorAssignment{} -> Identifiable
S.VarAssignment{} -> Identifiable
S.SubscriptAccess{} -> Identifiable
S.Module{} -> Identifiable
S.Class{} -> Identifiable
S.Method{} -> Identifiable
S.Leaf{} -> Identifiable
S.DoWhile{} -> Identifiable
S.Import{} -> Identifiable
S.Export{} -> Identifiable
S.Ternary{} -> Identifiable
S.If{} -> Identifiable
S.Try{} -> Identifiable
S.Switch{} -> Identifiable
S.Rescue{} -> Identifiable
S.Pair{} -> Identifiable
S.Array ty _ -> maybe Unidentifiable (const Identifiable) ty
S.Object ty _ -> maybe Unidentifiable (const Identifiable) ty
S.BlockStatement{} -> Identifiable
S.TypeDecl{} -> Identifiable
S.Ty{} -> Identifiable
_ -> Unidentifiable
data JSONSummary summary span = JSONSummary { summary :: summary, span :: span }
| ErrorSummary { summary :: summary, span :: span }
deriving (Generic, Eq, Show)
instance (ToJSON summary, ToJSON span) => ToJSON (JSONSummary summary span) where
toJSON JSONSummary{..} = object [ "summary" .= summary, "span" .= span ]
toJSON ErrorSummary{..} = object [ "summary" .= summary, "span" .= span ]
isErrorSummary :: JSONSummary summary span -> Bool
isErrorSummary ErrorSummary{} = True
isErrorSummary _ = False
data DiffInfo = LeafInfo { leafCategory :: Category, termName :: Text, sourceSpan :: SourceSpan }
| BranchInfo { branches :: [ DiffInfo ], branchCategory :: Category, branchType :: Branch }
| ErrorInfo { errorSpan :: SourceSpan, termName :: Text }
| HideInfo -- Hide/Strip from summary output entirely.
deriving (Eq, Show)
data Branch = BIndexed | BFixed | BCommented | BIf deriving (Show, Eq, Generic)
data DiffSummary a = DiffSummary {
patch :: Patch a,
parentAnnotation :: [Either (Category, Text) (Category, Text)]
} deriving (Eq, Functor, Show, Generic)
-- Returns a list of diff summary texts given two source blobs and a diff.
diffSummaries :: (StringConv leaf Text, DefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary Text SourceSpans]
diffSummaries blobs diff = summaryToTexts =<< diffToDiffSummaries (source <$> blobs) diff
-- Takes a 'DiffSummary DiffInfo' and returns a list of JSON Summaries whose text summaries represent the LeafInfo summaries of the 'DiffSummary'.
summaryToTexts :: DiffSummary DiffInfo -> [JSONSummary Text SourceSpans]
summaryToTexts DiffSummary{..} = appendParentContexts <$> summaries patch
where appendParentContexts jsonSummary =
jsonSummary { summary = show $ summary jsonSummary <+> parentContexts parentAnnotation }
-- Returns a list of 'DiffSummary' given two source blobs and a diff.
diffToDiffSummaries :: (StringConv leaf Text, DefaultFields fields) => Both (Source Char) -> SyntaxDiff leaf fields -> [DiffSummary DiffInfo]
diffToDiffSummaries sources = para $ \diff ->
let
diff' = free (Prologue.fst <$> diff)
annotateWithCategory :: [DiffSummary DiffInfo] -> [DiffSummary DiffInfo]
annotateWithCategory children = case (beforeTerm diff', afterTerm diff') of
(_, Just diff'') -> appendSummary (Both.snd sources) diff'' <$> children
(Just diff'', _) -> appendSummary (Both.fst sources) diff'' <$> children
(Nothing, Nothing) -> []
in case diff of
-- Skip comments and leaves since they don't have any changes
(Free (_ :< syntax)) -> annotateWithCategory (toList syntax >>= snd)
(Pure patch) -> [ DiffSummary (mapPatch (termToDiffInfo beforeSource) (termToDiffInfo afterSource) patch) [] ]
where
(beforeSource, afterSource) = runJoin sources
-- Flattens a patch of diff infos into a list of docs, one for every 'LeafInfo' or `ErrorInfo` it contains.
summaries :: Patch DiffInfo -> [JSONSummary Doc SourceSpans]
summaries = \case
p@(Replace i1 i2) -> zipWith (\a b ->
JSONSummary
{
summary = summary (prefixWithPatch p This a) <+> "with" <+> summary b
, span = SourceSpans $ These (span a) (span b)
}) (toLeafInfos i1) (toLeafInfos i2)
p@(Insert info) -> prefixWithPatch p That <$> toLeafInfos info
p@(Delete info) -> prefixWithPatch p This <$> toLeafInfos info
-- Prefixes a given doc with the type of patch it represents.
prefixWithPatch :: Patch DiffInfo -> (SourceSpan -> These SourceSpan SourceSpan) -> JSONSummary Doc SourceSpan -> JSONSummary Doc SourceSpans
prefixWithPatch patch constructor = prefixWithThe (patchToPrefix patch)
where
prefixWithThe prefix jsonSummary = jsonSummary
{
summary = prefix <+> summary jsonSummary
, span = SourceSpans $ constructor (span jsonSummary)
}
patchToPrefix = \case
(Replace _ _) -> "Replaced"
(Insert _) -> "Added"
(Delete _) -> "Deleted"
toLeafInfos :: DiffInfo -> [JSONSummary Doc SourceSpan]
toLeafInfos err@ErrorInfo{..} = pure $ ErrorSummary (pretty err) errorSpan
toLeafInfos BranchInfo{..} = branches >>= toLeafInfos
toLeafInfos HideInfo = []
toLeafInfos LeafInfo{..} = pure $ JSONSummary (summary leafCategory termName) sourceSpan
where
summary :: Category -> Text -> Doc
summary category termName = case category of
C.NumberLiteral -> squotes $ toDoc termName
C.IntegerLiteral -> squotes $ toDoc termName
C.Boolean -> squotes $ toDoc termName
C.StringLiteral -> termAndCategoryName
C.Export -> termAndCategoryName
C.Import -> termAndCategoryName
C.Subshell -> termAndCategoryName
C.AnonymousFunction -> "an" <+> toDoc termName <+> "function"
C.Begin -> categoryName'
C.Select -> categoryName'
C.Else -> categoryName'
C.Ensure -> categoryName'
C.Break -> categoryName'
C.Continue -> categoryName'
C.BeginBlock -> categoryName'
C.EndBlock -> categoryName'
C.Yield | Text.null termName -> categoryName'
C.Return | Text.null termName -> categoryName'
C.Switch | Text.null termName -> categoryName'
_ -> "the" <+> squotes (toDoc termName) <+> toDoc categoryName
where
termAndCategoryName = "the" <+> toDoc termName <+> toDoc categoryName
categoryName = toCategoryName category
categoryName' = case categoryName of
name | startsWithVowel name -> "an" <+> toDoc name
| otherwise -> "a" <+> toDoc name
startsWithVowel text = getAny $ foldMap (Any . flip Text.isPrefixOf text) vowels
vowels = Text.singleton <$> ("aeiouAEIOU" :: [Char])
-- Returns a text representing a specific term given a source and a term.
toTermName :: forall leaf fields. (StringConv leaf Text, DefaultFields fields) => Source Char -> SyntaxTerm leaf fields -> Text
toTermName source term = case unwrap term of
S.Send _ _ -> termNameFromSource term
S.Ty _ -> termNameFromSource term
S.TypeDecl id _ -> toTermName' id
S.TypeAssertion _ _ -> termNameFromSource term
S.TypeConversion _ _ -> termNameFromSource term
S.Go expr -> toTermName' expr
S.Defer expr -> toTermName' expr
S.AnonymousFunction params _ -> "anonymous" <> paramsToArgNames params
S.Fixed children -> termNameFromChildren term children
S.Indexed children -> maybe "branch" sconcat (nonEmpty (intersperse ", " (toTermName' <$> children)))
Leaf leaf -> toS leaf
S.Assignment identifier _ -> toTermName' identifier
S.Function identifier _ _ _ -> toTermName' identifier
S.ParameterDecl _ _ -> termNameFromSource term
S.FunctionCall i args -> case unwrap i of
S.AnonymousFunction params _ ->
-- Omit a function call's arguments if it's arguments match the underlying
-- anonymous function's arguments.
if (category . extract <$> args) == (category . extract <$> params)
then toTermName' i
else "(" <> toTermName' i <> ")" <> paramsToArgNames args
_ -> toTermName' i <> paramsToArgNames args
S.MemberAccess base property -> case (unwrap base, unwrap property) of
(S.FunctionCall{}, S.FunctionCall{}) -> toTermName' base <> "()." <> toTermName' property <> "()"
(S.FunctionCall{}, _) -> toTermName' base <> "()." <> toTermName' property
(_, S.FunctionCall{}) -> toTermName' base <> "." <> toTermName' property <> "()"
(_, _) -> toTermName' base <> "." <> toTermName' property
S.MethodCall targetId methodId methodParams -> toTermName' targetId <> sep <> toTermName' methodId <> paramsToArgNames methodParams
where sep = case unwrap targetId of
S.FunctionCall{} -> "()."
_ -> "."
S.SubscriptAccess base element -> case (unwrap base, unwrap element) of
(S.FunctionCall{}, S.FunctionCall{}) -> toTermName' base <> "()." <> toTermName' element <> "()"
(S.FunctionCall{}, _) -> toTermName' base <> "()." <> toTermName' element
(_, S.FunctionCall{}) -> toTermName' base <> "[" <> toTermName' element <> "()" <> "]"
(S.Indexed _, _) -> case category . extract $ base of
SliceTy -> termNameFromSource base <> toTermName' element
_ -> toTermName' base <> "[" <> toTermName' element <> "]"
(_, _) -> toTermName' base <> "[" <> toTermName' element <> "]"
S.VarAssignment varId _ -> toTermName' varId
S.VarDecl decl _ -> toTermName' decl
-- TODO: We should remove Case from Syntax since I don't think we should ever
-- evaluate Case as a single toTermName Text - joshvera
S.Case expr _ -> termNameFromSource expr
S.Switch exprs _ -> maybe "" toTermName' (fmap snd (unsnoc exprs))
S.Ternary expr _ -> toTermName' expr
S.OperatorAssignment id _ -> toTermName' id
S.Operator _ -> termNameFromSource term
S.Object ty kvs -> maybe ("{ " <> Text.intercalate ", " (toTermName' <$> kvs) <> " }") termNameFromSource ty
S.Pair k v -> toKeyName k <> toArgName v
S.Return children -> Text.intercalate ", " (termNameFromSource <$> children)
S.Yield children -> Text.intercalate ", " (termNameFromSource <$> children)
S.ParseError _ -> termNameFromSource term
S.If expr _ -> termNameFromSource expr
S.For clauses _ -> termNameFromChildren term clauses
S.While expr _ -> toTermName' expr
S.DoWhile _ expr -> toTermName' expr
S.Throw expr -> termNameFromSource expr
S.Constructor expr -> toTermName' expr
S.Try clauses _ _ _ -> termNameFromChildren term clauses
S.Select clauses -> termNameFromChildren term clauses
S.Array ty _ -> maybe (termNameFromSource term) termNameFromSource ty
S.Class identifier _ _ -> toTermName' identifier
S.Method identifier (Just receiver) _ args _ -> termNameFromSource receiver <> "." <> toTermName' identifier <> paramsToArgNames args
S.Method identifier Nothing _ args _ -> toTermName' identifier <> paramsToArgNames args
S.Comment a -> toS a
S.Commented _ _ -> termNameFromChildren term (toList $ unwrap term)
S.Module identifier _ -> toTermName' identifier
S.Import identifier [] -> termNameFromSource identifier
S.Import identifier exprs -> termNameFromChildren term exprs <> " from " <> toTermName' identifier
S.Export Nothing expr -> "{ " <> Text.intercalate ", " (termNameFromSource <$> expr) <> " }"
S.Export (Just identifier) [] -> "{ " <> toTermName' identifier <> " }"
S.Export (Just identifier) expr -> "{ " <> Text.intercalate ", " (termNameFromSource <$> expr) <> " }" <> " from " <> toTermName' identifier
S.Negate expr -> toTermName' expr
S.Struct ty _ -> maybe (termNameFromSource term) termNameFromSource ty
S.Rescue args _ -> Text.intercalate ", " $ toTermName' <$> args
S.Break expr -> maybe "" toTermName' expr
S.Continue expr -> maybe "" toTermName' expr
S.BlockStatement children -> termNameFromChildren term children
S.DefaultCase children -> termNameFromChildren term children
S.FieldDecl id expr tag -> termNameFromSource id <> maybe "" (\expr' -> " " <> termNameFromSource expr') expr <> maybe "" ((" " <>) . termNameFromSource) tag
where toTermName' = toTermName source
termNameFromChildren term children = termNameFromRange (unionRangesFrom (range term) (range <$> children))
termNameFromSource term = termNameFromRange (range term)
termNameFromRange range = toText $ Source.slice range source
range = characterRange . extract
paramsToArgNames params = "(" <> Text.intercalate ", " (toArgName <$> params) <> ")"
toArgName :: SyntaxTerm leaf fields -> Text
toArgName arg = case identifiable arg of
Identifiable arg -> toTermName' arg
Unidentifiable _ -> ""
toKeyName key = case toTermName' key of
n | Text.head n == ':' -> n <> " => "
n -> n <> ": "
parentContexts :: [Either (Category, Text) (Category, Text)] -> Doc
parentContexts contexts = hsep $ either identifiableDoc annotatableDoc <$> contexts
where
identifiableDoc (c, t) = case c of
C.Assignment -> "in an" <+> catName c <+> "to" <+> termName t
C.Select -> "in a" <+> catName c
C.Begin -> "in a" <+> catName c
C.Else -> "in an" <+> catName c
C.Elsif -> "in the" <+> squotes (termName t) <+> catName c
C.Method -> "in the" <+> squotes (termName t) <+> catName c
C.Ternary -> "in the" <+> squotes (termName t) <+> catName c
C.Ensure -> "in an" <+> catName c
C.Rescue -> case t of
"" -> "in a" <+> catName c
_ -> "in the" <+> squotes (termName t) <+> catName c
C.Modifier C.Rescue -> "in the" <+> squotes ("rescue" <+> termName t) <+> "modifier"
C.If -> "in the" <+> squotes (termName t) <+> catName c
C.Case -> "in the" <+> squotes (termName t) <+> catName c
C.Break -> case t of
"" -> "in a" <+> catName c
_ -> "in the" <+> squotes (termName t) <+> catName c
C.Continue -> case t of
"" -> "in a" <+> catName c
_ -> "in the" <+> squotes (termName t) <+> catName c
C.Switch -> case t of
"" -> "in a" <+> catName c
_ -> "in the" <+> squotes (termName t) <+> catName c
C.When -> "in a" <+> catName c
C.BeginBlock -> "in a" <+> catName c
C.EndBlock -> "in an" <+> catName c
C.DefaultCase -> "in a" <+> catName c
C.TypeDecl -> "in the" <+> squotes (termName t) <+> catName c
_ -> "in the" <+> termName t <+> catName c
annotatableDoc (c, t) = "of the" <+> squotes (termName t) <+> catName c
catName = toDoc . toCategoryName
termName = toDoc
toDoc :: Text -> Doc
toDoc = string . toS
termToDiffInfo :: (StringConv leaf Text, DefaultFields fields) => Source Char -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo blob term = case unwrap term of
S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term) BIndexed
S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term) BFixed
S.AnonymousFunction _ _ -> LeafInfo C.AnonymousFunction (toTermName' term) (getField $ extract term)
S.Comment _ -> HideInfo
S.Commented cs leaf -> BranchInfo (termToDiffInfo' <$> cs <> maybeToList leaf) (category $ extract term) BCommented
S.ParseError _ -> ErrorInfo (getField $ extract term) (toTermName' term)
_ -> toLeafInfo term
where toTermName' = toTermName blob
termToDiffInfo' = termToDiffInfo blob
toLeafInfo term = LeafInfo (category $ extract term) (toTermName' term) (getField $ extract term)
-- | Append a parentAnnotation to the current DiffSummary instance.
-- | For a DiffSummary without a parentAnnotation, we append a parentAnnotation with the first identifiable term.
-- | For a DiffSummary with a parentAnnotation, we append the next annotatable term to the extant parentAnnotation.
-- | If a DiffSummary already has a parentAnnotation, and a (grand) parentAnnotation, then we return the summary without modification.
appendSummary :: (StringConv leaf Text, DefaultFields fields) => Source Char -> SyntaxTerm leaf fields -> DiffSummary DiffInfo -> DiffSummary DiffInfo
appendSummary source term summary =
case (parentAnnotation summary, identifiable term, annotatable term) of
([], Identifiable _, _) -> appendParentAnnotation Left
([_], _, Annotatable _) -> appendParentAnnotation Right
(_, _, _) -> summary
where
appendParentAnnotation constructor = summary
{ parentAnnotation = parentAnnotation summary <> [ constructor (category (extract term), toTermName source term) ] }
isBranchInfo :: DiffInfo -> Bool
isBranchInfo info = case info of
BranchInfo{} -> True
_ -> False
-- The user-facing category name of 'a'.
class HasCategory a where
toCategoryName :: a -> Text
-- Instances
instance HasCategory Text where
toCategoryName = identity
instance HasCategory Category where
toCategoryName = \case
ArrayLiteral -> "array"
BooleanOperator -> "boolean operator"
MathOperator -> "math operator"
BitwiseOperator -> "bitwise operator"
RelationalOperator -> "relational operator"
Boolean -> "boolean"
DictionaryLiteral -> "dictionary"
C.Comment -> "comment"
C.ParseError -> "error"
ExpressionStatements -> "expression statements"
C.Assignment -> "assignment"
C.Function -> "function"
C.FunctionCall -> "function call"
C.MemberAccess -> "member access"
C.MethodCall -> "method call"
C.Args -> "arguments"
C.VarAssignment -> "var assignment"
C.VarDecl -> "variable"
C.Switch -> "switch statement"
C.Case -> "case statement"
C.SubscriptAccess -> "subscript access"
C.MathAssignment -> "math assignment"
C.Ternary -> "ternary expression"
C.Operator -> "operator"
Identifier -> "identifier"
IntegerLiteral -> "integer"
NumberLiteral -> "number"
FloatLiteral -> "float"
Other s -> s
C.Pair -> "pair"
C.Params -> "params"
Program -> "top level"
Regex -> "regex"
StringLiteral -> "string"
SymbolLiteral -> "symbol"
TemplateString -> "template string"
C.For -> "for statement"
C.While -> "while statement"
C.DoWhile -> "do/while statement"
C.Object -> "object"
C.Return -> "return statement"
C.Throw -> "throw statement"
C.Constructor -> "constructor"
C.Catch -> "catch statement"
C.Try -> "try statement"
C.Finally -> "finally statement"
C.Class -> "class"
C.Method -> "method"
C.If -> "if statement"
C.CommaOperator -> "comma operator"
C.Empty -> "empty statement"
C.Module -> "module"
C.Import -> "import statement"
C.Export -> "export statement"
C.AnonymousFunction -> "anonymous function"
C.Interpolation -> "interpolation"
C.Subshell -> "subshell command"
C.OperatorAssignment -> "operator assignment"
C.Yield -> "yield statement"
C.Until -> "until statement"
C.Unless -> "unless statement"
C.Begin -> "begin statement"
C.Else -> "else block"
C.Elsif -> "elsif block"
C.Ensure -> "ensure block"
C.Rescue -> "rescue block"
C.RescueModifier -> "rescue modifier"
C.When -> "when comparison"
C.RescuedException -> "last exception"
C.RescueArgs -> "arguments"
C.Negate -> "negate"
C.Select -> "select statement"
C.Go -> "go statement"
C.Slice -> "slice literal"
C.Defer -> "defer statement"
C.TypeAssertion -> "type assertion statement"
C.TypeConversion -> "type conversion expression"
C.ArgumentPair -> "argument"
C.KeywordParameter -> "parameter"
C.OptionalParameter -> "parameter"
C.SplatParameter -> "parameter"
C.HashSplatParameter -> "parameter"
C.BlockParameter -> "parameter"
C.ArrayTy -> "array type"
C.DictionaryTy -> "dictionary type"
C.StructTy -> "struct type"
C.Struct -> "struct"
C.Break -> "break statement"
C.Continue -> "continue statement"
C.Binary -> "binary statement"
C.Unary -> "unary statement"
C.Constant -> "constant"
C.Superclass -> "superclass"
C.SingletonClass -> "singleton class"
C.RangeExpression -> "range"
C.ScopeOperator -> "scope operator"
C.BeginBlock -> "BEGIN block"
C.EndBlock -> "END block"
C.ParameterDecl -> "parameter declaration"
C.DefaultCase -> "default statement"
C.TypeDecl -> "type declaration"
C.PointerTy -> "pointer type"
C.FieldDecl -> "field declaration"
C.SliceTy -> "slice type"
C.Element -> "element"
C.Literal -> "literal"
C.ChannelTy -> "channel type"
C.Send -> "send statement"
C.IndexExpression -> "index expression"
C.FunctionTy -> "function type"
C.IncrementStatement -> "increment statement"
C.DecrementStatement -> "decrement statement"
C.QualifiedIdentifier -> "qualified identifier"
C.FieldDeclarations -> "field declarations"
C.RuneLiteral -> "rune literal"
C.Modifier C.Rescue -> "rescue modifier"
C.Modifier c -> toCategoryName c
instance HasField fields Category => HasCategory (SyntaxTerm leaf fields) where
toCategoryName = toCategoryName . category . extract
instance Listable Branch where
tiers = cons0 BIndexed \/ cons0 BFixed \/ cons0 BCommented \/ cons0 BIf
instance Listable1 DiffSummary where
liftTiers termTiers = liftCons2 (liftTiers termTiers) (liftTiers (eitherTiers (liftTiers (mapT unListableText tiers)))) DiffSummary
where eitherTiers tiers = liftTiers2 tiers tiers
instance Listable a => Listable (DiffSummary a) where
tiers = tiers1
instance P.Pretty DiffInfo where
pretty LeafInfo{..} = squotes (string $ toSL termName) <+> string (toSL (toCategoryName leafCategory))
pretty BranchInfo{..} = mconcat $ punctuate (string "," P.<> space) (pretty <$> branches)
pretty ErrorInfo{..} = squotes (string $ toSL termName) <+> "at" <+> (string . toSL $ displayStartEndPos errorSpan)
pretty HideInfo = ""

View File

@ -1,78 +1,128 @@
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
module Diffing where
import Diff
import Interpreter
import Language
import Parser
import Range
import Renderer
import Source hiding ((++))
import Syntax
import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
import Control.Comonad.Cofree
import Prologue hiding (fst, snd)
import Category
import Data.Functor.Both
import qualified Data.ByteString.Char8 as B1
import Data.Foldable
import qualified Data.Text as T
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Data.RandomWalkSimilarity (defaultFeatureVectorDecorator, stripDiff)
import Data.Record
import qualified Data.Text.IO as TextIO
import Data.These
import Diff
import Info
import Interpreter
import Patch
import Parser
import Renderer
import Renderer.JSON
import Renderer.Patch
import Renderer.Split
import Renderer.Summary
import Renderer.SExpression
import Renderer.TOC
import Source
import Syntax
import System.Directory
import System.FilePath
-- | Return a parser based on the file extension (including the ".").
parserForType :: T.Text -> Parser
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
Just Ruby -> treeSitterParser Ruby ts_language_ruby
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser
lineByLineParser input = return . root . Indexed $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> leaves
where
lines = actualLines input
root syntax = Info (Range 0 $ length input) mempty :< syntax
leaf charIndex line = Info (Range charIndex $ charIndex + T.length line) mempty :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum ++ [ leaf charIndex (toText line) ]
, charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser
parserForFilepath = parserForType . T.pack . takeExtension
-- | Replace every string leaf with leaves of the words in the string.
breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
breakDownLeavesByWord source = cata replaceIn
where
replaceIn info@(Info range categories) (Leaf _) | ranges <- rangesAndWordsInSource range, length ranges > 1 = info :< Indexed (makeLeaf categories <$> ranges)
replaceIn info syntax = info :< syntax
rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toString $ slice range source)
makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
return $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
import qualified System.IO as IO
import System.Environment (lookupEnv)
import Term
import Data.Aeson (ToJSON, toJSON, toEncoding)
import Data.Aeson.Encoding (encodingToLazyByteString)
-- | Given a parser and renderer, diff two sources and return the rendered
-- | result.
diffFiles :: Parser -> Renderer T.Text b -> Both SourceBlob -> IO b
diffFiles parser renderer sourceBlobs = do
let sources = source <$> sourceBlobs
terms <- sequence $ parser <$> sources
let replaceLeaves = breakDownLeavesByWord <$> sources
return $ renderer (runBothWith diffTerms $ replaceLeaves <*> terms) sourceBlobs
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: (HasField fields Category, HasField fields Cost)
=> Parser (Syntax Text) (Record fields)
-> Renderer (Record fields)
-> Both SourceBlob
-> IO Output
diffFiles parse render sourceBlobs = do
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parse) sourceBlobs
pure $! render sourceBlobs (stripDiff (diffTerms' terms))
where
diffTerms' terms = case runBothWith areNullOids sourceBlobs of
(True, False) -> pure $ Insert (snd terms)
(False, True) -> pure $ Delete (fst terms)
(_, _) ->
runBothWith (diffTerms construct compareCategoryEq diffCostWithCachedTermCosts) terms
areNullOids a b = (hasNullOid a, hasNullOid b)
hasNullOid blob = oid blob == nullOid || null (source blob)
construct (info :< syntax) = free (Free ((setCost <$> info <*> sumCost syntax) :< syntax))
sumCost = fmap getSum . foldMap (fmap Sum . getCost)
getCost diff = case runFree diff of
Free (info :< _) -> cost <$> info
Pure patch -> uncurry both (fromThese 0 0 (unPatch (cost . extract <$> patch)))
getLabel :: HasField fields Category => CofreeF (Syntax leaf) (Record fields) b -> (Category, Maybe leaf)
getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Determine whether two terms are comparable based on the equality of their categories.
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract
-- | The sum of the node count of the diffs patches.
diffCostWithCachedTermCosts :: Functor f => HasField fields Cost => Diff f (Record fields) -> Int
diffCostWithCachedTermCosts diff = unCost $ case runFree diff of
Free (info :< _) -> sum (cost <$> info)
Pure patch -> sum (cost . extract <$> patch)
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (ToJSON (Record fields), DefaultFields fields, HasField fields Cost) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of
Split -> split
Patch -> patch
SExpression -> sExpression
JSON -> json
Summary -> summary
TOC -> toc
-- | Returns a truncated diff given diff arguments and two source blobs.
truncatedDiff :: DiffArguments -> Both SourceBlob -> IO Output
truncatedDiff arguments sources = pure $ case format arguments of
Split -> SplitOutput mempty
Patch -> PatchOutput (truncatePatch arguments sources)
SExpression -> SExpressionOutput mempty
JSON -> JSONOutput mempty
Summary -> SummaryOutput mempty
TOC -> TOCOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (ToJSON (Record fields), DefaultFields fields, HasField fields Cost) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO ()
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
writeToOutput (output arguments) $
case rendered of
SplitOutput text -> text
PatchOutput text -> text
SExpressionOutput text -> text
JSONOutput series -> encodingToText (toJSON series)
SummaryOutput summaries -> encodingToText (toJSON summaries)
TOCOutput summaries -> encodingToText (toJSON summaries)
where
-- TODO: Don't go from Value to Text?
encodingToText = toS . encodingToLazyByteString . toEncoding
-- | Writes text to an output file or stdout.
writeToOutput :: Maybe FilePath -> Text -> IO ()
writeToOutput output text =
case output of
Nothing -> do
lang <- lookupEnv "LANG"
case lang of
-- If LANG is set and isn't the empty string, leave the encoding.
Just x | x /= "" -> pure ()
-- Otherwise default to utf8.
_ -> IO.hSetEncoding IO.stdout IO.utf8
TextIO.hPutStrLn IO.stdout text
Just path -> do
isDir <- doesDirectoryExist path
let outputPath = if isDir
then path </> (takeFileName outputPath -<.> ".html")
else path
IO.withFile outputPath IO.WriteMode (`TextIO.hPutStr` text)

60
src/FDoc/NatExample.hs Normal file
View File

@ -0,0 +1,60 @@
module FDoc.NatExample where
import Prologue
import Data.Functor.Foldable
-- Our base Functor. The recursive bit is parameterized by r.
data NatF r =
ZeroF
| SuccF r
deriving (Show, Functor)
-- Fix represents the "fixed point" for the NatF Functor, and enables recursion.
-- Important to note this has kind * -> *.
type Nat = Fix NatF
-- This is a fully applied type (Has kind *).
zero' :: Nat
zero' = Fix ZeroF
-- This is a partially applied type (has kind * -> *). The recursive bit is used
-- by recursion schemes and is referred to as the "carrier" functor.
succ' :: Nat -> Nat
succ' = Fix . SuccF
-- Catamorphism: "tear down" a recursive structure in the shape of Nat.
natToIntCata :: Nat -> Int
natToIntCata nats = cata algebra nats
where
algebra term = case term of
ZeroF -> 0
SuccF value -> 1 + value
-- Anamorphism: "build up" a recursive structure in the shape of Nat.
intToNatAna :: Int -> Nat
intToNatAna num = ana coalgebra num
where
coalgebra num = case num of
0 -> ZeroF
_ -> SuccF (num - 1)
-- Hylomorphism: first apply an anamorphism and then a catamorphism in the shape
-- of Nat.
natHylo :: Int -> Int
natHylo num = hylo algebra coalgebra num
where
algebra term = case term of
ZeroF -> 0
SuccF value -> 1 + value
coalgebra num = case num of
0 -> ZeroF
_ -> SuccF (num - 1)
-- Paramorphism: primitive recursion maintaining the original value along with
-- its computed value.
natPara :: Nat -> Int
natPara nats = para algebra nats
where
algebra value = case value of
ZeroF -> 0
(SuccF (_, value')) -> 1 + value'

View File

@ -0,0 +1,186 @@
{-# LANGUAGE DataKinds, ScopedTypeVariables, TypeFamilies, TypeOperators #-}
module FDoc.RecursionSchemes where
import Data.Record
import Range
import Category
import Term
import Syntax
import Prologue
import Prelude
import FDoc.Term
data NewField = NewField deriving (Show)
{-
Anamorphism -- add a new field to each term's Record fields
ana :: (a -> Base t a) -- a (Base t)-coalgebra
-> a -- seed
-> t -- resulting fixed point
Anamorphism as a recursion scheme "builds up" a recursive structure.
Anamorphisms work by using a coalgebra, which maps a seed value to a fixed point
structure.
The example below adds a new field to the `Record` fields.
-}
indexedTermAna :: [leaf] -> Term (Syntax leaf) (Record '[NewField, Range, Category])
indexedTermAna childrenLeaves = ana coalgebra (indexedTerm childrenLeaves)
where
coalgebra term = (NewField :. (extract term)) :< unwrap term
{-
Catamorphism example -- add a new field to each term's Record fields
cata :: (Base t a -> a) -- a (Base t)-algebra
-> t -- fixed point
-> a -- result
Catamorphism as a recursion scheme "tears down" a recursive structure.
Catamorphisms work by using an algebra, which maps a shape in our fixed point
structure to a new shape.
The example below adds a new field to the `Record` fields.
-}
indexedTermCata :: [leaf] -> Term (Syntax leaf) (Record '[NewField, Range, Category])
indexedTermCata childrenLeaves = cata algebra (indexedTerm childrenLeaves)
where
algebra :: CofreeF f (Record t) (Cofree f (Record (NewField : t))) -> Cofree f (Record (NewField : t))
algebra term = cofree $ (NewField :. (headF term)) :< tailF term
{-
Anamorphism -- construct a Term from a string
The example below shows how to build up a recursive Term structure from a string
representation.
Example usage:
stringToTermAna "indexed" =>
CofreeT (Identity ( (Range {start = 1, end = 10} :. MethodCall :. Nil)
:<
Indexed
[ CofreeT (Identity ( (Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf1" ) )
, CofreeT (Identity ( (Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf2" ) )
, CofreeT (Identity ( (Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf3" ) )
] ))
First step is to match against the "indexed" string and begin building up a Cofree Indexed structure:
CofreeT (Identity ( (Range 1 10 :. Category.MethodCall :. Nil) :< Indexed ["leaf1", "leaf2", "leaf3"] ) )
While building up the `Indexed` structure, we continue to recurse over the
`Indexed` terms ["leaf1", "leaf2", "leaf3"]. These are pattern matched using
the catch all `_` and default to `Leaf` Syntax shapes:
CofreeT (Identity ( (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf "leaf1" ) )
CofreeT (Identity ( (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf "leaf2" ) )
CofreeT (Identity ( (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf "leaf3" ) )
These structures are substituted in place of ["leaf1", "leaf2", "leaf3"] in
the new cofree `Indexed` structure, resulting in a expansion of all possible
string terms.
-}
stringToTermAna :: String -> Term (Syntax String) (Record '[Range, Category])
stringToTermAna = ana coalgebra
where
coalgebra representation = case representation of
"indexed" -> (Range 1 10 :. Category.MethodCall :. Nil) :< Indexed ["leaf1", "leaf2", "leaf3"]
_ -> (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf representation
{-
Catamorphism -- construct a list of Strings from a recursive Term structure.
The example below shows how to tear down a recursive Term structure into a list
of String representation.
-}
termToStringCata :: Term (Syntax String) (Record '[Range, Category]) -> [String]
termToStringCata = cata algebra
where
algebra term = case term of
(_ :< Leaf value) -> [value]
(_ :< Indexed values) -> ["indexed"] <> Prologue.concat values
_ -> ["unknown"]
{-
Hylomorphism -- An anamorphism followed by a catamorphism
hylo :: Functor f => (f b -> b) -- an algebra
-> (a -> f a) -- a coalgebra
-> a -- seed value
-> b -- result
Hylomorphisms work by first applying a coalgebra (anamorphism) to build up a
structure. An algebra (catamorphism) is then applied to this structure. Because
of fusion the anamorphism and catamorphism occur in a single pass rather than
two separate traversals.
The example below shows how our algebra and coalgebra defined in the
termToStringCata and stringToTermAna can be utilized as a hylomorphism.
Example Usage:
stringTermHylo "indexed" => ["indexed", "leaf1", "leaf2", "leaf3"]
-}
stringTermHylo :: String -> [String]
stringTermHylo = hylo algebra coalgebra
where
algebra term = case term of
(_ :< Leaf value) -> [value]
(_ :< Indexed values) -> ["indexed"] <> Prologue.concat values
_ -> ["unknown"]
coalgebra stringRepresentation = case stringRepresentation of
"indexed" -> (Range 1 10 :. Category.MethodCall :. Nil) :< Indexed ["leaf1", "leaf2", "leaf3"]
_ -> (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf stringRepresentation
{-
Paramorphism -- primitive recursion that maintains a reference to the original value and its computed value.
para :: (Base t (t, a) -> a) -- an algebra that takes a tuple of the last input
-> t -- fixed point
-> a -- result
Paramorphisms, like all recursion schemes, work via a bottom up traversal
(leaves to root), in which an algebra is applied to every node in the recursive
structure. The difference between paramorphisms and catamorphisms is the algebra
receives a tuple of the original subobject and its computed value (t, a) where
`t` is the original suboject and `a` is the computed value.
The example implementation below calculates a string representation for each
Syntax type, flattening the recursive structure into a one dimensional list to
tuples. The tuple contains the original syntax subobject, and its computed
string representation. This example aims to showcase how paramorphisms work by
returning a final list of tuples that mimics the intermediate tuple shapes the
algebra receives throughout the bottom up traversal.
Example Usage:
let terms = indexedTerm ["leaf1", "leaf2", "leaf3"]
termPara terms = Recurse over the structure to start at the leaves (bottom up traversal):
tuple3 = ( CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf3")), "leaf3" ) : []
Continue the traversal from leaves to root:
tuple2:3 = ( CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf2")), "leaf2") : tuple3
tuple1:2:3 = ( CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf1" )), "leaf1") : tuple2:3
Compute the root:
tupleIndexed:1:2:3 = ( CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Indexed [])), "indexed" ) : tuple1:2:3
Final shape:
[ (CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Indexed [])) , "indexed")
, (CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf1")), "leaf1")
, (CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf2")), "leaf2")
, (CofreeT (Identity ((Range {start = 1, end = 10} :. MethodCall :. Nil) :< Leaf "leaf3")), "leaf3")
]
-}
termPara :: Term (Syntax String) (Record '[Range, Category]) -> [(Term (Syntax String) (Record '[Range, Category]), String)]
termPara = para algebra
where
algebra term = case term of
(annotation :< Leaf representation) -> [(cofree (annotation :< Leaf representation), representation)]
(annotation :< Indexed values) -> [(cofree (annotation :< Indexed []), "indexed")] <> (values >>= Prelude.snd)
_ -> [(cofree ((Range 1 10 :. Category.MethodCall :. Nil) :< Leaf "unknown"), "unknown")]

67
src/FDoc/Term.hs Normal file
View File

@ -0,0 +1,67 @@
{-# LANGUAGE DataKinds, TypeOperators #-}
module FDoc.Term where
import Data.Record
import Range
import Category
import Term
import Syntax
import Prologue
{-
Constructs a Syntax.Leaf using the polymorphic type variable `leaf`.
This is in the TermF shape: CofreeF f a b where
f is the functor (Syntax.Leaf `leaf`)
a is the annotation (Record '[Range, Category])
b is the same type of functor defined by f
Two common convenience operations when working with CofreeF (for docs, see
Control.Comonad.Trans.Cofree.Types.CofreeF) are `headF` and `tailF`. `headF`
return the annotation portion of the CofreeF structure, and `tailF` returns the
functor portion (Syntax).
Example (from GHCi):
> let leaf = leafTermF "example"
> headF leaf
> Range {start = 1, end = 10} :. MethodCall :. Nil
> tailF leaf
> Leaf "example"
-}
leafTermF :: leaf -> TermF (Syntax leaf) (Record '[Range, Category]) b
leafTermF leaf = (Range 1 10 :. Category.MethodCall :. Nil) :< Leaf leaf
{-
Constructs a Syntax.Leaf using the polymorphic type variable `leaf`.
This is in the Term shape: Cofree f a where
f is the functor (Syntax.Leaf `leaf`)
a is the annotation (Record '[Range, Category])
Two common convenience operations when working with Cofree (for docs, see
Control.Comonad.Trans.Cofree.Types.Cofree) are `extract` and `unwrap`. `extract`
returns the annotation portion of the Cofree structure, and `unwrap` returns the
functor portion (Syntax).
Example (from GHCi):
> let leaf = leafTerm "example"
> extract leaf
> Range {start = 1, end = 10} :. MethodCall :. Nil
> unwrap leaf
> Leaf "example"
-}
leafTerm :: leaf -> Cofree (Syntax leaf) (Record '[Range, Category])
leafTerm = cofree . leafTermF
indexedTermF :: [leaf] -> TermF (Syntax leaf) (Record '[Range, Category]) (Term (Syntax leaf) (Record '[Range, Category]))
indexedTermF leaves = (Range 1 10 :. Category.MethodCall :. Nil) :< (Indexed (leafTerm <$> leaves))
indexedTerm :: [leaf] -> Term (Syntax leaf) (Record '[Range, Category])
indexedTerm leaves = cofree $ indexedTermF leaves

65
src/Info.hs Normal file
View File

@ -0,0 +1,65 @@
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving #-}
module Info
( Range(..)
, characterRange
, setCharacterRange
, Category(..)
, category
, setCategory
, Cost(..)
, cost
, setCost
, SourceSpan(..)
, SourcePos(..)
, SourceSpans(..)
, sourceSpan
, setSourceSpan
, SourceText(..)
, sourceText
) where
import Data.Functor.Listable
import Data.Record
import Prologue
import Category
import Range
import SourceSpan
import Data.Aeson
newtype Cost = Cost { unCost :: Int }
deriving (Eq, Num, Ord, Show, ToJSON)
newtype SourceText = SourceText { unText :: Text }
deriving (Show, ToJSON)
characterRange :: HasField fields Range => Record fields -> Range
characterRange = getField
setCharacterRange :: HasField fields Range => Record fields -> Range -> Record fields
setCharacterRange = setField
category :: HasField fields Category => Record fields -> Category
category = getField
setCategory :: HasField fields Category => Record fields -> Category -> Record fields
setCategory = setField
cost :: HasField fields Cost => Record fields -> Cost
cost = getField
setCost :: HasField fields Cost => Record fields -> Cost -> Record fields
setCost = setField
sourceText :: HasField fields SourceText => Record fields -> SourceText
sourceText = getField
sourceSpan :: HasField fields SourceSpan => Record fields -> SourceSpan
sourceSpan = getField
setSourceSpan :: HasField fields SourceSpan => Record fields -> SourceSpan -> Record fields
setSourceSpan = setField
-- Instances
instance Listable Cost where
tiers = cons1 Cost

View File

@ -1,79 +1,113 @@
module Interpreter (interpret, Comparable, diffTerms) where
{-# LANGUAGE RankNTypes #-}
module Interpreter (Comparable, DiffConstructor, diffTerms) where
import Algorithm
import Category
import Control.Arrow
import Control.Comonad.Cofree
import Control.Monad.Free
import Data.Copointed
import Data.Align.Generic
import Data.Functor.Foldable
import Data.Functor.Both
import qualified Data.OrderedMap as Map
import qualified Data.List as List
import Data.List ((\\))
import Data.Maybe
import Data.OrderedMap ((!))
import Data.RandomWalkSimilarity as RWS
import Data.Record
import Data.These
import Diff
import Operation
import Info
import Patch
import Prelude hiding (lookup)
import Prologue hiding (lookup)
import SES
import Syntax
import Syntax as S
import Term
-- | Returns whether two terms are comparable
type Comparable a annotation = Term a annotation -> Term a annotation -> Bool
type Comparable f annotation = Term f annotation -> Term f annotation -> Bool
-- | Diff two terms, given the default Categorizable.comparable function.
diffTerms :: (Eq a, Eq annotation, Categorizable annotation) => Term a annotation -> Term a annotation -> Diff a annotation
diffTerms = interpret comparable
-- | Constructs a diff from the CofreeF containing its annotation and syntax. This function has the opportunity to, for example, cache properties in the annotation.
type DiffConstructor f annotation = TermF f (Both annotation) (Diff f annotation) -> Diff f annotation
-- | Diff two terms, given a function that determines whether two terms can be compared.
interpret :: (Eq a, Eq annotation) => Comparable a annotation -> Term a annotation -> Term a annotation -> Diff a annotation
interpret comparable a b = fromMaybe (Pure $ Replace a b) $ constructAndRun comparable a b
-- | Diff two terms recursively, given functions characterizing the diffing.
diffTerms :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
=> DiffConstructor (Syntax leaf) (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff.
-> Comparable (Syntax leaf) (Record fields) -- ^ A function to determine whether or not two terms should even be compared.
-> SES.Cost (SyntaxDiff leaf fields) -- ^ A function to compute the cost of a given diff node.
-> SyntaxTerm leaf fields -- ^ A term representing the old state.
-> SyntaxTerm leaf fields -- ^ A term representing the new state.
-> SyntaxDiff leaf fields
diffTerms construct comparable cost a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable cost a b
-- | A hylomorphism. Given an `a`, unfold and then refold into a `b`.
hylo :: Functor f => (t -> f b -> b) -> (a -> (t, f a)) -> a -> b
hylo down up a = down annotation $ hylo down up <$> syntax where
(annotation, syntax) = up a
-- | Diff two terms recursively, given functions characterizing the diffing. If the terms are incomparable, returns 'Nothing'.
diffComparableTerms :: (Eq leaf, HasField fields Category, HasField fields (Maybe FeatureVector))
=> DiffConstructor (Syntax leaf) (Record fields)
-> Comparable (Syntax leaf) (Record fields)
-> SES.Cost (SyntaxDiff leaf fields)
-> SyntaxTerm leaf fields
-> SyntaxTerm leaf fields
-> Maybe (SyntaxDiff leaf fields)
diffComparableTerms construct comparable cost = recur
where recur a b
| (category <$> a) == (category <$> b) = hylo construct runCofree <$> zipTerms a b
| comparable a b = runAlgorithm construct recur cost (Just <$> algorithmWithTerms construct a b)
| otherwise = Nothing
-- | Constructs an algorithm and runs it
constructAndRun :: (Eq a, Eq annotation) => Comparable a annotation -> Term a annotation -> Term a annotation -> Maybe (Diff a annotation)
constructAndRun _ a b | a == b = hylo (curry $ Free . uncurry Annotated) (copoint &&& unwrap) <$> zipTerms a b where
-- | Construct an algorithm to diff a pair of terms.
algorithmWithTerms :: Applicative diff
=> (TermF (Syntax leaf) (Both a) (diff (Patch (Term (Syntax leaf) a))) -> diff (Patch (Term (Syntax leaf) a)))
-> Term (Syntax leaf) a
-> Term (Syntax leaf) a
-> Algorithm (Term (Syntax leaf) a) (diff (Patch (Term (Syntax leaf) a))) (diff (Patch (Term (Syntax leaf) a)))
algorithmWithTerms construct t1 t2 = maybe (recursively t1 t2) (fmap annotate) $ case (unwrap t1, unwrap t2) of
(Indexed a, Indexed b) ->
Just $ Indexed <$> bySimilarity a b
(S.Module idA a, S.Module idB b) ->
Just $ S.Module <$> recursively idA idB <*> bySimilarity a b
(S.FunctionCall identifierA argsA, S.FunctionCall identifierB argsB) -> Just $
S.FunctionCall <$> recursively identifierA identifierB
<*> bySimilarity argsA argsB
(S.Switch exprA casesA, S.Switch exprB casesB) -> Just $
S.Switch <$> bySimilarity exprA exprB
<*> bySimilarity casesA casesB
(S.Object tyA a, S.Object tyB b) -> Just $
S.Object <$> maybeRecursively tyA tyB
<*> bySimilarity a b
(Commented commentsA a, Commented commentsB b) -> Just $
Commented <$> bySimilarity commentsA commentsB
<*> maybeRecursively a b
(Array tyA a, Array tyB b) -> Just $
Array <$> maybeRecursively tyA tyB
<*> bySimilarity a b
(S.Class identifierA paramsA expressionsA, S.Class identifierB paramsB expressionsB) -> Just $
S.Class <$> recursively identifierA identifierB
<*> maybeRecursively paramsA paramsB
<*> bySimilarity expressionsA expressionsB
(S.Method identifierA receiverA tyA paramsA expressionsA, S.Method identifierB receiverB tyB paramsB expressionsB) -> Just $
S.Method <$> recursively identifierA identifierB
<*> maybeRecursively receiverA receiverB
<*> maybeRecursively tyA tyB
<*> bySimilarity paramsA paramsB
<*> bySimilarity expressionsA expressionsB
(S.Function idA paramsA tyA bodyA, S.Function idB paramsB tyB bodyB) -> Just $
S.Function <$> recursively idA idB
<*> bySimilarity paramsA paramsB
<*> maybeRecursively tyA tyB
<*> bySimilarity bodyA bodyB
_ -> Nothing
where
annotate = construct . (both (extract t1) (extract t2) :<)
constructAndRun comparable a b | not $ comparable a b = Nothing
maybeRecursively :: Applicative f => Maybe a -> Maybe a -> Algorithm a (f (Patch a)) (Maybe (f (Patch a)))
maybeRecursively a b = sequenceA $ case (a, b) of
(Just a, Just b) -> Just $ recursively a b
(Nothing, Just b) -> Just $ pure (inserting b)
(Just a, Nothing) -> Just $ pure (deleting a)
(Nothing, Nothing) -> Nothing
constructAndRun comparable (annotation1 :< a) (annotation2 :< b) =
run comparable $ algorithm a b where
algorithm (Indexed a') (Indexed b') = Free $ ByIndex a' b' (annotate . Indexed)
algorithm (Keyed a') (Keyed b') = Free $ ByKey a' b' (annotate . Keyed)
algorithm (Leaf a') (Leaf b') | a' == b' = annotate $ Leaf b'
algorithm a' b' = Free $ Recursive (annotation1 :< a') (annotation2 :< b') Pure
annotate = Pure . Free . Annotated (Both (annotation1, annotation2))
-- | Runs the diff algorithm
run :: (Eq a, Eq annotation) => Comparable a annotation -> Algorithm a annotation (Diff a annotation) -> Maybe (Diff a annotation)
run _ (Pure diff) = Just diff
run comparable (Free (Recursive (annotation1 :< a) (annotation2 :< b) f)) = run comparable . f $ recur a b where
recur (Indexed a') (Indexed b') | length a' == length b' = annotate . Indexed $ zipWith (interpret comparable) a' b'
recur (Fixed a') (Fixed b') | length a' == length b' = annotate . Fixed $ zipWith (interpret comparable) a' b'
recur (Keyed a') (Keyed b') | Map.keys a' == bKeys = annotate . Keyed . Map.fromList . fmap repack $ bKeys
where
bKeys = Map.keys b'
repack key = (key, interpretInBoth key a' b')
interpretInBoth key x y = interpret comparable (x ! key) (y ! key)
recur _ _ = Pure $ Replace (annotation1 :< a) (annotation2 :< b)
annotate = Free . Annotated (Both (annotation1, annotation2))
run comparable (Free (ByKey a b f)) = run comparable $ f byKey where
byKey = Map.fromList $ toKeyValue <$> List.union aKeys bKeys
toKeyValue key | List.elem key deleted = (key, Pure . Delete $ a ! key)
toKeyValue key | List.elem key inserted = (key, Pure . Insert $ b ! key)
toKeyValue key = (key, interpret comparable (a ! key) (b ! key))
aKeys = Map.keys a
bKeys = Map.keys b
deleted = aKeys \\ bKeys
inserted = bKeys \\ aKeys
run comparable (Free (ByIndex a b f)) = run comparable . f $ ses (constructAndRun comparable) diffCost a b
-- | Run an algorithm, given functions characterizing the evaluation.
runAlgorithm :: (GAlign f, HasField fields Category, Eq (f (Cofree f Category)), Traversable f, HasField fields (Maybe FeatureVector))
=> (CofreeF f (Both (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to wrap up & possibly annotate every produced diff.
-> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
-> SES.Cost (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to compute the cost of a given diff node.
-> Algorithm (Cofree f (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) a -- ^ The algorithm to run.
-> a
runAlgorithm construct recur cost = iterAp $ \case
Recursive a b f -> f (maybe (replacing a b) (construct . (both (extract a) (extract b) :<)) $ do
aligned <- galign (unwrap a) (unwrap b)
traverse (these (Just . deleting) (Just . inserting) recur) aligned)
ByIndex as bs f -> f (ses recur cost as bs)
BySimilarity as bs f -> f (rws recur as bs)

View File

@ -1,10 +1,15 @@
{-# LANGUAGE DataKinds #-}
module Language where
import Data.Text
import Data.Record
import Info
import Prologue
import qualified Syntax as S
import Term
-- | A programming language.
data Language =
C
C
| CoffeeScript
| CPlusPlus
| CSharp
@ -13,6 +18,7 @@ data Language =
| HTML
| Java
| JavaScript
| Markdown
| ObjectiveC
| Perl
| PHP
@ -20,6 +26,8 @@ data Language =
| R
| Ruby
| Swift
| Go
deriving (Show)
-- | Returns a Language based on the file extension (including the ".").
languageForType :: Text -> Maybe Language
@ -27,5 +35,16 @@ languageForType mediaType = case mediaType of
".h" -> Just C
".c" -> Just C
".js" -> Just JavaScript
".md" -> Just Markdown
".rb" -> Just Ruby
".go" -> Just Language.Go
_ -> Nothing
toVarDecl :: (HasField fields Category) => Term (S.Syntax Text) (Record fields) -> Term (S.Syntax Text) (Record fields)
toVarDecl child = cofree $ setCategory (extract child) VarDecl :< S.VarDecl child Nothing
toTuple :: Term (S.Syntax Text) (Record fields) -> [Term (S.Syntax Text) (Record fields)]
toTuple child | S.Indexed [key,value] <- unwrap child = [cofree (extract child :< S.Pair key value)]
toTuple child | S.Fixed [key,value] <- unwrap child = [cofree (extract child :< S.Pair key value)]
toTuple child | S.Leaf c <- unwrap child = [cofree (extract child :< S.Comment c)]
toTuple child = pure child

19
src/Language/C.hs Normal file
View File

@ -0,0 +1,19 @@
{-# LANGUAGE DataKinds #-}
module Language.C where
import Info
import Prologue
import Source
import qualified Syntax as S
import Term
termAssignment
:: Source Char -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
termAssignment _ _ _ = Nothing
categoryForCProductionName :: Text -> Category
categoryForCProductionName = Other

139
src/Language/Go.hs Normal file
View File

@ -0,0 +1,139 @@
{-# LANGUAGE DataKinds #-}
module Language.Go where
import Prologue
import Info
import Source
import Term
import qualified Syntax as S
termAssignment
:: Source Char -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
termAssignment source category children = case (category, children) of
(Module, [moduleName]) -> Just $ S.Module moduleName []
(Import, [importName]) -> Just $ S.Import importName []
(Function, [id, params, block]) -> Just $ S.Function id (toList (unwrap params)) Nothing (toList (unwrap block))
(Function, [id, params, ty, block]) -> Just $ S.Function id (toList (unwrap params)) (Just ty) (toList (unwrap block))
(For, [body]) | Other "block" <- Info.category (extract body) -> Just $ S.For [] (toList (unwrap body))
(For, [forClause, body]) | Other "for_clause" <- Info.category (extract forClause) -> Just $ S.For (toList (unwrap forClause)) (toList (unwrap body))
(For, [rangeClause, body]) | Other "range_clause" <- Info.category (extract rangeClause) -> Just $ S.For (toList (unwrap rangeClause)) (toList (unwrap body))
(TypeDecl, [identifier, ty]) -> Just $ S.TypeDecl identifier ty
(StructTy, _) -> Just (S.Ty children)
(FieldDecl, [idList])
| [ident] <- toList (unwrap idList)
-> Just (S.FieldDecl ident Nothing Nothing)
(FieldDecl, [idList, ty])
| [ident] <- toList (unwrap idList)
-> Just $ case Info.category (extract ty) of
StringLiteral -> S.FieldDecl ident Nothing (Just ty)
_ -> S.FieldDecl ident (Just ty) Nothing
(FieldDecl, [idList, ty, tag])
| [ident] <- toList (unwrap idList)
-> Just (S.FieldDecl ident (Just ty) (Just tag))
(ParameterDecl, param : ty) -> Just $ S.ParameterDecl (listToMaybe ty) param
(Assignment, [identifier, expression]) -> Just $ S.VarAssignment identifier expression
(Select, _) -> Just $ S.Select (children >>= toList . unwrap)
(Go, [expr]) -> Just $ S.Go expr
(Defer, [expr]) -> Just $ S.Defer expr
(SubscriptAccess, [a, b]) -> Just $ S.SubscriptAccess a b
(IndexExpression, [a, b]) -> Just $ S.SubscriptAccess a b
(Slice, [a, rest]) -> Just $ S.SubscriptAccess a rest
(Other "composite_literal", [ty, values])
| ArrayTy <- Info.category (extract ty)
-> Just $ S.Array (Just ty) (toList (unwrap values))
| DictionaryTy <- Info.category (extract ty)
-> Just $ S.Object (Just ty) (toList (unwrap values))
| SliceTy <- Info.category (extract ty)
-> Just $ S.SubscriptAccess ty values
(Other "composite_literal", []) -> Just $ S.Struct Nothing []
(Other "composite_literal", [ty]) -> Just $ S.Struct (Just ty) []
(Other "composite_literal", [ty, values]) -> Just $ S.Struct (Just ty) (toList (unwrap values))
(TypeAssertion, [a, b]) -> Just $ S.TypeAssertion a b
(TypeConversion, [a, b]) -> Just $ S.TypeConversion a b
-- TODO: Handle multiple var specs
(VarAssignment, [identifier, expression]) -> Just $ S.VarAssignment identifier expression
(VarDecl, [idList, ty]) | Identifier <- Info.category (extract ty) -> Just $ S.VarDecl idList (Just ty)
(FunctionCall, id : rest) -> Just $ S.FunctionCall id rest
(AnonymousFunction, [params, _, body])
| [params'] <- toList (unwrap params)
-> Just $ S.AnonymousFunction (toList (unwrap params')) (toList (unwrap body))
(PointerTy, _) -> Just $ S.Ty children
(ChannelTy, _) -> Just $ S.Ty children
(Send, [channel, expr]) -> Just $ S.Send channel expr
(Operator, _) -> Just $ S.Operator children
(FunctionTy, _) -> Just $ S.Ty children
(IncrementStatement, _) -> Just $ S.Leaf (toText source)
(DecrementStatement, _) -> Just $ S.Leaf (toText source)
(QualifiedIdentifier, _) -> Just $ S.Leaf (toText source)
(Method, [receiverParams, name, body]) -> Just (S.Method name (Just receiverParams) Nothing [] (toList (unwrap body)))
(Method, [receiverParams, name, params, body])
-> Just (S.Method name (Just receiverParams) Nothing (toList (unwrap params)) (toList (unwrap body)))
(Method, [receiverParams, name, params, ty, body])
-> Just (S.Method name (Just receiverParams) (Just ty) (toList (unwrap params)) (toList (unwrap body)))
_ -> Nothing
categoryForGoName :: Text -> Category
categoryForGoName = \case
"identifier" -> Identifier
"int_literal" -> NumberLiteral
"float_literal" -> FloatLiteral
"comment" -> Comment
"return_statement" -> Return
"interpreted_string_literal" -> StringLiteral
"raw_string_literal" -> StringLiteral
"binary_expression" -> RelationalOperator
"function_declaration" -> Function
"func_literal" -> AnonymousFunction
"call_expression" -> FunctionCall
"selector_expression" -> SubscriptAccess
"index_expression" -> IndexExpression
"slice_expression" -> Slice
"parameters" -> Args
"short_var_declaration" -> VarDecl
"var_spec" -> VarAssignment
"const_spec" -> VarAssignment
"assignment_statement" -> Assignment
"source_file" -> Program
"package_clause" -> Module
"if_statement" -> If
"for_statement" -> For
"expression_switch_statement" -> Switch
"type_switch_statement" -> Switch
"expression_case_clause" -> Case
"type_case_clause" -> Case
"select_statement" -> Select
"communication_case" -> Case
"defer_statement" -> Defer
"go_statement" -> Go
"type_assertion_expression" -> TypeAssertion
"type_conversion_expression" -> TypeConversion
"keyed_element" -> Pair
"struct_type" -> StructTy
"map_type" -> DictionaryTy
"array_type" -> ArrayTy
"implicit_length_array_type" -> ArrayTy
"parameter_declaration" -> ParameterDecl
"expression_case" -> Case
"type_spec" -> TypeDecl
"field_declaration" -> FieldDecl
"pointer_type" -> PointerTy
"slice_type" -> SliceTy
"element" -> Element
"literal_value" -> Literal
"channel_type" -> ChannelTy
"send_statement" -> Send
"unary_expression" -> Operator
"function_type" -> FunctionTy
"inc_statement" -> IncrementStatement
"dec_statement" -> DecrementStatement
"qualified_identifier" -> QualifiedIdentifier
"break_statement" -> Break
"continue_statement" -> Continue
"rune_literal" -> RuneLiteral
"method_declaration" -> Method
"import_spec" -> Import
"block" -> ExpressionStatements
s -> Other (toS s)

142
src/Language/JavaScript.hs Normal file
View File

@ -0,0 +1,142 @@
{-# LANGUAGE DataKinds #-}
module Language.JavaScript where
import Info
import Prologue
import Source
import Language
import qualified Syntax as S
import Term
termAssignment
:: Source Char -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
termAssignment _ category children
= case (category, children) of
(Assignment, [ identifier, value ]) -> Just $ S.Assignment identifier value
(MathAssignment, [ identifier, value ]) -> Just $ S.OperatorAssignment identifier value
(MemberAccess, [ base, property ]) -> Just $ S.MemberAccess base property
(SubscriptAccess, [ base, element ]) -> Just $ S.SubscriptAccess base element
(CommaOperator, [ a, b ])
| S.Indexed rest <- unwrap b
-> Just $ S.Indexed $ a : rest
(FunctionCall, member : args)
| S.MemberAccess target method <- unwrap member
-> Just $ S.MethodCall target method (toList . unwrap =<< args)
(FunctionCall, function : args) -> Just $ S.FunctionCall function (toList . unwrap =<< args)
(Ternary, condition : cases) -> Just $ S.Ternary condition cases
(VarAssignment, [ x, y ]) -> Just $ S.VarAssignment x y
(VarDecl, _) -> Just . S.Indexed $ toVarDecl <$> children
(Object, _) -> Just . S.Object Nothing $ foldMap toTuple children
(DoWhile, [ expr, body ]) -> Just $ S.DoWhile expr body
(Constructor, [ expr ]) -> Just $ S.Constructor expr
(Try, [ body ]) -> Just $ S.Try [body] [] Nothing Nothing
(Try, [ body, catch ])
| Catch <- Info.category (extract catch)
-> Just $ S.Try [body] [catch] Nothing Nothing
(Try, [ body, finally ])
| Finally <- Info.category (extract finally)
-> Just $ S.Try [body] [] Nothing (Just finally)
(Try, [ body, catch, finally ])
| Catch <- Info.category (extract catch)
, Finally <- Info.category (extract finally)
-> Just $ S.Try [body] [catch] Nothing (Just finally)
(ArrayLiteral, _) -> Just $ S.Array Nothing children
(Method, [ identifier, params, exprs ]) -> Just $ S.Method identifier Nothing Nothing (toList (unwrap params)) (toList (unwrap exprs))
(Method, [ identifier, exprs ]) -> Just $ S.Method identifier Nothing Nothing [] (toList (unwrap exprs))
(Class, [ identifier, superclass, definitions ]) -> Just $ S.Class identifier (Just superclass) (toList (unwrap definitions))
(Class, [ identifier, definitions ]) -> Just $ S.Class identifier Nothing (toList (unwrap definitions))
(Import, [ statements, identifier ] ) -> Just $ S.Import identifier (toList (unwrap statements))
(Import, [ identifier ] ) -> Just $ S.Import identifier []
(Export, [ statements, identifier] ) -> Just $ S.Export (Just identifier) (toList (unwrap statements))
(Export, [ statements ] )
| S.Indexed _ <- unwrap statements
-> Just $ S.Export Nothing (toList (unwrap statements))
| otherwise -> Just $ S.Export (Just statements) []
(For, _)
| Just (exprs, body) <- unsnoc children
-> Just $ S.For exprs [body]
(Function, [ body ]) -> Just $ S.AnonymousFunction [] [body]
(Function, [ params, body ]) -> Just $ S.AnonymousFunction (toList (unwrap params)) [body]
(Function, [ id, params, body ]) -> Just $ S.Function id (toList (unwrap params)) Nothing [body]
_ -> Nothing
categoryForJavaScriptProductionName :: Text -> Category
categoryForJavaScriptProductionName name = case name of
"object" -> Object
"expression_statement" -> ExpressionStatements
"trailing_expression_statement" -> ExpressionStatements
"this_expression" -> Identifier
"null" -> Identifier
"undefined" -> Identifier
"arrow_function" -> Function
"generator_function" -> Function
"math_op" -> MathOperator -- math operator, e.g. +, -, *, /.
"bool_op" -> BooleanOperator -- boolean operator, e.g. ||, &&.
"comma_op" -> CommaOperator -- comma operator, e.g. expr1, expr2.
"delete_op" -> Operator -- delete operator, e.g. delete x[2].
"type_op" -> Operator -- type operator, e.g. typeof Object.
"void_op" -> Operator -- void operator, e.g. void 2.
"for_statement" -> For
"trailing_for_statement" -> For
"for_in_statement" -> For
"trailing_for_in_statement" -> For
"for_of_statement" -> For
"trailing_for_of_statement" -> For
"new_expression" -> Constructor
"class" -> Class
"catch" -> Catch
"finally" -> Finally
"if_statement" -> If
"trailing_if_statement" -> If
"empty_statement" -> Empty
"program" -> Program
"function_call" -> FunctionCall
"pair" -> Pair
"string" -> StringLiteral
"integer" -> IntegerLiteral
"number" -> NumberLiteral
"float" -> FloatLiteral
"symbol" -> SymbolLiteral
"array" -> ArrayLiteral
"function" -> Function
"identifier" -> Identifier
"formal_parameters" -> Params
"arguments" -> Args
"statement_block" -> ExpressionStatements
"assignment" -> Assignment
"member_access" -> MemberAccess
"op" -> Operator
"subscript_access" -> SubscriptAccess
"regex" -> Regex
"template_string" -> TemplateString
"var_assignment" -> VarAssignment
"var_declaration" -> VarDecl
"trailing_var_declaration" -> VarDecl
"switch_statement" -> Switch
"math_assignment" -> MathAssignment
"case" -> Case
"true" -> Boolean
"false" -> Boolean
"ternary" -> Ternary
"while_statement" -> While
"trailing_while_statement" -> While
"do_statement" -> DoWhile
"trailing_do_statement" -> DoWhile
"return_statement" -> Return
"trailing_return_statement" -> Return
"throw_statement" -> Throw
"trailing_throw_statement" -> Throw
"try_statement" -> Try
"method_definition" -> Method
"comment" -> Comment
"bitwise_op" -> BitwiseOperator
"rel_op" -> RelationalOperator
"import_statement" -> Import
"export_statement" -> Export
"break_statement" -> Break
"continue_statement" -> Continue
"yield_statement" -> Yield
_ -> Other name

42
src/Language/Markdown.hs Normal file
View File

@ -0,0 +1,42 @@
{-# LANGUAGE DataKinds #-}
module Language.Markdown where
import CMark
import Data.Record
import Data.Text
import Info
import Parser
import Prologue
import Range
import Source
import Syntax
cmarkParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
cmarkParser SourceBlob{..} = pure . toTerm (totalRange source) (rangeToSourceSpan source $ totalRange source) $ commonmarkToNode [ optSourcePos, optSafe ] (toText source)
where toTerm :: Range -> SourceSpan -> Node -> Cofree (Syntax Text) (Record '[Range, Category, SourceSpan])
toTerm within withinSpan (Node position t children) =
let
range = maybe within (sourceSpanToRange source . toSpan) position
span = maybe withinSpan toSpan position
in
cofree $ (range :. toCategory t :. span :. Nil) :< case t of
-- Leaves
CODE text -> Leaf text
TEXT text -> Leaf text
CODE_BLOCK _ text -> Leaf text
-- Branches
_ -> Indexed (toTerm range span <$> children)
toCategory :: NodeType -> Category
toCategory (TEXT _) = Other "text"
toCategory (CODE _) = Other "code"
toCategory (HTML_BLOCK _) = Other "html"
toCategory (HTML_INLINE _) = Other "html"
toCategory (HEADING _) = Other "heading"
toCategory (LIST ListAttributes{..}) = Other $ case listType of
BULLET_LIST -> "unordered list"
ORDERED_LIST -> "ordered list"
toCategory LINK{} = Other "link"
toCategory IMAGE{} = Other "image"
toCategory t = Other (show t)
toSpan PosInfo{..} = SourceSpan (SourcePos (pred startLine) (pred startColumn)) (SourcePos (pred endLine) endColumn)

171
src/Language/Ruby.hs Normal file
View File

@ -0,0 +1,171 @@
{-# LANGUAGE DataKinds #-}
module Language.Ruby where
import Data.List (partition)
import Info
import Prologue
import Source
import Language
import qualified Syntax as S
import Term
termAssignment
:: Source Char -- ^ The source of the term.
-> Category -- ^ The category for the term.
-> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -- ^ The child nodes of the term.
-> Maybe (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan])) -- ^ The resulting term, in Maybe.
termAssignment _ category children
= case (category, children) of
(ArgumentPair, [ k, v ] ) -> Just $ S.Pair k v
(KeywordParameter, [ k, v ] ) -> Just $ S.Pair k v
-- NB: ("keyword_parameter", k) is a required keyword parameter, e.g.:
-- def foo(name:); end
-- Let it fall through to generate an Indexed syntax.
(OptionalParameter, [ k, v ] ) -> Just $ S.Pair k v
(ArrayLiteral, _ ) -> Just $ S.Array Nothing children
(Assignment, [ identifier, value ]) -> Just $ S.Assignment identifier value
(Begin, _ ) -> Just $ case partition (\x -> Info.category (extract x) == Rescue) children of
(rescues, rest) -> case partition (\x -> Info.category (extract x) == Ensure || Info.category (extract x) == Else) rest of
(ensureElse, body) -> case ensureElse of
[ elseBlock, ensure ]
| Else <- Info.category (extract elseBlock)
, Ensure <- Info.category (extract ensure) -> S.Try body rescues (Just elseBlock) (Just ensure)
[ ensure, elseBlock ]
| Ensure <- Info.category (extract ensure)
, Else <- Info.category (extract elseBlock) -> S.Try body rescues (Just elseBlock) (Just ensure)
[ elseBlock ] | Else <- Info.category (extract elseBlock) -> S.Try body rescues (Just elseBlock) Nothing
[ ensure ] | Ensure <- Info.category (extract ensure) -> S.Try body rescues Nothing (Just ensure)
_ -> S.Try body rescues Nothing Nothing
(Class, constant : superclass : body)
| Superclass <- Info.category (extract superclass)
-> Just $ S.Class constant (Just superclass) body
(Class, constant : rest) -> Just $ S.Class constant Nothing rest
(SingletonClass, identifier : rest) -> Just $ S.Class identifier Nothing rest
(Case, _) -> Just $ uncurry S.Switch (Prologue.break ((== When) . Info.category . extract) children)
(When, expr : body) -> Just $ S.Case expr body
(Ternary, condition : cases) -> Just $ S.Ternary condition cases
(Constant, _ ) -> Just $ S.Fixed children
(MethodCall, fn : args)
| MemberAccess <- Info.category (extract fn)
, [target, method] <- toList (unwrap fn)
-> Just $ S.MethodCall target method (toList . unwrap =<< args)
| otherwise
-> Just $ S.FunctionCall fn (toList . unwrap =<< args)
(Other "lambda", first : rest)
| null rest -> Just $ S.AnonymousFunction [] [first]
| otherwise -> Just $ S.AnonymousFunction (toList (unwrap first)) rest
(Object, _ ) -> Just . S.Object Nothing $ foldMap toTuple children
(Modifier If, [ lhs, condition ]) -> Just $ S.If condition [lhs]
(Modifier Unless, [lhs, rhs]) -> Just $ S.If (withRecord (setCategory (extract rhs) Negate) (S.Negate rhs)) [lhs]
(Unless, expr : rest) -> Just $ S.If (withRecord (setCategory (extract expr) Negate) (S.Negate expr)) rest
(Modifier Until, [ lhs, rhs ]) -> Just $ S.While (withRecord (setCategory (extract rhs) Negate) (S.Negate rhs)) [lhs]
(Until, expr : rest) -> Just $ S.While (withRecord (setCategory (extract expr) Negate) (S.Negate expr)) rest
(Elsif, condition : body ) -> Just $ S.If condition body
(SubscriptAccess, [ base, element ]) -> Just $ S.SubscriptAccess base element
(For, lhs : expr : rest ) -> Just $ S.For [lhs, expr] rest
(OperatorAssignment, [ identifier, value ]) -> Just $ S.OperatorAssignment identifier value
(MemberAccess, [ base, property ]) -> Just $ S.MemberAccess base property
(Method, expr : methodName : rest)
| params : body <- rest
, Params <- Info.category (extract params)
-> Just $ S.Method methodName (Just expr) Nothing (toList (unwrap params)) body
| Identifier <- Info.category (extract methodName)
-> Just $ S.Method methodName (Just expr) Nothing [] rest
(Method, identifier : rest)
| params : body <- rest
, Params <- Info.category (extract params)
-> Just $ S.Method identifier Nothing Nothing (toList (unwrap params)) body
| otherwise
-> Just $ S.Method identifier Nothing Nothing [] rest
(Module, constant : body ) -> Just $ S.Module constant body
(Modifier Rescue, [lhs, rhs] ) -> Just $ S.Rescue [lhs] [rhs]
(Rescue, exceptions : exceptionVar : rest)
| RescueArgs <- Info.category (extract exceptions)
, RescuedException <- Info.category (extract exceptionVar)
-> Just $ S.Rescue (toList (unwrap exceptions) <> [exceptionVar]) rest
(Rescue, exceptionVar : rest)
| RescuedException <- Info.category (extract exceptionVar)
-> Just $ S.Rescue [exceptionVar] rest
(Rescue, exceptions : body)
| RescueArgs <- Info.category (extract exceptions)
-> Just $ S.Rescue (toList (unwrap exceptions)) body
(Rescue, body) -> Just $ S.Rescue [] body
(Modifier While, [ lhs, condition ]) -> Just $ S.While condition [lhs]
_ | category `elem` [ BeginBlock, EndBlock ] -> Just $ S.BlockStatement children
_ -> Nothing
where
withRecord record syntax = cofree (record :< syntax)
categoryForRubyName :: Text -> Category
categoryForRubyName = \case
"argument_list" -> Args
"argument_list_with_parens" -> Args
"argument_pair" -> ArgumentPair
"array" -> ArrayLiteral
"assignment" -> Assignment
"begin_block" -> BeginBlock
"begin" -> Begin
"binary" -> Binary
"block_parameter" -> BlockParameter
"block_parameters" -> Params
"boolean" -> Boolean
"call" -> MemberAccess
"case" -> Case
"class" -> Class
"comment" -> Comment
"conditional" -> Ternary
"constant" -> Constant
"element_reference" -> SubscriptAccess
"else" -> Else
"elsif" -> Elsif
"empty_statement" -> Empty
"end_block" -> EndBlock
"ensure" -> Ensure
"exception_variable" -> RescuedException
"exceptions" -> RescueArgs
"false" -> Boolean
"float" -> NumberLiteral
"for" -> For
"hash_splat_parameter" -> HashSplatParameter
"hash" -> Object
"identifier" -> Identifier
"if_modifier" -> Modifier If
"if" -> If
"instance_variable" -> Identifier
"integer" -> IntegerLiteral
"interpolation" -> Interpolation
"keyword_parameter" -> KeywordParameter
"lambda_parameters" -> Params
"method_call" -> MethodCall
"method_parameters" -> Params
"method" -> Method
"module" -> Module
"nil" -> Identifier
"operator_assignment" -> OperatorAssignment
"optional_parameter" -> OptionalParameter
"pair" -> Pair
"program" -> Program
"range" -> RangeExpression
"regex" -> Regex
"rescue_modifier" -> Modifier Rescue
"rescue" -> Rescue
"return" -> Return
"scope_resolution" -> ScopeOperator
"self" -> Identifier
"singleton_class" -> SingletonClass
"splat_parameter" -> SplatParameter
"string" -> StringLiteral
"subshell" -> Subshell
"superclass" -> Superclass
"symbol" -> SymbolLiteral
"true" -> Boolean
"unary" -> Unary
"unless_modifier" -> Modifier Unless
"unless" -> Unless
"until_modifier" -> Modifier Until
"until" -> Until
"when" -> When
"while_modifier" -> Modifier While
"while" -> While
"yield" -> Yield
s -> Other s

View File

@ -1,64 +0,0 @@
{-# LANGUAGE FlexibleInstances #-}
module Line where
import Control.Applicative
import Data.Align
import Data.Coalescent
import Data.Functor.Both
-- | A line of items or an empty line.
data Line a = Line [a] | Closed [a]
deriving (Eq, Foldable, Functor, Show, Traversable)
-- | Construct a single-element Line with a predicate determining whether the line is open.
pureBy :: (a -> Bool) -> a -> Line a
pureBy predicate a | predicate a = Line [ a ]
| otherwise = Closed [ a ]
unLine :: Line a -> [a]
unLine (Line as) = as
unLine (Closed as) = as
-- | Is the given line empty?
isEmpty :: Line a -> Bool
isEmpty = null . unLine
-- | Is the given line open?
isOpen :: Line a -> Bool
isOpen (Line _) = True
isOpen _ = False
-- | The increment the given line implies for line numbering.
lineIncrement :: Num n => Line a -> n
lineIncrement line | isEmpty line = 0
| otherwise = 1
-- | Transform the line by applying a function to a list of all the items in the
-- | line.
wrapLineContents :: ([a] -> b) -> Line a -> Line b
wrapLineContents transform line = lineMap (if isEmpty line then const [] else pure . transform) line
-- | Map the elements of a line, preserving closed lines.
lineMap :: ([a] -> [b]) -> Line a -> Line b
lineMap f (Line ls) = Line (f ls)
lineMap f (Closed cs) = Closed (f cs)
-- | Return the first item in the Foldable, or Nothing if it's empty.
maybeFirst :: Foldable f => f a -> Maybe a
maybeFirst = foldr (const . Just) Nothing
instance Applicative Line where
pure = Line . pure
as <*> bs | isOpen as && isOpen bs = Line (unLine as <*> unLine bs)
| otherwise = Closed (unLine as <*> unLine bs)
instance Monoid (Line a) where
mempty = Line []
mappend xs ys = lineMap (mappend (unLine xs)) ys
instance Coalescent (Line a) where
coalesce a b | isOpen a = pure (a `mappend` b)
| otherwise = pure a <|> pure b
instance Coalescent (Both (Line a)) where
coalesce as bs = tsequenceL (pure (Line [])) (coalesce <$> as <*> bs)

View File

@ -1,20 +0,0 @@
module Operation where
import Diff
import Data.OrderedMap
import qualified Data.Text as T
import Term
-- | A single step in a diffing algorithm.
data Operation
a -- ^ The type of leaves in the syntax tree, typically String, but possibly some datatype representing different leaves more precisely.
annotation -- ^ The type of annotations.
f -- ^ The type representing another level of the diffing algorithm. Often Algorithm.
=
-- | Recursively diff two terms and pass the result to the continuation.
Recursive (Term a annotation) (Term a annotation) (Diff a annotation -> f)
-- | Diff two dictionaries and pass the result to the continuation.
| ByKey (OrderedMap T.Text (Term a annotation)) (OrderedMap T.Text (Term a annotation)) (OrderedMap T.Text (Diff a annotation) -> f)
-- | Diff two arrays and pass the result to the continuation.
| ByIndex [Term a annotation] [Term a annotation] ([Diff a annotation] -> f)
deriving Functor

128
src/Parse.hs Normal file
View File

@ -0,0 +1,128 @@
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators, DeriveAnyClass #-}
module Parse where
import Arguments
import Category
import Data.Aeson (ToJSON)
import Data.Aeson.Encode.Pretty
import qualified Data.ByteString.Char8 as B1
import qualified Data.Text.ICU.Convert as Convert
import qualified Data.Text.ICU.Detect as Detect
import Data.Record
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Info
import Language
import Language.Markdown
import Parser
import Prologue
import Source
import Syntax
import System.FilePath
import Term
import TreeSitter
import Renderer
import Renderer.JSON()
import Renderer.SExpression
import Text.Parser.TreeSitter.C
import Text.Parser.TreeSitter.Go
import Text.Parser.TreeSitter.JavaScript
import Text.Parser.TreeSitter.Ruby
data ParseJSON = ParseJSON
{ category :: Text
, range :: Range
, text :: SourceText
, children :: [ParseJSON]
} deriving (Show, Generic, ToJSON)
run :: Arguments -> IO ()
run Arguments{..} = do
sources <- sequence $ readAndTranscodeFile <$> filePaths
terms <- zipWithM (\parser sourceBlob -> parser sourceBlob) parsers (sourceBlobs sources)
writeToOutput output $ case format of
SExpression -> [foldr (\t acc -> printTerm t 0 <> acc) "" terms]
_ -> toS . encodePretty . cata algebra <$> terms
where
sourceBlobs sources = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
parsers = parserWithSource <$> filePaths
algebra :: TermF (Syntax leaf) (Record '[SourceText, Range, Category, SourceSpan]) ParseJSON -> ParseJSON
algebra term = case term of
(annotation :< Leaf _) -> ParseJSON (category' annotation) (range' annotation) (text' annotation) []
(annotation :< syntax) -> ParseJSON (category' annotation) (range' annotation) (text' annotation) (toList syntax)
where
category' = toS . Info.category
range' = characterRange
text' = Info.sourceText
writeToOutput :: Maybe FilePath -> [Text] -> IO ()
writeToOutput output text =
case output of
Nothing -> for_ text putStrLn
Just path -> for_ text (T.writeFile path)
-- | Return a parser that decorates with the cost of a term and its children.
parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser that decorates with the source text.
parserWithSource :: FilePath -> Parser (Syntax Text) (Record '[SourceText, Range, Category, SourceSpan])
parserWithSource path blob = decorateTerm (termSourceDecorator (source blob)) <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C tree_sitter_c
Just JavaScript -> treeSitterParser JavaScript tree_sitter_javascript
Just Markdown -> cmarkParser
Just Ruby -> treeSitterParser Ruby tree_sitter_ruby
Just Language.Go -> treeSitterParser Language.Go tree_sitter_go
_ -> lineByLineParser
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: (Functor f) => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
decorateTerm decorator = cata $ \ term -> cofree ((decorator (extract <$> term) :. headF term) :< tailF term)
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
-- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Term decorator extracting the source text for a term.
termSourceDecorator :: (HasField fields Range) => Source Char -> TermDecorator f fields SourceText
termSourceDecorator source c = SourceText . toText $ Source.slice range' source
where range' = characterRange $ headF c
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (Range 0 (length source) :. Program :. rangeToSourceSpan source (Range 0 (length source)) :. Nil) :< Indexed children
leaf charIndex line = (Range charIndex (charIndex + T.length line) :. Program :. rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) :. Nil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserForFilepath path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text

View File

@ -1,51 +1,9 @@
module Parser where
import Category
import Diff
import Range
import Syntax
import Term
import Control.Comonad.Cofree
import qualified Data.OrderedMap as Map
import qualified Data.Set as Set
import Prologue
import Source
import Data.Text as Text
-- | A function that takes a source file and returns an annotated AST.
-- | A function that takes a source blob and returns an annotated AST.
-- | The return is in the IO monad because some of the parsers are written in C
-- | and aren't pure.
type Parser = Source Char -> IO (Term Text Info)
-- | Given a source string, the term's range, production name, and
-- | production/child pairs, construct the term.
type Constructor = Source Char -> Range -> String -> [Term Text Info] -> Term Text Info
-- | Categories that are treated as keyed nodes.
keyedCategories :: Set.Set Category
keyedCategories = Set.fromList [ DictionaryLiteral ]
-- | Categories that are treated as fixed nodes.
fixedCategories :: Set.Set Category
fixedCategories = Set.fromList [ BinaryOperator, Pair ]
-- | Should these categories be treated as keyed nodes?
isKeyed :: Set.Set Category -> Bool
isKeyed = not . Set.null . Set.intersection keyedCategories
-- | Should these categories be treated as fixed nodes?
isFixed :: Set.Set Category -> Bool
isFixed = not . Set.null . Set.intersection fixedCategories
-- | Given a function that maps production names to sets of categories, produce
-- | a Constructor.
termConstructor :: (String -> Set.Set Category) -> Constructor
termConstructor mapping source range name = (Info range categories :<) . construct
where
categories = mapping name
construct [] = Leaf . pack . toString $ slice range source
construct children | isFixed categories = Fixed children
construct children | isKeyed categories = Keyed . Map.fromList $ assignKey <$> children
construct children = Indexed children
assignKey node@(Info _ categories :< Fixed (key : _)) | Set.member Pair categories = (getSubstring key, node)
assignKey node = (getSubstring node, node)
getSubstring (Info range _ :< _) = pack . toString $ slice range source
type Parser f a = SourceBlob -> IO (Cofree f a)

View File

@ -1,21 +1,60 @@
module Patch where
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Patch
( Patch(..)
, replacing
, inserting
, deleting
, after
, before
, afterOrBefore
, unPatch
, patchSum
, maybeFst
, maybeSnd
, mapPatch
, patchType
) where
import Data.Bifunctor.These
import Data.Functor.Listable
import Data.These
import Prologue
-- | An operation to replace, insert, or delete an item.
data Patch a =
Replace a a
data Patch a
= Replace a a
| Insert a
| Delete a
deriving (Functor, Show, Eq)
deriving (Eq, Foldable, Functor, Generic, Ord, Show, Traversable)
-- DSL
-- | Constructs the replacement of one value by another in an Applicative context.
replacing :: Applicative f => a -> a -> f (Patch a)
replacing = (pure .) . Replace
-- | Constructs the insertion of a value in an Applicative context.
inserting :: Applicative f => a -> f (Patch a)
inserting = pure . Insert
-- | Constructs the deletion of a value in an Applicative context.
deleting :: Applicative f => a -> f (Patch a)
deleting = pure . Delete
-- | Return the item from the after side of the patch.
after :: Patch a -> Maybe a
after = maybeFirst . unPatch
after = maybeSnd . unPatch
-- | Return the item from the before side of the patch.
before :: Patch a -> Maybe a
before = maybeSecond . unPatch
before = maybeFst . unPatch
afterOrBefore :: Patch a -> Maybe a
afterOrBefore patch = case (before patch, after patch) of
(_, Just after) -> Just after
(Just before, _) -> Just before
(_, _) -> Nothing
-- | Return both sides of a patch.
unPatch :: Patch a -> These a a
@ -23,6 +62,33 @@ unPatch (Replace a b) = These a b
unPatch (Insert b) = That b
unPatch (Delete a) = This a
mapPatch :: (a -> b) -> (a -> b) -> Patch a -> Patch b
mapPatch f _ (Delete a ) = Delete (f a)
mapPatch _ g (Insert b) = Insert (g b)
mapPatch f g (Replace a b) = Replace (f a) (g b)
-- | Calculate the cost of the patch given a function to compute the cost of a item.
patchSum :: (a -> Integer) -> Patch a -> Integer
patchSum :: (a -> Int) -> Patch a -> Int
patchSum termCost patch = maybe 0 termCost (before patch) + maybe 0 termCost (after patch)
-- | Return Just the value in This, or the first value in These, if any.
maybeFst :: These a b -> Maybe a
maybeFst = these Just (const Nothing) ((Just .) . const)
-- | Return Just the value in That, or the second value in These, if any.
maybeSnd :: These a b -> Maybe b
maybeSnd = these (const Nothing) Just ((Just .) . flip const)
patchType :: Patch a -> Text
patchType = \case
Replace{} -> "modified"
Insert{} -> "added"
Delete{} -> "removed"
-- Instances
instance Listable1 Patch where
liftTiers t = liftCons1 t Insert \/ liftCons1 t Delete \/ liftCons2 t t Replace
instance Listable a => Listable (Patch a) where
tiers = tiers1

21
src/Prologue.hs Normal file
View File

@ -0,0 +1,21 @@
module Prologue
( module X
, lookup
, (&&&)
, (***)
, hylo, cata, para, ana
, module Data.Hashable
) where
import Protolude as X
import Data.List (lookup)
import Control.Comonad.Trans.Cofree as X
import Control.Monad.Trans.Free as X
import Control.Comonad as X
import Control.Arrow ((&&&), (***))
import Data.Functor.Foldable (hylo, cata, para, ana)
import Data.Hashable

View File

@ -1,14 +1,16 @@
{-# LANGUAGE FlexibleInstances #-}
module Range where
import Control.Applicative ((<|>))
import qualified Data.Char as Char
import Data.Maybe (fromMaybe)
import Data.Option
import Data.List (span)
import Data.List.NonEmpty (nonEmpty)
import Data.Semigroup
import Data.String
import Prologue
import Test.LeanCheck
-- | A half-open interval of integers, defined by start & end indices.
data Range = Range { start :: !Int, end :: !Int }
deriving (Eq, Show)
data Range = Range { start :: Int, end :: Int }
deriving (Eq, Show, Generic)
-- | Make a range at a given index.
rangeAt :: Int -> Range
@ -35,9 +37,9 @@ rangesAndWordsFrom startIndex string = fromMaybe [] $ take isWord <|> take isPun
endFor parsed = startIndex + length parsed
parse transform predicate = case span predicate string of
([], _) -> Nothing
(parsed, rest) -> Just $ maybe id (:) (transform parsed) $ rangesAndWordsFrom (endFor parsed) rest
(parsed, rest) -> Just . maybe identity (:) (transform parsed) $ rangesAndWordsFrom (endFor parsed) rest
-- | Is this a word character?
-- | Word characters are defined as in [Rubys `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.:
-- | Word characters are defined as in [Rubys `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e:.
-- | > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_
isWord c = Char.isLetter c || Char.isNumber c || Char.isMark c || Char.generalCategory c == Char.ConnectorPunctuation
isPunctuation c = not (Char.isSpace c || isWord c)
@ -47,24 +49,30 @@ maybeLastIndex :: Range -> Maybe Int
maybeLastIndex (Range start end) | start == end = Nothing
maybeLastIndex (Range _ end) = Just $ end - 1
-- | Test two ranges for intersection.
intersectsRange :: Range -> Range -> Bool
intersectsRange range1 range2 = start range1 < end range2 && start range2 < end range1
-- Return the (possibly empty, possibly ill-formed) intersection of two ranges.
intersectionRange :: Range -> Range -> Range
intersectionRange range1 range2 = Range (max (start range1) (start range2)) (min (end range1) (end range2))
-- | Return a range that contains both the given ranges.
unionRange :: Range -> Range -> Range
unionRange (Range start1 end1) (Range start2 end2) = Range (min start1 start2) (max end1 end2)
-- | Return a range that contains all the ranges in a Foldable, or Range 0 0 if its empty.
unionRanges :: Foldable f => f Range -> Range
unionRanges = unionRangesFrom (Range 0 0)
-- | Return a range that contains all the ranges in a Foldable, or the passed Range if the Foldable is empty.
unionRangesFrom :: Foldable f => Range -> f Range -> Range
unionRangesFrom range = fromMaybe range . maybeConcat
unionRangesFrom range = maybe range sconcat . nonEmpty . toList
instance Monoid (Option Range) where
mempty = Option Nothing
mappend (Option (Just a)) (Option (Just b)) = Option (Just (unionRange a b))
mappend a@(Option (Just _)) _ = a
mappend _ b@(Option (Just _)) = b
mappend _ _ = mempty
-- Instances
instance Semigroup Range where
a <> b = unionRange a b
instance Ord Range where
a <= b = start a <= start b
instance Listable Range where
tiers = cons2 Range

View File

@ -1,8 +1,76 @@
module Renderer where
module Renderer (Renderer, DiffArguments(..), Output(..), concatOutputs, toSummaryKey, Format(..)) where
import Data.Aeson (Value, toEncoding)
import Data.Aeson.Encoding (encodingToLazyByteString)
import Data.Functor.Both
import Data.Map as Map hiding (null)
import Data.Text as T (intercalate)
import Diff
import Source
import Prologue
import Source (SourceBlob)
import Syntax
-- | A function that will render a diff, given the two source files.
type Renderer a b = Diff a Info -> Both SourceBlob -> b
-- | A function that will render a diff, given the two source blobs.
type Renderer annotation = Both SourceBlob -> Diff (Syntax Text) annotation -> Output
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath }
deriving (Show)
-- | The available types of diff rendering.
data Format = Split | Patch | JSON | Summary | SExpression | TOC
deriving (Show)
data Output = SplitOutput Text | PatchOutput Text | JSONOutput (Map Text Value) | SummaryOutput (Map Text (Map Text [Value])) | SExpressionOutput Text | TOCOutput (Map Text (Map Text [Value]))
deriving (Show)
-- Returns a key representing the filename. If the filenames are different,
-- return 'before -> after'.
toSummaryKey :: Both FilePath -> Text
toSummaryKey = runBothWith $ \before after ->
toS $ case (before, after) of
("", after) -> after
(before, "") -> before
(before, after) | before == after -> after
(before, after) | not (null before) && not (null after) -> before <> " -> " <> after
(_, _) -> mempty
-- Concatenates a list of 'Output' depending on the output type.
-- For JSON, each file output is merged since they're uniquely keyed by filename.
-- For Summaries, each file output is merged into one 'Object' consisting of lists of
-- changes and errors.
-- Split and Patch output is appended together with newlines.
concatOutputs :: [Output] -> Text
concatOutputs list | isJSON list = toS . encodingToLazyByteString . toEncoding $ concatJSON list
where
concatJSON :: [Output] -> Map Text Value
concatJSON (JSONOutput hash : rest) = Map.union hash (concatJSON rest)
concatJSON _ = mempty
concatOutputs list | isSummary list = toS . encodingToLazyByteString . toEncoding $ concatSummaries list
where
concatSummaries :: [Output] -> Map Text (Map Text [Value])
concatSummaries (SummaryOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries (TOCOutput hash : rest) = Map.unionWith (Map.unionWith (<>)) hash (concatSummaries rest)
concatSummaries _ = mempty
concatOutputs list | isText list = T.intercalate "\n" (toText <$> list)
concatOutputs _ = mempty
isJSON :: [Output] -> Bool
isJSON (JSONOutput _ : _) = True
isJSON _ = False
isSummary :: [Output] -> Bool
isSummary (SummaryOutput _ : _) = True
isSummary (TOCOutput _ : _) = True
isSummary _ = False
isText :: [Output] -> Bool
isText (SplitOutput _ : _) = True
isText (PatchOutput _ : _) = True
isText (SExpressionOutput _ : _) = True
isText _ = False
toText :: Output -> Text
toText (SplitOutput text) = text
toText (PatchOutput text) = text
toText (SExpressionOutput text) = text
toText _ = mempty

View File

@ -1,83 +1,155 @@
{-# LANGUAGE FlexibleInstances, OverloadedStrings, TypeSynonymInstances #-}
{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Renderer.JSON (
json
) where
import Prologue hiding (toList)
import Alignment
import Category
import Control.Comonad.Cofree
import Control.Monad.Free
import Data.Aeson hiding (json)
import Data.ByteString.Builder
import Data.ByteString.Lazy
import Data.Functor.Both
import Data.Monoid
import Data.OrderedMap hiding (fromList)
import Data.Aeson as A hiding (json)
import Data.Bifunctor.Join
import Data.Record
import qualified Data.Text as T
import Data.Vector hiding (toList)
import Diff
import Line
import Range
import Data.These
import Data.Vector as Vector hiding (toList)
import Info
import Renderer
import Source hiding (fromList)
import SplitDiff
import Syntax
import Syntax as S
import Term
import qualified Data.Map as Map
-- | Render a diff to a string representing its JSON.
json :: Renderer a ByteString
json diff sources = toLazyByteString . fromEncoding . pairs $
"rows" .= annotateRows (splitDiffByLines (source <$> sources) diff)
<> "oids" .= (oid <$> sources)
<> "paths" .= (path <$> sources)
where annotateRows = fmap (fmap NumberedLine) . numberedRows
json :: (ToJSON (Record fields), HasField fields Category, HasField fields Range) => Renderer (Record fields)
json blobs diff = JSONOutput $ Map.fromList [
("rows", toJSON (annotateRows (alignDiff (source <$> blobs) diff))),
("oids", toJSON (oid <$> blobs)),
("paths", toJSON (path <$> blobs)) ]
where annotateRows :: [Join These a] -> [Join These (NumberedLine a)]
annotateRows = fmap (fmap NumberedLine) . numberedRows
newtype NumberedLine a = NumberedLine (Int, Line a)
-- | A numbered 'a'.
newtype NumberedLine a = NumberedLine (Int, a)
instance (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasField fields Range) => ToJSON (NumberedLine (SplitSyntaxDiff leaf fields)) where
toJSON (NumberedLine (n, a)) = object (lineFields n a (getRange a))
toEncoding (NumberedLine (n, a)) = pairs $ mconcat (lineFields n a (getRange a))
instance ToJSON (NumberedLine (SplitDiff leaf Info, Range)) where
toJSON (NumberedLine (n, a)) = object (lineFields n a)
toEncoding (NumberedLine (n, a)) = pairs $ mconcat (lineFields n a)
instance ToJSON Category where
toJSON (Other s) = String $ T.pack s
toJSON (Other s) = String s
toJSON s = String . T.pack $ show s
instance ToJSON Range where
toJSON (Range start end) = Array . fromList $ toJSON <$> [ start, end ]
toEncoding (Range start end) = foldable [ start, end ]
instance ToJSON a => ToJSON (Both a) where
toJSON (Both (a, b)) = Array . fromList $ toJSON <$> [ a, b ]
toJSON (Range start end) = A.Array . Vector.fromList $ toJSON <$> [ start, end ]
toEncoding (Range start end) = foldable [ start, end ]
instance ToJSON a => ToJSON (Join These a) where
toJSON (Join vs) = A.Array . Vector.fromList $ toJSON <$> these pure pure (\ a b -> [ a, b ]) vs
toEncoding = foldable
instance ToJSON (SplitDiff leaf Info) where
toJSON (Free (Annotated info syntax)) = object (termFields info syntax)
toJSON (Pure patch) = object (patchFields patch)
toEncoding (Free (Annotated info syntax)) = pairs $ mconcat (termFields info syntax)
toEncoding (Pure patch) = pairs $ mconcat (patchFields patch)
instance ToJSON value => ToJSON (OrderedMap T.Text value) where
toJSON map = object $ uncurry (.=) <$> toList map
toEncoding map = pairs . mconcat $ uncurry (.=) <$> toList map
instance ToJSON (Term leaf Info) where
toJSON (info :< syntax) = object (termFields info syntax)
toEncoding (info :< syntax) = pairs $ mconcat (termFields info syntax)
lineFields :: KeyValue kv => Int -> Line (SplitDiff leaf Info, Range) -> [kv]
lineFields n line | isEmpty line = []
| otherwise = [ "number" .= n
, "terms" .= unLine (Prelude.fst <$> line)
, "range" .= unionRanges (Prelude.snd <$> line)
, "hasChanges" .= hasChanges (Prelude.fst <$> line)
]
instance ToJSON a => ToJSON (Join (,) a) where
toJSON (Join (a, b)) = A.Array . Vector.fromList $ toJSON <$> [ a, b ]
termFields :: (ToJSON recur, KeyValue kv) => Info -> Syntax leaf recur -> [kv]
termFields (Info range categories) syntax = "range" .= range : "categories" .= categories : case syntax of
Leaf _ -> []
Indexed c -> childrenFields c
Fixed c -> childrenFields c
Keyed c -> childrenFields c
where childrenFields c = [ "children" .= c ]
instance (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasField fields Range) => ToJSON (SplitSyntaxDiff leaf fields) where
toJSON splitDiff = case runFree splitDiff of
(Free (info :< syntax)) -> object (termFields info syntax)
(Pure patch) -> object (patchFields patch)
toEncoding splitDiff = case runFree splitDiff of
(Free (info :< syntax)) -> pairs $ mconcat (termFields info syntax)
(Pure patch) -> pairs $ mconcat (patchFields patch)
patchFields :: KeyValue kv => SplitPatch (Cofree (Syntax leaf) Info) -> [kv]
instance (ToJSON (Record fields), ToJSON leaf, HasField fields Category, HasField fields Range) => ToJSON (SyntaxTerm leaf fields) where
toJSON term |
(info :< syntax) <- runCofree term = object (termFields info syntax)
toEncoding term |
(info :< syntax) <- runCofree term = pairs $ mconcat (termFields info syntax)
lineFields :: (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasField fields Range, KeyValue kv) =>
Int ->
SplitSyntaxDiff leaf fields ->
Range ->
[kv]
lineFields n term range = [ "number" .= n
, "terms" .= [ term ]
, "range" .= range
, "hasChanges" .= hasChanges term
]
termFields :: (ToJSON recur, KeyValue kv, HasField fields Category, HasField fields Range) =>
Record fields ->
Syntax leaf recur ->
[kv]
termFields info syntax = "range" .= characterRange info : "category" .= category info : syntaxToTermField syntax
patchFields :: (ToJSON (Record fields), ToJSON leaf, KeyValue kv, HasField fields Category, HasField fields Range) =>
SplitPatch (SyntaxTerm leaf fields) ->
[kv]
patchFields patch = case patch of
SplitInsert term -> fields "insert" term
SplitDelete term -> fields "delete" term
SplitReplace term -> fields "replace" term
where fields kind (info :< syntax) = "patch" .= T.pack kind : termFields info syntax
where
fields kind term |
(info :< syntax) <- runCofree term = "patch" .= T.pack kind : termFields info syntax
syntaxToTermField :: (ToJSON recur, KeyValue kv) =>
Syntax leaf recur ->
[kv]
syntaxToTermField syntax = case syntax of
Leaf _ -> []
Indexed c -> childrenFields c
Fixed c -> childrenFields c
S.FunctionCall identifier parameters -> [ "identifier" .= identifier ] <> [ "parameters" .= parameters ]
S.Ternary expression cases -> [ "expression" .= expression ] <> [ "cases" .= cases ]
S.AnonymousFunction parameters c -> [ "parameters" .= parameters ] <> childrenFields c
S.Function identifier parameters ty c -> [ "identifier" .= identifier ] <> [ "parameters" .= parameters ] <> [ "type" .= ty ] <> childrenFields c
S.Assignment assignmentId value -> [ "identifier" .= assignmentId ] <> [ "value" .= value ]
S.OperatorAssignment identifier value -> [ "identifier" .= identifier ] <> [ "value" .= value ]
S.MemberAccess identifier value -> [ "identifier" .= identifier ] <> [ "value" .= value ]
S.MethodCall identifier methodIdentifier parameters -> [ "identifier" .= identifier ] <> [ "methodIdentifier" .= methodIdentifier ] <> [ "parameters" .= parameters ]
S.Operator syntaxes -> [ "operatorSyntaxes" .= syntaxes ]
S.VarDecl declaration ty -> [ "declaration" .= declaration ] <> [ "type" .= ty]
S.VarAssignment identifier value -> [ "identifier" .= identifier ] <> [ "value" .= value ]
S.SubscriptAccess identifier property -> [ "identifier" .= identifier ] <> [ "property" .= property ]
S.Switch expression cases -> [ "expression" .= expression ] <> [ "cases" .= cases ]
S.Case expression statements -> [ "expression" .= expression ] <> [ "statements" .= statements ]
S.Object ty keyValuePairs -> [ "type" .= ty ] <> childrenFields keyValuePairs
S.Pair a b -> childrenFields [a, b]
S.Comment _ -> []
S.Commented comments child -> childrenFields (comments <> maybeToList child)
S.ParseError c -> childrenFields c
S.For expressions body -> [ "expressions" .= expressions ] <> [ "body" .= body ]
S.DoWhile expression body -> [ "expression" .= expression ] <> [ "body" .= body ]
S.While expression body -> [ "expression" .= expression ] <> [ "body" .= body ]
S.Return expression -> [ "expression" .= expression ]
S.Throw c -> [ "expression" .= c ]
S.Constructor expression -> [ "expression" .= expression ]
S.Try body catchExpression elseExpression finallyExpression -> [ "body" .= body ] <> [ "catchExpression" .= catchExpression ] <> [ "elseExpression" .= elseExpression ] <> [ "finallyExpression" .= finallyExpression ]
S.Array ty c -> [ "type" .= ty ] <> childrenFields c
S.Class identifier superclass definitions -> [ "identifier" .= identifier ] <> [ "superclass" .= superclass ] <> [ "definitions" .= definitions ]
S.Method identifier receiver ty parameters definitions -> [ "identifier" .= identifier ] <> [ "receiver" .= receiver ] <> [ "type" .= ty ] <> [ "parameters" .= parameters ] <> [ "definitions" .= definitions ]
S.If expression clauses -> [ "expression" .= expression ] <> childrenFields clauses
S.Module identifier definitions-> [ "identifier" .= identifier ] <> [ "definitions" .= definitions ]
S.Import identifier statements -> [ "identifier" .= identifier ] <> [ "statements" .= statements ]
S.Export identifier statements -> [ "identifier" .= identifier ] <> [ "statements" .= statements ]
S.Yield expr -> [ "yieldExpression" .= expr ]
S.Negate expr -> [ "negate" .= expr ]
S.Rescue args expressions -> [ "args" .= args ] <> childrenFields expressions
S.Select cases -> childrenFields cases
S.Go cases -> childrenFields cases
S.Defer cases -> childrenFields cases
S.TypeAssertion a b -> childrenFields [a, b]
S.TypeConversion a b -> childrenFields [a, b]
S.Struct ty fields -> [ "type" .= ty ] <> childrenFields fields
S.Break expr -> [ "expression" .= expr ]
S.Continue expr -> [ "expression" .= expr ]
S.BlockStatement c -> childrenFields c
S.ParameterDecl ty field -> [ "type" .= ty ] <> [ "identifier" .= field ]
S.DefaultCase c -> childrenFields c
S.TypeDecl id ty -> [ "type" .= ty ] <> [ "identifier" .= id ]
S.FieldDecl id ty tag -> [ "type" .= ty ] <> [ "identifier" .= id ] <> [ "tag" .= tag]
S.Ty ty -> [ "type" .= ty ]
S.Send channel expr -> [ "channel" .= channel ] <> [ "expression" .= expr ]
where childrenFields c = [ "children" .= c ]

View File

@ -1,38 +1,43 @@
module Renderer.Patch (
patch,
hunks,
Hunk(..)
Hunk(..),
truncatePatch
) where
import Alignment
import Data.Bifunctor.Join
import Data.Functor.Both as Both
import Data.List (span, unzip)
import Data.Record
import Data.String
import Data.Text (pack)
import Data.These
import Diff
import Line
import Prelude hiding (fst, snd)
import qualified Prelude
import Patch
import Prologue hiding (fst, snd)
import Range
import Renderer
import Source hiding ((++), break)
import Source hiding (break)
import SplitDiff
import Control.Comonad.Cofree
import Control.Monad.Free
import Data.Functor.Both as Both
import Data.List
import Data.Maybe
import Data.Monoid
-- | Render a timed out file as a truncated diff.
truncatePatch :: DiffArguments -> Both SourceBlob -> Text
truncatePatch _ blobs = pack $ header blobs <> "#timed_out\nTruncating diff: timeout reached.\n"
-- | Render a diff in the traditional patch format.
patch :: Renderer a String
patch diff sources = case getLast $ foldMap (Last . Just) string of
Just c | c /= '\n' -> string ++ "\n\\ No newline at end of file\n"
patch :: HasField fields Range => Renderer (Record fields)
patch blobs diff = PatchOutput . pack $ case getLast (foldMap (Last . Just) string) of
Just c | c /= '\n' -> string <> "\n\\ No newline at end of file\n"
_ -> string
where string = mconcat $ showHunk sources <$> hunks diff sources
where string = header blobs <> mconcat (showHunk blobs <$> hunks diff blobs)
-- | A hunk in a patch, including the offset, changes, and context.
data Hunk a = Hunk { offset :: Both (Sum Int), changes :: [Change a], trailingContext :: [Row a] }
data Hunk a = Hunk { offset :: Both (Sum Int), changes :: [Change a], trailingContext :: [Join These a] }
deriving (Eq, Show)
-- | A change in a patch hunk, along with its preceding context.
data Change a = Change { context :: [Row a], contents :: [Row a] }
data Change a = Change { context :: [Join These a], contents :: [Join These a] }
deriving (Eq, Show)
-- | The number of lines in the hunk before and after.
@ -44,87 +49,91 @@ changeLength :: Change a -> Both (Sum Int)
changeLength change = mconcat $ (rowIncrement <$> context change) <> (rowIncrement <$> contents change)
-- | The increment the given row implies for line numbering.
rowIncrement :: Row a -> Both (Sum Int)
rowIncrement = fmap lineIncrement
rowIncrement :: Join These a -> Both (Sum Int)
rowIncrement = Join . fromThese (Sum 0) (Sum 0) . runJoin . (Sum 1 <$)
-- | Given the before and after sources, render a hunk to a string.
showHunk :: Both SourceBlob -> Hunk (SplitDiff a Info) -> String
showHunk blobs hunk = header blobs hunk ++
concat (showChange sources <$> changes hunk) ++
showLines (snd sources) ' ' (snd <$> trailingContext hunk)
showHunk :: Functor f => HasField fields Range => Both SourceBlob -> Hunk (SplitDiff f (Record fields)) -> String
showHunk blobs hunk = maybeOffsetHeader <>
concat (showChange sources <$> changes hunk) <>
showLines (snd sources) ' ' (maybeSnd . runJoin <$> trailingContext hunk)
where sources = source <$> blobs
maybeOffsetHeader = if lengthA > 0 && lengthB > 0
then offsetHeader
else mempty
offsetHeader = "@@ -" <> offsetA <> "," <> show lengthA <> " +" <> offsetB <> "," <> show lengthB <> " @@" <> "\n"
(lengthA, lengthB) = runJoin . fmap getSum $ hunkLength hunk
(offsetA, offsetB) = runJoin . fmap (show . getSum) $ offset hunk
-- | Given the before and after sources, render a change to a string.
showChange :: Both (Source Char) -> Change (SplitDiff a Info) -> String
showChange sources change = showLines (snd sources) ' ' (snd <$> context change) ++ deleted ++ inserted
where (deleted, inserted) = runBoth $ pure showLines <*> sources <*> Both ('-', '+') <*> Both.unzip (contents change)
showChange :: Functor f => HasField fields Range => Both (Source Char) -> Change (SplitDiff f (Record fields)) -> String
showChange sources change = showLines (snd sources) ' ' (maybeSnd . runJoin <$> context change) <> deleted <> inserted
where (deleted, inserted) = runJoin $ pure showLines <*> sources <*> both '-' '+' <*> Join (unzip (fromThese Nothing Nothing . runJoin . fmap Just <$> contents change))
-- | Given a source, render a set of lines to a string with a prefix.
showLines :: Source Char -> Char -> [Line (SplitDiff leaf Info)] -> String
showLines :: Functor f => HasField fields Range => Source Char -> Char -> [Maybe (SplitDiff f (Record fields))] -> String
showLines source prefix lines = fromMaybe "" . mconcat $ fmap prepend . showLine source <$> lines
where prepend "" = ""
prepend source = prefix : source
-- | Given a source, render a line to a string.
showLine :: Source Char -> Line (SplitDiff leaf Info) -> Maybe String
showLine source line | isEmpty line = Nothing
| otherwise = Just . toString . (`slice` source) . unionRanges $ getRange <$> unLine line
-- | Return the range from a split diff.
getRange :: SplitDiff leaf Info -> Range
getRange (Free (Annotated (Info range _) _)) = range
getRange (Pure patch) = let Info range _ :< _ = getSplitTerm patch in range
showLine :: Functor f => HasField fields Range => Source Char -> Maybe (SplitDiff f (Record fields)) -> Maybe String
showLine source line | Just line <- line = Just . toString . (`slice` source) $ getRange line
| otherwise = Nothing
-- | Returns the header given two source blobs and a hunk.
header :: Both SourceBlob -> Hunk a -> String
header blobs hunk = intercalate "\n" [filepathHeader, fileModeHeader, beforeFilepath, afterFilepath, maybeOffsetHeader]
where filepathHeader = "diff --git a/" ++ pathA ++ " b/" ++ pathB
header :: Both SourceBlob -> String
header blobs = intercalate "\n" ([filepathHeader, fileModeHeader] <> maybeFilepaths) <> "\n"
where filepathHeader = "diff --git a/" <> pathA <> " b/" <> pathB
fileModeHeader = case (modeA, modeB) of
(Nothing, Just mode) -> intercalate "\n" [ "new file mode " ++ modeToDigits mode, blobOidHeader ]
(Just mode, Nothing) -> intercalate "\n" [ "deleted file mode " ++ modeToDigits mode, blobOidHeader ]
(Just mode, Just other) | mode == other -> "index " ++ oidA ++ ".." ++ oidB ++ " " ++ modeToDigits mode
(Nothing, Just mode) -> intercalate "\n" [ "new file mode " <> modeToDigits mode, blobOidHeader ]
(Just mode, Nothing) -> intercalate "\n" [ "deleted file mode " <> modeToDigits mode, blobOidHeader ]
(Just mode, Just other) | mode == other -> "index " <> oidA <> ".." <> oidB <> " " <> modeToDigits mode
(Just mode1, Just mode2) -> intercalate "\n" [
"old mode " ++ modeToDigits mode1,
"new mode " ++ modeToDigits mode2,
"old mode " <> modeToDigits mode1,
"new mode " <> modeToDigits mode2,
blobOidHeader
]
(Nothing, Nothing) -> ""
blobOidHeader = "index " ++ oidA ++ ".." ++ oidB
blobOidHeader = "index " <> oidA <> ".." <> oidB
modeHeader :: String -> Maybe SourceKind -> String -> String
modeHeader ty maybeMode path = case maybeMode of
Just _ -> ty ++ "/" ++ path
Just _ -> ty <> "/" <> path
Nothing -> "/dev/null"
beforeFilepath = "--- " ++ modeHeader "a" modeA pathA
afterFilepath = "+++ " ++ modeHeader "b" modeB pathB
maybeOffsetHeader = if lengthA > 0 && lengthB > 0
then offsetHeader
else mempty
offsetHeader = "@@ -" ++ offsetA ++ "," ++ show lengthA ++ " +" ++ offsetB ++ "," ++ show lengthB ++ " @@" ++ "\n"
(lengthA, lengthB) = runBoth . fmap getSum $ hunkLength hunk
(offsetA, offsetB) = runBoth . fmap (show . getSum) $ offset hunk
(pathA, pathB) = runBoth $ path <$> blobs
(oidA, oidB) = runBoth $ oid <$> blobs
(modeA, modeB) = runBoth $ blobKind <$> blobs
maybeFilepaths = if (nullOid == oidA && null (snd sources)) || (nullOid == oidB && null (fst sources)) then [] else [ beforeFilepath, afterFilepath ]
beforeFilepath = "--- " <> modeHeader "a" modeA pathA
afterFilepath = "+++ " <> modeHeader "b" modeB pathB
sources = source <$> blobs
(pathA, pathB) = case runJoin $ path <$> blobs of
("", path) -> (path, path)
(path, "") -> (path, path)
paths -> paths
(oidA, oidB) = runJoin $ oid <$> blobs
(modeA, modeB) = runJoin $ blobKind <$> blobs
-- | A hunk representing no changes.
emptyHunk :: Hunk (SplitDiff a annotation)
emptyHunk = Hunk { offset = mempty, changes = [], trailingContext = [] }
-- | Render a diff as a series of hunks.
hunks :: Renderer a [Hunk (SplitDiff a Info)]
hunks :: HasField fields Range => SyntaxDiff leaf fields -> Both SourceBlob -> [Hunk (SplitSyntaxDiff leaf fields)]
hunks _ blobs | sources <- source <$> blobs
, sourcesEqual <- runBothWith (==) sources
, sourcesNull <- runBothWith (&&) (null <$> sources)
, sourcesEqual || sourcesNull
= [Hunk { offset = mempty, changes = [], trailingContext = [] }]
hunks diff blobs = hunksInRows (Both (1, 1)) $ fmap (fmap Prelude.fst) <$> splitDiffByLines (source <$> blobs) diff
= [emptyHunk]
hunks diff blobs = hunksInRows (pure 1) $ alignDiff (source <$> blobs) diff
-- | Given beginning line numbers, turn rows in a split diff into hunks in a
-- | patch.
hunksInRows :: Both (Sum Int) -> [Row (SplitDiff a Info)] -> [Hunk (SplitDiff a Info)]
hunksInRows :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> [Hunk (SplitDiff f annotation)]
hunksInRows start rows = case nextHunk start rows of
Nothing -> []
Just (hunk, rest) -> hunk : hunksInRows (offset hunk <> hunkLength hunk) rest
-- | Given beginning line numbers, return the next hunk and the remaining rows
-- | of the split diff.
nextHunk :: Both (Sum Int) -> [Row (SplitDiff a Info)] -> Maybe (Hunk (SplitDiff a Info), [Row (SplitDiff a Info)])
nextHunk :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Hunk (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextHunk start rows = case nextChange start rows of
Nothing -> Nothing
Just (offset, change, rest) -> let (changes, rest') = contiguousChanges rest in Just (Hunk offset (change : changes) $ take 3 rest', drop 3 rest')
@ -136,7 +145,7 @@ nextHunk start rows = case nextChange start rows of
-- | Given beginning line numbers, return the number of lines to the next
-- | the next change, and the remaining rows of the split diff.
nextChange :: Both (Sum Int) -> [Row (SplitDiff a Info)] -> Maybe (Both (Sum Int), Change (SplitDiff a Info), [Row (SplitDiff a Info)])
nextChange :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Both (Sum Int), Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextChange start rows = case changeIncludingContext leadingContext afterLeadingContext of
Nothing -> Nothing
Just (change, afterChanges) -> Just (start <> mconcat (rowIncrement <$> skippedContext), change, afterChanges)
@ -146,20 +155,12 @@ nextChange start rows = case changeIncludingContext leadingContext afterLeadingC
-- | Return a Change with the given context and the rows from the begginning of
-- | the given rows that have changes, or Nothing if the first row has no
-- | changes.
changeIncludingContext :: [Row (SplitDiff a Info)] -> [Row (SplitDiff a Info)] -> Maybe (Change (SplitDiff a Info), [Row (SplitDiff a Info)])
changeIncludingContext :: (Foldable f, Functor f) => [Join These (SplitDiff f annotation)] -> [Join These (SplitDiff f annotation)] -> Maybe (Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
changeIncludingContext leadingContext rows = case changes of
[] -> Nothing
_ -> Just (Change leadingContext changes, afterChanges)
where (changes, afterChanges) = span rowHasChanges rows
-- | Whether a row has changes on either side.
rowHasChanges :: Row (SplitDiff a Info) -> Bool
rowHasChanges lines = or (lineHasChanges <$> lines)
-- | Whether a line has changes.
lineHasChanges :: Line (SplitDiff a Info) -> Bool
lineHasChanges = or . fmap diffHasChanges
-- | Whether a split diff has changes.
diffHasChanges :: SplitDiff a Info -> Bool
diffHasChanges = or . fmap (const True)
rowHasChanges :: (Foldable f, Functor f) => Join These (SplitDiff f annotation) -> Bool
rowHasChanges row = or (hasChanges <$> row)

View File

@ -0,0 +1,50 @@
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
module Renderer.SExpression (sExpression, printTerm) where
import Data.Bifunctor.Join
import Data.Foldable
import Data.Record
import Data.Text hiding (foldr, replicate)
import Prologue hiding (toList, intercalate)
import Category as C
import Diff
import Renderer
import Patch
import Info
import Syntax
import Term
sExpression :: (HasField fields Category, HasField fields SourceSpan) => Renderer (Record fields)
sExpression _ diff = SExpressionOutput $ printDiff diff 0
printDiff :: (HasField fields Category, HasField fields SourceSpan) => Diff (Syntax Text) (Record fields) -> Int -> Text
printDiff diff level = case runFree diff of
(Pure patch) -> case patch of
Insert term -> pad (level - 1) <> "{+" <> printTerm term level <> "+}"
Delete term -> pad (level - 1) <> "{-" <> printTerm term level <> "-}"
Replace a b -> pad (level - 1) <> "{" <> printTerm a level <> "->" <> printTerm b level <> "}"
(Free (Join (_, annotation) :< syntax)) -> pad level <> "(" <> showAnnotation annotation <> foldr (\d acc -> printDiff d (level + 1) <> acc) "" syntax <> ")"
where
pad n | n < 1 = ""
| otherwise = "\n" <> mconcat (replicate n " ")
printTerm :: (HasField fields Category, HasField fields SourceSpan) => Term (Syntax t) (Record fields) -> Int -> Text
printTerm term level = go term level 0
where
pad p n | n < 1 = ""
| otherwise = "\n" <> mconcat (replicate (p + n) " ")
go term parentLevel level = case runCofree term of
(annotation :< Leaf _) -> pad parentLevel level <> "(" <> showAnnotation annotation <> ")"
(annotation :< syntax) -> pad parentLevel level <> "(" <> showAnnotation annotation <> foldr (\t acc -> go t parentLevel (level + 1) <> acc) "" syntax <> ")"
showAnnotation :: (HasField fields Category, HasField fields SourceSpan) => Record fields -> Text
showAnnotation annotation = categoryName annotation <> " " <> showSourceSpan annotation
where
showSourceSpan a = start a <> " - " <> end a
start = showPoint . spanStart . getField
end = showPoint . spanEnd . getField
showPoint SourcePos{..} = "[" <> show line <> ", " <> show column <> "]"
categoryName :: HasField fields Category => Record fields -> Text
categoryName = toS . category

View File

@ -1,21 +1,20 @@
{-# LANGUAGE FlexibleInstances #-}
module Renderer.Split where
{-# OPTIONS_GHC -Wno-deprecations #-}
-- Disabling deprecation warnings due to pattern match against RescueModifier.
module Renderer.Split (split) where
import Alignment
import Category
import Control.Comonad.Cofree
import Control.Monad.Free
import Category as C
import Data.Bifunctor.Join
import Data.Foldable
import Data.Functor.Both
import Data.Monoid
import Data.Record
import qualified Data.Text.Lazy as TL
import Diff
import Line
import Prelude hiding (div, head, span, fst, snd)
import qualified Prelude
import Range
import Data.These
import Info
import Prologue hiding (div, head, fst, snd, link, (<>))
import qualified Prologue
import Renderer
import Source hiding ((++))
import Source
import SplitDiff
import Syntax
import Term
@ -27,44 +26,151 @@ import qualified Text.Blaze.Internal as Blaze
-- | Add the first category from a Foldable of categories as a class name as a
-- | class name on the markup, prefixed by `category-`.
classifyMarkup :: Foldable f => f Category -> Markup -> Markup
classifyMarkup categories element = maybe element ((element !) . A.class_ . stringValue . styleName) $ maybeFirst categories
classifyMarkup :: Category -> Markup -> Markup
classifyMarkup category element = (element !) . A.class_ . textValue $ styleName category
-- | Return the appropriate style name for the given category.
styleName :: Category -> String
styleName category = "category-" ++ case category of
BinaryOperator -> "binary-operator"
styleName :: Category -> Text
styleName category = "category-" <> case category of
Program -> "program"
C.ParseError -> "error"
BooleanOperator -> "boolean_operator"
MathOperator -> "math_operator"
BitwiseOperator -> "bitwise_operator"
RelationalOperator -> "relational_operator"
Boolean -> "boolean"
DictionaryLiteral -> "dictionary"
Pair -> "pair"
FunctionCall -> "function_call"
C.Pair -> "pair"
StringLiteral -> "string"
SymbolLiteral -> "symbol"
IntegerLiteral -> "integer"
NumberLiteral -> "number"
FloatLiteral -> "float"
C.Comment -> "comment"
C.FunctionCall -> "function_call"
C.Function -> "function"
C.MethodCall -> "method_call"
C.Args -> "arguments"
C.Assignment -> "assignment"
C.MemberAccess -> "member_access"
C.VarDecl -> "var_declaration"
C.VarAssignment -> "var_assignment"
C.Switch -> "switch"
C.Case -> "case"
TemplateString -> "template_string"
Regex -> "regex"
Identifier -> "identifier"
C.Params -> "parameters"
ExpressionStatements -> "expression_statements"
C.MathAssignment -> "math_assignment"
C.SubscriptAccess -> "subscript_access"
C.Ternary -> "ternary"
C.Operator -> "operator"
C.Object -> "object"
C.For -> "for"
C.While -> "while"
C.DoWhile -> "do_while"
C.Return -> "return_statement"
C.Throw -> "throw_statement"
C.Constructor -> "constructor"
C.Try -> "try_statement"
C.Catch -> "catch_statement"
C.Finally -> "finally_statement"
ArrayLiteral -> "array"
C.Class -> "class_statement"
C.Method -> "method"
C.If -> "if_statement"
C.Empty -> "empty_statement"
C.CommaOperator -> "comma_operator"
Other string -> string
C.Module -> "module_statement"
C.Import -> "import_statement"
C.Export -> "export_statement"
C.AnonymousFunction -> "anonymous_function"
C.Interpolation -> "interpolation"
C.Subshell -> "subshell"
C.OperatorAssignment -> "operator_assignment"
C.Yield -> "yield_statement"
C.Until -> "until"
C.Unless -> "unless_statement"
C.Begin -> "begin_statement"
C.Else -> "else_block"
C.Elsif -> "elsif_block"
C.Ensure -> "ensure_block"
C.Rescue -> "rescue_block"
C.RescueModifier -> "rescue_modifier"
C.When -> "when_block"
C.RescuedException -> "last_exception"
C.RescueArgs -> "rescue_args"
C.Negate -> "negate"
C.Select -> "select_statement"
C.Go -> "go_statement"
C.Defer -> "defer_statement"
C.Slice -> "slice_expression"
C.TypeAssertion -> "type_assertion"
C.TypeConversion -> "type_conversion"
C.ArgumentPair -> "argument_pair"
C.KeywordParameter -> "keyword_param"
C.OptionalParameter -> "optional_param"
C.SplatParameter -> "splat_param"
C.HashSplatParameter -> "hash_splat_param"
C.BlockParameter -> "block_param"
C.ArrayTy -> "array_type"
C.DictionaryTy -> "dictionary_type"
C.StructTy -> "struct_type"
C.Struct -> "struct"
C.Break -> "break_statement"
C.Continue -> "continue_statement"
C.Binary -> "binary"
C.Unary -> "unary"
C.Constant -> "constant"
C.Superclass -> "superclass"
C.SingletonClass -> "singleton_class"
C.RangeExpression -> "range"
C.ScopeOperator -> "scope_operator"
C.BeginBlock -> "begin_block"
C.EndBlock -> "end_block"
C.ParameterDecl -> "parameter_declaration"
C.DefaultCase -> "default_statement"
C.TypeDecl -> "type_declaration"
C.PointerTy -> "pointer_type"
C.FieldDecl -> "field_declaration"
C.SliceTy -> "slice_type"
C.Element -> "element"
C.Literal -> "literal"
C.ChannelTy -> "channel_type"
C.FunctionTy -> "function_type"
C.Send -> "send_statement"
C.IncrementStatement -> "increment_statement"
C.DecrementStatement -> "decrement_statement"
C.QualifiedIdentifier -> "qualified_identifier"
C.IndexExpression -> "index_expression"
C.FieldDeclarations -> "field_declarations"
C.RuneLiteral -> "rune_literal"
C.Modifier c -> styleName c <> "_modifier"
-- | Pick the class name for a split patch.
splitPatchToClassName :: SplitPatch a -> AttributeValue
splitPatchToClassName patch = stringValue $ "patch " ++ case patch of
splitPatchToClassName patch = stringValue $ "patch " <> case patch of
SplitInsert _ -> "insert"
SplitDelete _ -> "delete"
SplitReplace _ -> "replace"
-- | Render a diff as an HTML split diff.
split :: Renderer leaf TL.Text
split diff blobs = renderHtml
split :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Renderer (Record fields)
split blobs diff = SplitOutput . TL.toStrict . renderHtml
. docTypeHtml
. ((head $ link ! A.rel "stylesheet" ! A.href "style.css") <>)
. body
. (table ! A.class_ (stringValue "diff")) $
. (table ! A.class_ (stringValue "diff")) .
((colgroup $ (col ! A.width (stringValue . show $ columnWidth)) <> col <> (col ! A.width (stringValue . show $ columnWidth)) <> col) <>)
. mconcat $ numberedLinesToMarkup <$> numbered
where
sources = Source.source <$> blobs
numbered = numberedRows (fmap (fmap Prelude.fst) <$> splitDiffByLines sources diff)
numbered = numberedRows (alignDiff sources diff)
maxNumber = case numbered of
[] -> 0
(row : _) -> runBothWith max $ Prelude.fst <$> row
(row : _) -> mergeThese max . runJoin $ Prologue.fst <$> row
-- | The number of digits in a number (e.g. 342 has 3 digits).
digits :: Int -> Int
@ -74,49 +180,58 @@ split diff blobs = renderHtml
columnWidth = max (20 + digits maxNumber * 8) 40
-- | Render a line with numbers as an HTML row.
numberedLinesToMarkup :: Both (Int, Line (SplitDiff a Info)) -> Markup
numberedLinesToMarkup numberedLines = tr $ runBothWith (<>) (renderLine <$> numberedLines <*> sources) <> string "\n"
numberedLinesToMarkup numberedLines = tr $ runBothWith (<>) (renderLine <$> Join (fromThese Nothing Nothing (runJoin (Just <$> numberedLines))) <*> sources) <> string "\n"
renderLine :: (Int, Line (SplitDiff leaf Info)) -> Source Char -> Markup
renderLine (number, line) source = toMarkup $ Renderable (hasChanges line, number, Renderable . (,) source <$> line)
renderLine (Just (number, line)) source = toMarkup $ Cell (hasChanges line) number (Renderable source line)
renderLine _ _
= (td mempty ! A.class_ (stringValue "blob-num blob-num-empty empty-cell"))
<> (td mempty ! A.class_ (stringValue "blob-code blob-code-empty empty-cell"))
<> string "\n"
-- | Something that can be rendered as markup.
newtype Renderable a = Renderable a
-- | A cell in a table, characterized by whether it contains changes & its line number.
data Cell a = Cell !Bool !Int !a
instance ToMarkup f => ToMarkup (Renderable (Source Char, Info, Syntax a (f, Range))) where
toMarkup (Renderable (source, Info range categories, syntax)) = classifyMarkup categories $ case syntax of
Leaf _ -> span . string . toString $ slice range source
Indexed children -> ul . mconcat $ wrapIn li <$> contentElements children
Fixed children -> ul . mconcat $ wrapIn li <$> contentElements children
Keyed children -> dl . mconcat $ wrapIn dd <$> contentElements children
where markupForSeparatorAndChild :: ToMarkup f => ([Markup], Int) -> (f, Range) -> ([Markup], Int)
markupForSeparatorAndChild (rows, previous) (child, range) = (rows ++ [ string (toString $ slice (Range previous $ start range) source), toMarkup child ], end range)
-- | Something that can be rendered as markup with reference to some source.
data Renderable a = Renderable !(Source Char) !a
wrapIn _ l@Blaze.Leaf{} = l
wrapIn _ l@Blaze.CustomLeaf{} = l
wrapIn _ l@Blaze.Content{} = l
wrapIn _ l@Blaze.Comment{} = l
wrapIn f p = f p
contentElements :: (Foldable t, ToMarkup f) => Source Char -> Range -> t (f, Range) -> [Markup]
contentElements source range children = let (elements, next) = foldr' (markupForContextAndChild source) ([], end range) children in
text (toText (slice (Range (start range) (max next (start range))) source)) : elements
contentElements children = let (elements, previous) = foldl' markupForSeparatorAndChild ([], start range) children in
elements ++ [ string . toString $ slice (Range previous $ end range) source ]
markupForContextAndChild :: ToMarkup f => Source Char -> (f, Range) -> ([Markup], Int) -> ([Markup], Int)
markupForContextAndChild source (child, range) (rows, next) = (toMarkup child : text (toText (slice (Range (end range) next) source)) : rows, start range)
instance ToMarkup (Renderable (Source Char, Term a Info)) where
toMarkup (Renderable (source, term)) = Prelude.fst $ cata (\ info@(Info range _) syntax -> (toMarkup $ Renderable (source, info, syntax), range)) term
instance ToMarkup (Renderable (Source Char, SplitDiff a Info)) where
toMarkup (Renderable (source, diff)) = Prelude.fst $ iter (\ (Annotated info@(Info range _) syntax) -> (toMarkup $ Renderable (source, info, syntax), range)) $ toMarkupAndRange <$> diff
where toMarkupAndRange :: SplitPatch (Term a Info) -> (Markup, Range)
toMarkupAndRange patch = let term@(Info range _ :< _) = getSplitTerm patch in
((div ! A.class_ (splitPatchToClassName patch) ! A.data_ (stringValue . show $ termSize term)) . toMarkup $ Renderable (source, term), range)
wrapIn :: (Markup -> Markup) -> Markup -> Markup
wrapIn _ l@Blaze.Leaf{} = l
wrapIn _ l@Blaze.CustomLeaf{} = l
wrapIn _ l@Blaze.Content{} = l
wrapIn _ l@Blaze.Comment{} = l
wrapIn f p = f p
instance ToMarkup a => ToMarkup (Renderable (Bool, Int, Line a)) where
toMarkup (Renderable (_, _, line)) | isEmpty line =
td mempty ! A.class_ (stringValue "blob-num blob-num-empty empty-cell")
<> td mempty ! A.class_ (stringValue "blob-code blob-code-empty empty-cell")
<> string "\n"
toMarkup (Renderable (hasChanges, num, line)) =
td (string $ show num) ! A.class_ (stringValue $ if hasChanges then "blob-num blob-num-replacement" else "blob-num")
<> td (mconcat $ toMarkup <$> unLine line) ! A.class_ (stringValue $ if hasChanges then "blob-code blob-code-replacement" else "blob-code")
-- Instances
instance (ToMarkup f, HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SyntaxTermF leaf fields (f, Range))) where
toMarkup (Renderable source (info :< syntax)) = classifyMarkup (category info) $ case syntax of
Leaf _ -> span . string . toString $ slice (characterRange info) source
_ -> ul . mconcat $ wrapIn li <$> contentElements source (characterRange info) (toList syntax)
instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SyntaxTerm leaf fields)) where
toMarkup (Renderable source term) = Prologue.fst $ cata (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) term
instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitSyntaxDiff leaf fields)) where
toMarkup (Renderable source diff) = Prologue.fst . iter (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) $ toMarkupAndRange <$> diff
where toMarkupAndRange patch = let term@(info :< _) = runCofree $ getSplitTerm patch in
((div ! patchAttribute patch `withCostAttribute` cost info) . toMarkup $ Renderable source (cofree term), characterRange info)
patchAttribute patch = A.class_ (splitPatchToClassName patch)
withCostAttribute a (Cost c) | c > 0 = a ! A.data_ (stringValue (show c))
| otherwise = identity
instance ToMarkup a => ToMarkup (Cell a) where
toMarkup (Cell hasChanges num line)
= (td (string (show num)) ! A.class_ (stringValue $ if hasChanges then "blob-num blob-num-replacement" else "blob-num"))
<> (td (toMarkup line) ! A.class_ (stringValue $ if hasChanges then "blob-code blob-code-replacement" else "blob-code"))
<> string "\n"
(<>) :: Monoid m => m -> m -> m
(<>) = mappend

23
src/Renderer/Summary.hs Normal file
View File

@ -0,0 +1,23 @@
{-# LANGUAGE TupleSections, ScopedTypeVariables #-}
module Renderer.Summary where
import Prologue
import Renderer
import Data.Record
import DiffSummary
import Data.Map as Map hiding (null)
import Source
import Data.Aeson
import Data.List as List
summary :: (DefaultFields fields) => Renderer (Record fields)
summary blobs diff = SummaryOutput $ Map.fromList [
("changes", changes),
("errors", errors)
]
where
changes = if null changes' then mempty else Map.singleton summaryKey (toJSON <$> changes')
errors = if null errors' then mempty else Map.singleton summaryKey (toJSON <$> errors')
(errors', changes') = List.partition isErrorSummary summaries
summaryKey = toSummaryKey (path <$> blobs)
summaries = diffSummaries blobs diff

162
src/Renderer/TOC.hs Normal file
View File

@ -0,0 +1,162 @@
{-# LANGUAGE ScopedTypeVariables #-}
module Renderer.TOC (toc) where
import Category as C
import Data.Aeson
import Data.Functor.Both hiding (fst, snd)
import qualified Data.Functor.Both as Both
import Data.Record
import Diff
import Info
import Prologue
import qualified Data.List as List
import qualified Data.Map as Map hiding (null)
import Renderer
import Source
import Syntax as S
import Term
import Patch
import Unsafe (unsafeHead)
data JSONSummary = JSONSummary { info :: Summarizable }
| ErrorSummary { error :: Text, errorSpan :: SourceSpan }
deriving (Generic, Eq, Show)
instance ToJSON JSONSummary where
toJSON JSONSummary{..} = object $ case info of
InSummarizable{..} -> [ "changeType" .= ("modified" :: Text), "category" .= (show parentCategory :: Text), "term" .= parentTermName, "span" .= parentSourceSpan ]
Summarizable{..} -> [ "changeType" .= summarizableChangeType, "category" .= (show summarizableCategory :: Text), "term" .= summarizableTermName, "span" .= summarizableSourceSpan ]
NotSummarizable -> panic "NotSummarizable should have been pruned"
toJSON ErrorSummary{..} = object [ "error" .= error, "span" .= errorSpan ]
isErrorSummary :: JSONSummary -> Bool
isErrorSummary ErrorSummary{} = True
isErrorSummary _ = False
data DiffInfo = LeafInfo { leafCategory :: Category, termName :: Text, leafSourceSpan :: SourceSpan }
| BranchInfo { branches :: [ DiffInfo ], branchCategory :: Category }
| ErrorInfo { infoSpan :: SourceSpan, termName :: Text }
deriving (Eq, Show)
data TOCSummary a = TOCSummary {
summaryPatch :: Patch a,
parentInfo :: Summarizable
} deriving (Eq, Functor, Show, Generic)
data Summarizable = Summarizable { summarizableCategory :: Category, summarizableTermName :: Text, summarizableSourceSpan :: SourceSpan, summarizableChangeType :: Text }
| InSummarizable { parentCategory :: Category, parentTermName :: Text, parentSourceSpan :: SourceSpan }
| NotSummarizable
deriving (Eq, Show)
data SummarizableTerm a = SummarizableTerm a | NotSummarizableTerm a
toc :: (DefaultFields fields) => Renderer (Record fields)
toc blobs diff = TOCOutput $ Map.fromList [
("changes", changes),
("errors", errors)
]
where
changes = if null changes' then mempty else Map.singleton summaryKey (toJSON <$> changes')
errors = if null errors' then mempty else Map.singleton summaryKey (toJSON <$> errors')
(errors', changes') = List.partition isErrorSummary summaries
summaryKey = toSummaryKey (path <$> blobs)
summaries = diffTOC blobs diff
diffTOC :: (StringConv leaf Text, DefaultFields fields) => Both SourceBlob -> SyntaxDiff leaf fields -> [JSONSummary]
diffTOC blobs diff = do
noDupes <- removeDupes (diffToTOCSummaries (source <$> blobs) diff)
toJSONSummaries noDupes
where
removeDupes :: [TOCSummary DiffInfo] -> [TOCSummary DiffInfo]
removeDupes [] = []
removeDupes xs = (fmap unsafeHead . List.groupBy (\a b -> parentInfo a == parentInfo b)) xs
diffToTOCSummaries :: (StringConv leaf Text, DefaultFields fields) => Both (Source Char) -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo]
diffToTOCSummaries sources = para $ \diff ->
let
diff' = free (Prologue.fst <$> diff)
patch' = mapPatch (termToDiffInfo beforeSource) (termToDiffInfo afterSource)
(beforeSource, afterSource) = runJoin sources
in case diff of
(Free (_ :< syntax)) -> mapToInSummarizable sources diff' (toList syntax >>= snd)
(Pure patch) -> toTOCSummaries (patch' patch)
-- Mark which leaves are summarizable.
toTOCSummaries :: Patch DiffInfo -> [TOCSummary DiffInfo]
toTOCSummaries patch = case afterOrBefore patch of
Just diffInfo -> toTOCSummaries' patch diffInfo
Nothing -> panic "No diff"
where
toTOCSummaries' patch' diffInfo = case diffInfo of
ErrorInfo{..} -> pure $ TOCSummary patch' NotSummarizable
BranchInfo{..} -> join $ zipWith toTOCSummaries' (flattenPatch patch') branches
LeafInfo{..} -> pure . TOCSummary patch' $ case leafCategory of
C.Function -> Summarizable leafCategory termName leafSourceSpan (patchType patch')
C.Method -> Summarizable leafCategory termName leafSourceSpan (patchType patch')
_ -> NotSummarizable
flattenPatch :: Patch DiffInfo -> [Patch DiffInfo]
flattenPatch = \case
Replace i1 i2 -> zipWith Replace (toLeafInfos' i1) (toLeafInfos' i2)
Insert info -> Insert <$> toLeafInfos' info
Delete info -> Delete <$> toLeafInfos' info
toLeafInfos' :: DiffInfo -> [DiffInfo]
toLeafInfos' BranchInfo{..} = branches >>= toLeafInfos'
toLeafInfos' leaf = [leaf]
mapToInSummarizable :: DefaultFields fields => Both (Source Char) -> SyntaxDiff leaf fields -> [TOCSummary DiffInfo] -> [TOCSummary DiffInfo]
mapToInSummarizable sources diff children = case (beforeTerm diff, afterTerm diff) of
(_, Just diff') -> mapToInSummarizable' (Both.snd sources) diff' <$> children
(Just diff', _) -> mapToInSummarizable' (Both.fst sources) diff' <$> children
(Nothing, Nothing) -> []
where
mapToInSummarizable' :: DefaultFields fields => Source Char -> SyntaxTerm leaf fields -> TOCSummary DiffInfo -> TOCSummary DiffInfo
mapToInSummarizable' source term summary =
case (parentInfo summary, summarizable term) of
(NotSummarizable, SummarizableTerm _) ->
summary { parentInfo = InSummarizable (category (extract term)) (toTermName source term) (Info.sourceSpan (extract term)) }
(_, _) -> summary
summarizable :: ComonadCofree (Syntax t) w => w a -> SummarizableTerm (w a)
summarizable term = go (unwrap term) term
where go = \case
S.Method{} -> SummarizableTerm
S.Function{} -> SummarizableTerm
_ -> NotSummarizableTerm
toJSONSummaries :: TOCSummary DiffInfo -> [JSONSummary]
toJSONSummaries TOCSummary{..} = case afterOrBefore summaryPatch of
Just diffInfo -> toJSONSummaries' diffInfo
Nothing -> panic "No diff"
where
toJSONSummaries' = \case
ErrorInfo{..} -> pure $ ErrorSummary termName infoSpan
BranchInfo{..} -> branches >>= toJSONSummaries'
LeafInfo{..} -> case parentInfo of
NotSummarizable -> []
_ -> pure $ JSONSummary parentInfo
termToDiffInfo :: (StringConv leaf Text, DefaultFields fields) => Source Char -> SyntaxTerm leaf fields -> DiffInfo
termToDiffInfo blob term = case unwrap term of
S.Indexed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term)
S.Fixed children -> BranchInfo (termToDiffInfo' <$> children) (category $ extract term)
S.AnonymousFunction _ _ -> LeafInfo C.AnonymousFunction (toTermName' term) (getField $ extract term)
S.Commented cs leaf -> BranchInfo (termToDiffInfo' <$> cs <> maybeToList leaf) (category $ extract term)
S.ParseError _ -> ErrorInfo (getField $ extract term) (toTermName' term)
_ -> toLeafInfo term
where toTermName' = toTermName blob
termToDiffInfo' = termToDiffInfo blob
toLeafInfo term = LeafInfo (category $ extract term) (toTermName' term) (getField $ extract term)
toTermName :: forall leaf fields. DefaultFields fields => Source Char -> SyntaxTerm leaf fields -> Text
toTermName source term = case unwrap term of
S.Function identifier _ _ _ -> toTermName' identifier
S.Method identifier Nothing _ _ _ -> toTermName' identifier
S.Method identifier (Just receiver) _ _ _ -> toTermName' receiver <> "." <> toTermName' identifier
_ -> termNameFromSource term
where
toTermName' = toTermName source
termNameFromSource term = termNameFromRange (range term)
termNameFromRange range = toText $ Source.slice range source
range = characterRange . extract

View File

@ -1,56 +1,51 @@
{-# LANGUAGE Strict #-}
module SES where
import Patch
import Diff
import Term
import Control.Monad.Free
import Control.Monad.State
import Data.Foldable (minimumBy)
import Data.List (uncons)
import qualified Data.Map as Map
import Data.Ord (comparing)
import Patch
import Prologue
-- | A function that maybe creates a diff from two terms.
type Compare a annotation = Term a annotation -> Term a annotation -> Maybe (Diff a annotation)
-- | A function that computes the cost of a diff.
type Cost a annotation = Diff a annotation -> Integer
-- | Edit constructor for two terms, if comparable. Otherwise returns Nothing.
type Compare term edit = term -> term -> Maybe edit
-- | A function that computes the cost of an edit.
type Cost edit = edit -> Int
-- | Find the shortest edit script (diff) between two terms given a function to compute the cost.
ses :: Compare a annotation -> Cost a annotation -> [Term a annotation] -> [Term a annotation] -> [Diff a annotation]
ses :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> [term] -> [term] -> [edit (Patch term)]
ses diffTerms cost as bs = fst <$> evalState diffState Map.empty where
diffState = diffAt diffTerms cost (0, 0) as bs
-- | Find the shortest edit script between two terms at a given vertex in the edit graph.
diffAt :: Compare a annotation -> Cost a annotation -> (Integer, Integer) -> [Term a annotation] -> [Term a annotation] -> State (Map.Map (Integer, Integer) [(Diff a annotation, Integer)]) [(Diff a annotation, Integer)]
diffAt _ _ _ [] [] = return []
diffAt _ cost _ [] bs = return $ foldr toInsertions [] bs where
toInsertions each = consWithCost cost (Pure . Insert $ each)
diffAt _ cost _ as [] = return $ foldr toDeletions [] as where
toDeletions each = consWithCost cost (Pure . Delete $ each)
diffAt diffTerms cost (i, j) (a : as) (b : bs) = do
diffAt :: Applicative edit => Compare term (edit (Patch term)) -> Cost (edit (Patch term)) -> (Int, Int) -> [term] -> [term] -> State (Map.Map (Int, Int) [(edit (Patch term), Int)]) [(edit (Patch term), Int)]
diffAt diffTerms cost (i, j) as bs
| (a : as) <- as, (b : bs) <- bs = do
cachedDiffs <- get
case Map.lookup (i, j) cachedDiffs of
Just diffs -> return diffs
Just diffs -> pure diffs
Nothing -> do
down <- recur (i, succ j) as (b : bs)
right <- recur (succ i, j) (a : as) bs
nomination <- fmap best $ case diffTerms a b of
nomination <- best <$> case diffTerms a b of
Just diff -> do
diagonal <- recur (succ i, succ j) as bs
return [ delete down, insert right, consWithCost cost diff diagonal ]
Nothing -> return [ delete down, insert right ]
pure [ delete a down, insert b right, consWithCost cost diff diagonal ]
Nothing -> pure [ delete a down, insert b right ]
cachedDiffs' <- get
put $ Map.insert (i, j) nomination cachedDiffs'
return nomination
pure nomination
| null as = pure $ foldr insert [] bs
| null bs = pure $ foldr delete [] as
| otherwise = pure []
where
delete = consWithCost cost (Pure . Delete $ a)
insert = consWithCost cost (Pure . Insert $ b)
delete = consWithCost cost . deleting
insert = consWithCost cost . inserting
costOf [] = 0
costOf ((_, c) : _) = c
best = minimumBy (comparing costOf)
recur = diffAt diffTerms cost
-- | Prepend a diff to the list with the cumulative cost.
consWithCost :: Cost a annotation -> Diff a annotation -> [(Diff a annotation, Integer)] -> [(Diff a annotation, Integer)]
consWithCost cost diff rest = (diff, cost diff + maybe 0 snd (fst <$> uncons rest)) : rest
-- | Prepend an edit script and the cumulative cost onto the edit script.
consWithCost :: Cost edit -> edit -> [(edit, Int)] -> [(edit, Int)]
consWithCost cost edit rest = (edit, cost edit + maybe 0 snd (fst <$> uncons rest)) : rest

173
src/SemanticDiff.hs Normal file
View File

@ -0,0 +1,173 @@
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}
module SemanticDiff (main, fetchDiff, fetchDiffs) where
import Arguments
import Prologue hiding ((<>), fst, snd)
import Data.String
import Data.Functor.Both
import Data.Version (showVersion)
import Text.Regex
import Diffing
import Git.Libgit2
import Git.Repository
import Git.Blob
import Git.Types
import Git.Libgit2.Backend
import Options.Applicative hiding (action)
import System.Timeout as Timeout
import Data.List ((\\))
import qualified Diffing as D
import qualified Git
import qualified Paths_semantic_diff as Library (version)
import qualified Renderer as R
import qualified Source
import qualified Control.Concurrent.Async.Pool as Async
import GHC.Conc (numCapabilities)
import Development.GitRev
import Parse
main :: IO ()
main = do
args@Arguments{..} <- programArguments =<< execParser argumentsParser
case runMode of
Diff -> runDiff args
Parse -> Parse.run args
runDiff :: Arguments -> IO ()
runDiff args@Arguments{..} = case diffMode of
PathDiff paths -> diffPaths args paths
CommitDiff -> diffCommits args
-- | A parser for the application's command-line arguments.
argumentsParser :: ParserInfo CmdLineOptions
argumentsParser = info (version <*> helper <*> argumentsP)
(fullDesc <> progDesc "Set the GIT_DIR environment variable to specify the git repository. Set GIT_ALTERNATE_OBJECT_DIRECTORIES to specify location of alternates."
<> header "semantic-diff - Show semantic changes between commits")
where
argumentsP :: Parser CmdLineOptions
argumentsP = CmdLineOptions
<$> (flag R.Split R.Patch (long "patch" <> help "output a patch(1)-compatible diff")
<|> flag R.Split R.JSON (long "json" <> help "output a json diff")
<|> flag' R.Split (long "split" <> help "output a split diff")
<|> flag' R.Summary (long "summary" <> help "output a diff summary")
<|> flag' R.SExpression (long "sexpression" <> help "output an s-expression diff tree")
<|> flag' R.TOC (long "toc" <> help "output a table of contents diff summary"))
<*> optional (option auto (long "timeout" <> help "timeout for per-file diffs in seconds, defaults to 7 seconds"))
<*> optional (strOption (long "output" <> short 'o' <> help "output directory for split diffs, defaults to stdout if unspecified"))
<*> switch (long "no-index" <> help "compare two paths on the filesystem")
<*> some (argument (eitherReader parseShasAndFiles) (metavar "SHA_A..SHAB FILES..."))
<*> switch (long "development" <> short 'd' <> help "set development mode which prevents timeout behavior by default")
<*> flag Diff Parse (long "parse" <> short 'p' <> help "parses a source file without diffing")
where
parseShasAndFiles :: String -> Either String ExtraArg
parseShasAndFiles s = case matchRegex regex s of
Just ["", sha2] -> Right . ShaPair $ both Nothing (Just sha2)
Just [sha1, sha2] -> Right . ShaPair $ Just <$> both sha1 sha2
_ -> Right $ FileArg s
where regex = mkRegexWithOpts "([0-9a-f]{40})\\.\\.([0-9a-f]{40})" True False
versionString :: String
versionString = "semantic-diff version " <> showVersion Library.version <> " (" <> $(gitHash) <> ")"
version :: Parser (a -> a)
version = infoOption versionString (long "version" <> short 'V' <> help "output the version of the program")
-- | Compare changes between two commits.
diffCommits :: Arguments -> IO ()
diffCommits args@Arguments{..} = do
ts <- fetchTerms args
writeToOutput output (maybe mempty R.concatOutputs ts)
where fetchTerms args = if developmentMode
then Just <$> fetchDiffs args
else Timeout.timeout timeoutInMicroseconds (fetchDiffs args)
-- | Compare two paths on the filesystem (compariable to git diff --no-index).
diffPaths :: Arguments -> Both FilePath -> IO ()
diffPaths args@Arguments{..} paths = do
sources <- sequence $ readAndTranscodeFile <$> paths
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just Source.defaultPlainBlob)
D.printDiff (parserWithCost (fst paths)) (diffArgs args) sourceBlobs
where
diffArgs Arguments{..} = R.DiffArguments { format = format, output = output }
fetchDiffs :: Arguments -> IO [R.Output]
fetchDiffs args@Arguments{..} = do
paths <- case(filePaths, shaRange) of
([], Join (Just a, Just b)) -> pathsToDiff args (both a b)
(ps, _) -> pure ps
Async.withTaskGroup numCapabilities $ \p ->
Async.mapTasks p (fetchDiff args <$> paths)
fetchDiff :: Arguments -> FilePath -> IO R.Output
fetchDiff args@Arguments{..} filepath = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (fetchDiff' args filepath) repo
fetchDiff' :: Arguments -> FilePath -> ReaderT LgRepo IO R.Output
fetchDiff' Arguments{..} filepath = do
sourcesAndOids <- sequence $ traverse (getSourceBlob filepath) <$> shaRange
let sources = fromMaybe (Source.emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = Source.idOrEmptySourceBlob <$> sources
let textDiff = D.textDiff (parserWithCost filepath) diffArguments sourceBlobs
text <- fetchText textDiff
truncatedPatch <- liftIO $ D.truncatedDiff diffArguments sourceBlobs
pure $ fromMaybe truncatedPatch text
where
diffArguments = R.DiffArguments { format = format, output = output }
fetchText textDiff = if developmentMode
then liftIO $ Just <$> textDiff
else liftIO $ Timeout.timeout timeoutInMicroseconds textDiff
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
repo <- getRepository
for_ alternateObjectDirs (liftIO . odbBackendAddPath repo . toS)
lift $ runReaderT (pathsToDiff' shas) repo
-- | Returns a list of relative file paths that have changed between the given commit shas.
pathsToDiff' :: Both String -> ReaderT LgRepo IO [FilePath]
pathsToDiff' shas = do
entries <- blobEntriesToDiff shas
pure $ (\(p, _, _) -> toS p) <$> entries
-- | Returns a list of blob entries that have changed between the given commits shas.
blobEntriesToDiff :: Both String -> ReaderT LgRepo IO [(TreeFilePath, Git.BlobOid LgRepo, BlobKind)]
blobEntriesToDiff shas = do
a <- blobEntries (fst shas)
b <- blobEntries (snd shas)
pure $ (a \\ b) <> (b \\ a)
where blobEntries sha = treeForCommitSha sha >>= treeBlobEntries
-- | Returns a Git.Tree for a commit sha
treeForCommitSha :: String -> ReaderT LgRepo IO (Git.Tree LgRepo)
treeForCommitSha sha = do
object <- parseObjOid (toS sha)
commit <- lookupCommit object
lookupTree (commitTree commit)
-- | Returns a SourceBlob given a relative file path, and the sha to look up.
getSourceBlob :: FilePath -> String -> ReaderT LgRepo IO Source.SourceBlob
getSourceBlob path sha = do
tree <- treeForCommitSha sha
entry <- treeEntry tree (toS path)
(bytestring, oid, mode) <- case entry of
Nothing -> pure (mempty, mempty, Nothing)
Just (BlobEntry entryOid entryKind) -> do
blob <- lookupBlob entryOid
let (BlobString s) = blobContents blob
let oid = renderObjOid $ blobOid blob
pure (s, oid, Just entryKind)
s <- liftIO $ transcode bytestring
pure $ Source.SourceBlob s (toS oid) path (toSourceKind <$> mode)
where
toSourceKind :: Git.BlobKind -> Source.SourceKind
toSourceKind (Git.PlainBlob mode) = Source.PlainBlob mode
toSourceKind (Git.ExecutableBlob mode) = Source.ExecutableBlob mode
toSourceKind (Git.SymlinkBlob mode) = Source.SymlinkBlob mode

View File

@ -1,13 +1,24 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Source where
import Data.Foldable
import qualified Data.Text as T
import Prologue hiding (uncons)
import Data.Text (unpack, pack)
import Data.String
import qualified Data.Vector as Vector
import Data.Word
import Numeric
import Range
import SourceSpan
-- | The source, oid, path, and Maybe SourceKind of a blob in a Git repo.
data SourceBlob = SourceBlob { source :: Source Char, oid :: String, path :: FilePath, blobKind :: Maybe SourceKind }
deriving (Show, Eq)
-- | The contents of a source file, backed by a vector for efficient slicing.
newtype Source a = Source { getVector :: Vector.Vector a }
deriving (Eq, Show, Foldable, Functor, Traversable)
-- | The kind of a blob, along with it's file mode.
data SourceKind = PlainBlob Word32 | ExecutableBlob Word32 | SymlinkBlob Word32
deriving (Show, Eq)
@ -16,24 +27,33 @@ modeToDigits (PlainBlob mode) = showOct mode ""
modeToDigits (ExecutableBlob mode) = showOct mode ""
modeToDigits (SymlinkBlob mode) = showOct mode ""
data SourceBlob = SourceBlob { source :: Source Char, oid :: String, path :: FilePath, blobKind :: Maybe SourceKind }
deriving (Show, Eq)
-- | The default plain blob mode
defaultPlainBlob :: SourceKind
defaultPlainBlob = PlainBlob 0o100644
-- | The contents of a source file, backed by a vector for efficient slicing.
newtype Source a = Source { getVector :: Vector.Vector a }
deriving (Eq, Show, Foldable, Functor, Traversable)
emptySourceBlob :: FilePath -> SourceBlob
emptySourceBlob filepath = SourceBlob (Source.fromList "") Source.nullOid filepath Nothing
sourceBlob :: Source Char -> FilePath -> SourceBlob
sourceBlob source filepath = SourceBlob source Source.nullOid filepath (Just defaultPlainBlob)
-- | Map blobs with Nothing blobKind to empty blobs.
idOrEmptySourceBlob :: SourceBlob -> SourceBlob
idOrEmptySourceBlob blob = if isNothing (blobKind blob)
then blob { oid = nullOid, blobKind = Nothing }
else blob
nullOid :: String
nullOid = "0000000000000000000000000000000000000000"
-- | Return a Source from a list of items.
fromList :: [a] -> Source a
fromList = Source . Vector.fromList
-- | Return a Source of Chars from a Text.
fromText :: T.Text -> Source Char
fromText = Source . Vector.fromList . T.unpack
fromText :: Text -> Source Char
fromText = Source . Vector.fromList . unpack
-- | Return a Source that contains a slice of the given Source.
slice :: Range -> Source a -> Source a
@ -43,6 +63,10 @@ slice range = Source . Vector.slice (start range) (rangeLength range) . getVecto
toString :: Source Char -> String
toString = toList
-- | Return a text with the contents of the Source.
toText :: Source Char -> Text
toText = pack . toList
-- | Return the item at the given index.
at :: Source a -> Int -> a
at = (Vector.!) . getVector
@ -55,19 +79,39 @@ uncons (Source vector) = if null vector then Nothing else Just (Vector.head vect
break :: (a -> Bool) -> Source a -> (Source a, Source a)
break predicate (Source vector) = let (start, remainder) = Vector.break predicate vector in (Source start, Source remainder)
-- | Concatenate two sources.
(++) :: Source a -> Source a -> Source a
(++) (Source a) = Source . (a Vector.++) . getVector
-- | Split the contents of the source after newlines.
actualLines :: Source Char -> [Source Char]
actualLines source | null source = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
Just (_, lines') -> (l <> fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string
-- | Compute the character range given a Source and a SourceSpan.
sourceSpanToRange :: Source Char -> SourceSpan -> Range
sourceSpanToRange source SourceSpan{..} = Range start end
where start = sumLengths leadingRanges + column spanStart
end = start + sumLengths (take (line spanEnd - line spanStart) remainingRanges) + (column spanEnd - column spanStart)
(leadingRanges, remainingRanges) = splitAt (line spanStart) (actualLineRanges (totalRange source) source)
sumLengths = sum . fmap (\ Range{..} -> end - start)
rangeToSourceSpan :: Source Char -> Range -> SourceSpan
rangeToSourceSpan source range@Range{} = SourceSpan startPos endPos
where startPos = maybe (SourcePos 1 1) (toStartPos 1) (head lineRanges)
endPos = toEndPos (length lineRanges) (fromMaybe (rangeAt 0) (snd <$> unsnoc lineRanges))
lineRanges = actualLineRanges range source
toStartPos line range = SourcePos line (start range)
toEndPos line range = SourcePos line (end range)
instance Semigroup (Source a) where
Source a <> Source b = Source (a Vector.++ b)
instance Monoid (Source a) where
mempty = fromList []
mappend = (<>)

100
src/SourceSpan.hs Normal file
View File

@ -0,0 +1,100 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
-- |
-- Source position and span information
-- Mostly taken from purescript's SourcePos definition.
--
module SourceSpan where
import Data.Aeson ((.=), (.:))
import qualified Data.Aeson as A
import Data.List.NonEmpty (nonEmpty)
import Data.Semigroup
import Data.These
import Prologue
import Test.LeanCheck
-- |
-- Source position information
--
data SourcePos = SourcePos
{ -- |
-- Line number
--
line :: Int
-- |
-- Column number
--
, column :: Int
} deriving (Show, Read, Eq, Ord, Generic, Hashable)
displaySourcePos :: SourcePos -> Text
displaySourcePos SourcePos{..} =
"line " <> show line <> ", column " <> show column
instance A.ToJSON SourcePos where
toJSON SourcePos{..} =
A.toJSON [line, column]
instance A.FromJSON SourcePos where
parseJSON arr = do
[line, col] <- A.parseJSON arr
pure $ SourcePos line col
data SourceSpan = SourceSpan
{ -- |
-- Start of the span
--
spanStart :: SourcePos
-- End of the span
--
, spanEnd :: SourcePos
} deriving (Show, Read, Eq, Ord, Generic, Hashable)
displayStartEndPos :: SourceSpan -> Text
displayStartEndPos sp =
displaySourcePos (spanStart sp) <> " - " <> displaySourcePos (spanEnd sp)
unionSourceSpansFrom :: Foldable f => SourceSpan -> f SourceSpan -> SourceSpan
unionSourceSpansFrom sourceSpan = maybe sourceSpan sconcat . nonEmpty . toList
unionSourceSpan :: SourceSpan -> SourceSpan -> SourceSpan
unionSourceSpan (SourceSpan start1 end1) (SourceSpan start2 end2) = SourceSpan (min start1 start2) (max end1 end2)
emptySourceSpan :: SourceSpan
emptySourceSpan = SourceSpan (SourcePos 1 1) (SourcePos 1 1)
instance Semigroup SourceSpan where
a <> b = unionSourceSpan a b
instance A.ToJSON SourceSpan where
toJSON SourceSpan{..} =
A.object [ "start" .= spanStart
, "end" .= spanEnd
]
instance A.FromJSON SourceSpan where
parseJSON = A.withObject "SourceSpan" $ \o ->
SourceSpan <$>
o .: "start" <*>
o .: "end"
newtype SourceSpans = SourceSpans { unSourceSpans :: These SourceSpan SourceSpan }
deriving (Eq, Show)
instance A.ToJSON SourceSpans where
toJSON (SourceSpans spans) = case spans of
(This span) -> A.object ["delete" .= span]
(That span) -> A.object ["insert" .= span]
(These span1 span2) -> A.object ["replace" .= (span1, span2)]
toEncoding (SourceSpans spans) = case spans of
(This span) -> A.pairs $ "delete" .= span
(That span) -> A.pairs $ "insert" .= span
(These span1 span2) -> A.pairs $ "replace" .= (span1, span2)
instance Listable SourcePos where
tiers = cons2 SourcePos
instance Listable SourceSpan where
tiers = cons2 SourceSpan

View File

@ -1,8 +1,10 @@
module SplitDiff where
import Diff (Annotated)
import Control.Monad.Free (Free)
import Term (Term)
import Data.Record
import Info
import Prologue
import Syntax
import Term (Term, TermF)
-- | A patch to only one side of a diff.
data SplitPatch a = SplitInsert a | SplitDelete a | SplitReplace a
@ -14,5 +16,12 @@ getSplitTerm (SplitInsert a) = a
getSplitTerm (SplitDelete a) = a
getSplitTerm (SplitReplace a) = a
-- | Get the range of a SplitDiff.
getRange :: Functor f => HasField fields Range => SplitDiff f (Record fields) -> Range
getRange diff = characterRange $ case runFree diff of
Free annotated -> headF annotated
Pure patch -> extract (getSplitTerm patch)
-- | A diff with only one sides annotations.
type SplitDiff leaf annotation = Free (Annotated leaf annotation) (SplitPatch (Term leaf annotation))
type SplitDiff f annotation = Free (TermF f annotation) (SplitPatch (Term f annotation))
type SplitSyntaxDiff leaf fields = SplitDiff (Syntax leaf) (Record fields)

View File

@ -1,19 +1,172 @@
{-# LANGUAGE DeriveAnyClass #-}
module Syntax where
import Data.OrderedMap
import qualified Data.Text as T
import Data.Aeson
import Data.Functor.Listable
import Data.Mergeable
import GHC.Generics
import Prologue
-- | A node in an abstract syntax tree.
data Syntax
a -- ^ The type of leaves in the syntax tree, typically String, but possibly some datatype representing different leaves more precisely.
f -- ^ The type representing another level of the tree, e.g. the children of branches. Often Cofree or Fix or similar.
=
--
-- 'a' is the type of leaves in the syntax tree, typically 'Text', but possibly some datatype representing different leaves more precisely.
-- 'f' is the type representing another level of the tree, e.g. the children of branches. Often 'Cofree', 'Free' or similar.
data Syntax a f
-- | A terminal syntax node, e.g. an identifier, or atomic literal.
Leaf a
= Leaf a
-- | An ordered branch of child nodes, expected to be variadic in the grammar, e.g. a list of statements or uncurried function parameters.
| Indexed [f]
-- | An ordered branch of child nodes, expected to be of fixed length in the grammar, e.g. a binary operator & its operands.
| Fixed [f]
-- | A branch of child nodes indexed by some String identity. This is useful for identifying e.g. methods & properties in a class scope by their names. Note that comments can generally occur in these scopes as well; one strategy for dealing with this is to identify comments by their text in the source.
| Keyed (OrderedMap T.Text f)
deriving (Functor, Show, Eq, Foldable, Traversable)
-- | A function call has an identifier where f is a (Leaf a) and a list of arguments.
| FunctionCall f [f]
-- | A ternary has a condition, a true case and a false case
| Ternary { ternaryCondition :: f, ternaryCases :: [f] }
-- | An anonymous function has a list of expressions and params.
| AnonymousFunction { params :: [f], expressions :: [f] }
-- | A function has a list of expressions.
| Function { id :: f, params :: [f], ty :: (Maybe f), expressions :: [f] }
-- | An assignment has an identifier where f can be a member access, and the value is another syntax element (function call, leaf, etc.)
| Assignment { assignmentId :: f, value :: f }
-- | An operator assignment represents expressions with operators like math (e.g x += 1) or conditional (e.g. x ||= 1) assignment.
| OperatorAssignment f f
-- | A member access contains a syntax, and another syntax that identifies a property or value in the first syntax.
-- | e.g. in Javascript x.y represents a member access syntax.
| MemberAccess { memberId :: f, property :: f }
-- | A method call consisting of its target, the method name, and the parameters passed to the method.
-- | e.g. in Javascript console.log('hello') represents a method call.
| MethodCall { targetId :: f, methodId :: f, methodParams :: [f] }
-- | An operator can be applied to a list of syntaxes.
| Operator [f]
-- | A variable declaration. e.g. var foo;
| VarDecl f (Maybe f)
-- | A variable assignment in a variable declaration. var foo = bar;
| VarAssignment { varId :: f, varValue :: f }
-- | A subscript access contains a syntax, and another syntax that indefies a property or value in the first syntax.
-- | e.g. in Javascript x["y"] represents a subscript access syntax.
| SubscriptAccess { subscriptId :: f, subscriptElement :: f }
| Switch { switchExpr :: [f], cases :: [f] }
| Case { caseExpr :: f, caseStatements :: [f] }
-- | A default case in a switch statement.
| DefaultCase [f]
| Select { cases :: [f] }
| Object { objectTy :: Maybe f, keyValues :: [f] }
-- | A pair in an Object. e.g. foo: bar or foo => bar
| Pair f f
-- | A comment.
| Comment a
-- | A term preceded or followed by any number of comments.
| Commented [f] (Maybe f)
| ParseError [f]
-- | A for statement has a list of expressions to setup the iteration and then a list of expressions in the body.
| For [f] [f]
| DoWhile { doWhileBody :: f, doWhileExpr :: f }
| While { whileExpr :: f, whileBody :: [f] }
| Return [f]
| Throw f
| Constructor f
-- | TODO: Is it a problem that in Ruby, this pattern can work for method def too?
| Try { tryBegin :: [f], catchRescue :: [f], beginElse :: Maybe f, finallyEnsure :: Maybe f }
-- | An array literal with list of children.
| Array (Maybe f) [f]
-- | A class with an identifier, superclass, and a list of definitions.
| Class f (Maybe f) [f]
-- | A method definition with an identifier, optional receiver, optional return type, params, and a list of expressions.
| Method f (Maybe f) (Maybe f) [f] [f]
-- | An if statement with an expression and maybe more expression clauses.
| If f [f]
-- | A module with an identifier, and a list of syntaxes.
| Module { moduleId:: f, moduleBody :: [f] }
| Import f [f]
| Export (Maybe f) [f]
| Yield [f]
-- | A negation of a single expression.
| Negate f
-- | A rescue block has a list of arguments to rescue and a list of expressions.
| Rescue [f] [f]
| Go f
| Defer f
| TypeAssertion f f
| TypeConversion f f
-- | A struct with an optional type.
| Struct (Maybe f) [f]
| Break (Maybe f)
| Continue (Maybe f)
-- | A block statement has an ordered branch of child nodes, e.g. BEGIN {...} or END {...} in Ruby/Perl.
| BlockStatement [f]
-- | A parameter declaration with an optional type.
| ParameterDecl (Maybe f) f
-- | A type declaration has an identifier and a type.
| TypeDecl f f
-- | A field declaration with an optional type, and an optional tag.
| FieldDecl f (Maybe f) (Maybe f)
-- | A type.
| Ty [f]
-- | A send statement has a channel and an expression in Go.
| Send f f
deriving (Eq, Foldable, Functor, Generic, Generic1, Mergeable, Ord, Show, Traversable, ToJSON)
-- Instances
instance Listable2 Syntax where
liftTiers2 leaf recur
= liftCons1 leaf Leaf
\/ liftCons1 (liftTiers recur) Indexed
\/ liftCons1 (liftTiers recur) Fixed
\/ liftCons2 recur (liftTiers recur) FunctionCall
\/ liftCons2 recur (liftTiers recur) Ternary
\/ liftCons2 (liftTiers recur) (liftTiers recur) AnonymousFunction
\/ liftCons4 recur (liftTiers recur) (liftTiers recur) (liftTiers recur) Function
\/ liftCons2 recur recur Assignment
\/ liftCons2 recur recur OperatorAssignment
\/ liftCons2 recur recur MemberAccess
\/ liftCons3 recur recur (liftTiers recur) MethodCall
\/ liftCons1 (liftTiers recur) Operator
\/ liftCons2 recur (liftTiers recur) VarDecl
\/ liftCons2 recur recur VarAssignment
\/ liftCons2 recur recur SubscriptAccess
\/ liftCons2 (liftTiers recur) (liftTiers recur) Switch
\/ liftCons2 recur (liftTiers recur) Case
\/ liftCons1 (liftTiers recur) Select
\/ liftCons2 (liftTiers recur) (liftTiers recur) Syntax.Object
\/ liftCons2 recur recur Pair
\/ liftCons1 leaf Comment
\/ liftCons2 (liftTiers recur) (liftTiers recur) Commented
\/ liftCons1 (liftTiers recur) Syntax.ParseError
\/ liftCons2 (liftTiers recur) (liftTiers recur) For
\/ liftCons2 recur recur DoWhile
\/ liftCons2 recur (liftTiers recur) While
\/ liftCons1 (liftTiers recur) Return
\/ liftCons1 recur Throw
\/ liftCons1 recur Constructor
\/ liftCons4 (liftTiers recur) (liftTiers recur) (liftTiers recur) (liftTiers recur) Try
\/ liftCons2 (liftTiers recur) (liftTiers recur) Syntax.Array
\/ liftCons3 recur (liftTiers recur) (liftTiers recur) Class
\/ liftCons5 recur (liftTiers recur) (liftTiers recur) (liftTiers recur) (liftTiers recur) Method
\/ liftCons2 recur (liftTiers recur) If
\/ liftCons2 recur (liftTiers recur) Module
\/ liftCons2 recur (liftTiers recur) Import
\/ liftCons2 (liftTiers recur) (liftTiers recur) Export
\/ liftCons1 (liftTiers recur) Yield
\/ liftCons1 recur Negate
\/ liftCons2 (liftTiers recur) (liftTiers recur) Rescue
\/ liftCons1 recur Go
\/ liftCons1 recur Defer
\/ liftCons2 recur recur TypeAssertion
\/ liftCons2 recur recur TypeConversion
\/ liftCons1 (liftTiers recur) Break
\/ liftCons1 (liftTiers recur) Continue
\/ liftCons1 (liftTiers recur) BlockStatement
\/ liftCons2 (liftTiers recur) recur ParameterDecl
\/ liftCons2 recur recur TypeDecl
\/ liftCons3 recur (liftTiers recur) (liftTiers recur) FieldDecl
\/ liftCons1 (liftTiers recur) Ty
\/ liftCons2 recur recur Send
\/ liftCons1 (liftTiers recur) DefaultCase
instance Listable leaf => Listable1 (Syntax leaf) where
liftTiers = liftTiers2 tiers
instance (Listable leaf, Listable recur) => Listable (Syntax leaf recur) where
tiers = tiers1

View File

@ -1,35 +1,47 @@
{-# LANGUAGE RankNTypes, TypeFamilies, TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Term where
import Control.Comonad.Cofree
import Prologue
import Data.Align.Generic
import Data.Functor.Foldable as Foldable
import Data.Functor.Both
import Data.Maybe
import Data.OrderedMap hiding (size)
import Data.Record
import Data.These
import Syntax
-- | An annotated node (Syntax) in an abstract syntax tree.
type Term a annotation = Cofree (Syntax a) annotation
-- | A Term with an abstract syntax tree and an annotation.
type Term f = Cofree f
type TermF = CofreeF
-- | A Term with a Syntax leaf and a record of fields.
type SyntaxTerm leaf fields = Term (Syntax leaf) (Record fields)
type SyntaxTermF leaf fields = TermF (Syntax leaf) (Record fields)
-- Term has a Base functor TermF which gives it Recursive and Corecursive instances.
type instance Base (Term f a) = TermF f a
instance Functor f => Recursive (Term f a) where project = runCofree
instance Functor f => Corecursive (Term f a) where embed = cofree
-- | Zip two terms by combining their annotations into a pair of annotations.
-- | If the structure of the two terms don't match, then Nothing will be returned.
zipTerms :: Term a annotation -> Term a annotation -> Maybe (Term a (Both annotation))
zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b
where
annotate = fmap (Both (annotation1, annotation2) :<)
zipUnwrap (Leaf _) (Leaf b') = Just $ Leaf b'
zipUnwrap (Indexed a') (Indexed b') = Just . Indexed . catMaybes $ zipWith zipTerms a' b'
zipUnwrap (Fixed a') (Fixed b') = Just . Fixed . catMaybes $ zipWith zipTerms a' b'
zipUnwrap (Keyed a') (Keyed b') | keys a' == keys b' = Just . Keyed . fromList . catMaybes $ zipUnwrapMaps a' b' <$> keys a'
zipUnwrap _ _ = Nothing
zipUnwrapMaps a' b' key = (,) key <$> zipTerms (a' ! key) (b' ! key)
zipTerms :: (Traversable f, GAlign f) => Term f annotation -> Term f annotation -> Maybe (Term f (Both annotation))
zipTerms t1 t2 = iter go (alignCofreeWith galign (const Nothing) both (These t1 t2))
where go (a :< s) = cofree . (a :<) <$> sequenceA s
-- | Fold a term into some other value, starting with the leaves.
cata :: (annotation -> Syntax a b -> b) -> Term a annotation -> b
cata f (annotation :< syntax) = f annotation $ cata f <$> syntax
-- | Return the number of leaves in the node.
termSize :: Term a annotation -> Integer
-- | Return the node count of a term.
termSize :: (Foldable f, Functor f) => Term f annotation -> Int
termSize = cata size where
size _ (Leaf _) = 1
size _ (Indexed i) = sum i
size _ (Fixed f) = sum f
size _ (Keyed k) = sum k
size (_ :< syntax) = 1 + sum syntax
-- | Aligns (zips, retaining non-overlapping portions of the structure) a pair of terms.
alignCofreeWith :: Functor f
=> (forall a b. f a -> f b -> Maybe (f (These a b))) -- ^ A function comparing a pair of structures, returning `Just` the combined structure if they are comparable (e.g. if they have the same constructor), and `Nothing` otherwise. The 'Data.Align.Generic.galign' function is usually what you want here.
-> (These (Term f a) (Term f b) -> contrasted) -- ^ A function mapping a 'These' of incomparable terms into 'Pure' values in the resulting tree.
-> (a -> b -> combined) -- ^ A function mapping the input terms annotations into annotations in the 'Free' values in the resulting tree.
-> These (Term f a) (Term f b) -- ^ The input terms.
-> Free (TermF f combined) contrasted
alignCofreeWith compare contrast combine = go
where go terms = fromMaybe (pure (contrast terms)) $ case terms of
These t1 t2 -> wrap . (combine (extract t1) (extract t2) :<) . fmap go <$> compare (unwrap t1) (unwrap t2)
_ -> Nothing

View File

@ -1,62 +1,133 @@
module TreeSitter where
{-# LANGUAGE DataKinds #-}
module TreeSitter
( treeSitterParser
, defaultTermAssignment
) where
import Prologue hiding (Constructor)
import Category
import Data.Record
import Language
import qualified Language.C as C
import qualified Language.Go as Go
import qualified Language.JavaScript as JS
import qualified Language.Ruby as Ruby
import Parser
import Range
import Source
import qualified Data.Set as Set
import qualified Syntax
import Foreign
import Foreign.C.String
import qualified Syntax as S
import Term
import Text.Parser.TreeSitter hiding (Language(..))
import qualified Text.Parser.TreeSitter as TS
import SourceSpan
import Info
-- | Returns a TreeSitter parser for the given language and TreeSitter grammar.
treeSitterParser :: Language -> Ptr TS.Language -> Parser
treeSitterParser language grammar contents = do
document <- ts_document_make
treeSitterParser :: Language -> Ptr TS.Language -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
treeSitterParser language grammar blob = do
document <- ts_document_new
ts_document_set_language document grammar
withCString (toString contents) (\source -> do
withCString (toString $ source blob) (\source -> do
ts_document_set_input_string document source
ts_document_parse document
term <- documentToTerm (termConstructor $ categoriesForLanguage language) document contents
term <- documentToTerm language document blob
ts_document_free document
return term)
pure term)
-- Given a language and a node name, return the correct categories.
categoriesForLanguage :: Language -> String -> Set.Set Category
categoriesForLanguage language name = case (language, name) of
(JavaScript, "object") -> Set.singleton DictionaryLiteral
(JavaScript, "rel_op") -> Set.singleton BinaryOperator -- relational operator, e.g. >, <, <=, >=, ==, !=
(Ruby, "hash") -> Set.singleton DictionaryLiteral
_ -> defaultCategoryForNodeName name
-- | Given a node name from TreeSitter, return the correct categories.
defaultCategoryForNodeName :: String -> Set.Set Category
defaultCategoryForNodeName name = case name of
"function_call" -> Set.singleton FunctionCall
"pair" -> Set.singleton Pair
"string" -> Set.singleton StringLiteral
"integer" -> Set.singleton IntegerLiteral
"symbol" -> Set.singleton SymbolLiteral
"array" -> Set.singleton ArrayLiteral
_ -> Set.singleton (Other name)
-- | Given a constructor and a tree sitter document, return a parser.
documentToTerm :: Constructor -> Ptr Document -> Parser
documentToTerm constructor document contents = alloca $ \ root -> do
-- | Return a parser for a tree sitter language & document.
documentToTerm :: Language -> Ptr Document -> Parser (Syntax.Syntax Text) (Record '[Range, Category, SourceSpan])
documentToTerm language document SourceBlob{..} = alloca $ \ root -> do
ts_document_root_node_p document root
toTerm root
where toTerm node = do
name <- ts_node_p_name node document
name <- peekCString name
count <- ts_node_p_named_child_count node
children <- mapM (alloca . getChild node) $ take (fromIntegral count) [0..]
-- Note: The strict application here is semantically important. Without it, we may not evaluate the range until after weve exited the scope that `node` was allocated within, meaning `alloca` will free it & other stack data may overwrite it.
range <- return $! Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
children <- filter isNonEmpty <$> traverse (alloca . getChild node) (take (fromIntegral count) [0..])
return $! constructor contents range name children
getChild node n out = do
_ <- ts_node_p_named_child node n out
toTerm out
let range = Range { start = fromIntegral $ ts_node_p_start_char node, end = fromIntegral $ ts_node_p_end_char node }
let startPos = SourcePos (1 + (fromIntegral $! ts_node_p_start_point_row node)) (1 + (fromIntegral $! ts_node_p_start_point_column node))
let endPos = SourcePos (1 + (fromIntegral $! ts_node_p_end_point_row node)) (1 + (fromIntegral $! ts_node_p_end_point_column node))
let sourceSpan = SourceSpan { spanStart = startPos , spanEnd = endPos }
allChildrenCount <- ts_node_p_child_count node
let allChildren = filter isNonEmpty <$> traverse (alloca . getUnnamedChild node) (take (fromIntegral allChildrenCount) [0..])
-- Note: The strict application here is semantically important.
-- Without it, we may not evaluate the value until after weve exited
-- the scope that `node` was allocated within, meaning `alloca` will
-- free it & other stack data may overwrite it.
range `seq` sourceSpan `seq` assignTerm language (slice range source) (range :. categoryForLanguageProductionName language (toS name) :. sourceSpan :. Nil) children allChildren
getChild node n out = ts_node_p_named_child node n out >> toTerm out
{-# INLINE getChild #-}
getUnnamedChild node n out = ts_node_p_child node n out >> toTerm out
{-# INLINE getUnnamedChild #-}
isNonEmpty child = category (extract child) /= Empty
assignTerm :: Language -> Source Char -> Record '[Range, Category, SourceSpan] -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> IO (SyntaxTerm Text '[ Range, Category, SourceSpan ])
assignTerm language source annotation children allChildren =
cofree . (annotation :<) <$> case assignTermByLanguage language source (category annotation) children of
Just a -> pure a
_ -> defaultTermAssignment source (category annotation) children allChildren
where assignTermByLanguage :: Language -> Source Char -> Category -> [ SyntaxTerm Text '[ Range, Category, SourceSpan ] ] -> Maybe (S.Syntax Text (SyntaxTerm Text '[ Range, Category, SourceSpan ]))
assignTermByLanguage = \case
JavaScript -> JS.termAssignment
C -> C.termAssignment
Language.Go -> Go.termAssignment
Ruby -> Ruby.termAssignment
_ -> \ _ _ _ -> Nothing
defaultTermAssignment :: Source Char -> Category -> [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO [ SyntaxTerm Text '[Range, Category, SourceSpan] ] -> IO (S.Syntax Text (SyntaxTerm Text '[Range, Category, SourceSpan]))
defaultTermAssignment source category children allChildren
| category `elem` operatorCategories = S.Operator <$> allChildren
| otherwise = pure $! case (category, children) of
(ParseError, children) -> S.ParseError children
(Comment, _) -> S.Comment (toText source)
(Pair, [key, value]) -> S.Pair key value
-- Control flow statements
(If, condition : body) -> S.If condition body
(Switch, _) -> uncurry S.Switch (Prologue.break ((== Case) . Info.category . extract) children)
(Case, expr : body) -> S.Case expr body
(While, expr : rest) -> S.While expr rest
-- Statements
(Return, _) -> S.Return children
(Yield, _) -> S.Yield children
(Throw, [expr]) -> S.Throw expr
(Break, [label]) -> S.Break (Just label)
(Break, []) -> S.Break Nothing
(Continue, [label]) -> S.Continue (Just label)
(Continue, []) -> S.Continue Nothing
(_, []) -> S.Leaf (toText source)
(_, children) -> S.Indexed children
where operatorCategories =
[ Operator
, Binary
, Unary
, RangeExpression
, ScopeOperator
, BooleanOperator
, MathOperator
, RelationalOperator
, BitwiseOperator
]
categoryForLanguageProductionName :: Language -> Text -> Category
categoryForLanguageProductionName = withDefaults . \case
JavaScript -> JS.categoryForJavaScriptProductionName
C -> C.categoryForCProductionName
Ruby -> Ruby.categoryForRubyName
Language.Go -> Go.categoryForGoName
_ -> Other
where withDefaults productionMap = \case
"ERROR" -> ParseError
s -> productionMap s

View File

@ -1,67 +1,309 @@
{-# LANGUAGE DataKinds #-}
module AlignmentSpec where
import Test.Hspec
import Test.Hspec.QuickCheck
import Test.QuickCheck hiding (Fixed)
import Data.Text.Arbitrary ()
import Alignment
import ArbitraryTerm (arbitraryLeaf)
import Control.Arrow
import Control.Comonad.Cofree
import Control.Monad.Free hiding (unfold)
import Data.Adjoined
import Data.Copointed
import Control.Monad.State
import Data.Align hiding (align)
import Data.Bifunctor
import Data.Bifunctor.Join
import Data.Functor.Both as Both
import Diff
import qualified Data.Maybe as Maybe
import Data.Functor.Identity
import Line
import Data.Functor.Listable
import Data.List (nub)
import Data.Monoid hiding ((<>))
import Data.Record
import Data.String
import Data.These
import Patch
import Prelude hiding (fst, snd)
import qualified Prelude
import Prologue hiding (fst, snd)
import qualified Prologue
import Range
import Source hiding ((++), fromList)
import qualified Source
import SplitDiff
import Syntax
import Term
import Test.Hspec (Spec, describe, it, parallel)
import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
import Test.LeanCheck
import GHC.Show (Show(..))
spec :: Spec
spec = parallel $ do
describe "splitDiffByLines" $ do
prop "preserves line counts in equal sources" $
\ source ->
length (splitDiffByLines (pure source) (Free $ Annotated (pure $ Info (totalRange source) mempty) (Indexed . Prelude.fst $ foldl combineIntoLeaves ([], 0) source))) `shouldBe` length (filter (== '\n') $ toString source) + 1
describe "alignBranch" $ do
it "produces symmetrical context" $
alignBranch getRange ([] :: [Join These (SplitDiff (Syntax String) (Record '[Range]))]) (both [Range 0 2, Range 2 4] [Range 0 2, Range 2 4]) `shouldBe`
[ Join (These (Range 0 2, [])
(Range 0 2, []))
, Join (These (Range 2 4, [])
(Range 2 4, []))
]
prop "produces the maximum line count in inequal sources" $
\ sources ->
length (splitDiffByLines sources (Free $ Annotated ((`Info` mempty) . totalRange <$> sources) (Indexed $ leafWithRangesInSources sources <$> runBothWith (zipWith both) (actualLineRanges <$> (totalRange <$> sources) <*> sources)))) `shouldBe` runBothWith max ((+ 1) . length . filter (== '\n') . toString <$> sources)
it "produces asymmetrical context" $
alignBranch getRange ([] :: [Join These (SplitDiff (Syntax String) (Record '[Range]))]) (both [Range 0 2, Range 2 4] [Range 0 1]) `shouldBe`
[ Join (These (Range 0 2, [])
(Range 0 1, []))
, Join (This (Range 2 4, []))
]
describe "splitAbstractedTerm" $ do
prop "preserves line count" $
\ source -> let range = totalRange source in
splitAbstractedTerm (:<) (Identity source) (Identity (Info range mempty)) (Leaf source) `shouldBe` (Identity . lineMap (fmap (((:< Leaf source) . (`Info` mempty) &&& id))) <$> linesInRangeOfSource range source)
prop "covers every input line" $
\ elements -> let (_, children, ranges) = toAlignBranchInputs elements in
join <$> traverse (modifyJoin (fromThese [] []) . fmap (pure . Prologue.fst)) (alignBranch Prologue.snd children ranges) `shouldBe` ranges
let makeTerm = ((Free .) . Annotated) :: Info -> Syntax (Source Char) (SplitDiff (Source Char) Info) -> SplitDiff (Source Char) Info
prop "outputs one row for single-line unchanged leaves" $
forAll (arbitraryLeaf `suchThat` isOnSingleLine) $
\ (source, info@(Info range categories), syntax) -> splitAbstractedTerm makeTerm (pure source) (pure $ Info range categories) syntax `shouldBe` fromList [
both (pure (makeTerm info $ Leaf source, Range 0 (length source))) (pure (makeTerm info $ Leaf source, Range 0 (length source))) ]
prop "covers every input child" $
\ elements -> let (_, children, ranges) = toAlignBranchInputs elements in
sort (nub (keysOfAlignedChildren (alignBranch Prologue.snd children ranges))) `shouldBe` sort (nub (catMaybes (branchElementKey <$> elements)))
prop "outputs one row for single-line empty unchanged indexed nodes" $
forAll (arbitrary `suchThat` (\ a -> filter (/= '\n') (toString a) == toString a)) $
\ source -> splitAbstractedTerm makeTerm (pure source) (pure $ Info (totalRange source) mempty) (Indexed []) `shouldBe` fromList [
both (pure (makeTerm (Info (totalRange source) mempty) $ Indexed [], Range 0 (length source))) (pure (makeTerm (Info (totalRange source) mempty) $ Indexed [], Range 0 (length source))) ]
prop "covers every line of every input child" $
\ elements -> let (_, children, ranges) = toAlignBranchInputs elements in
sort (keysOfAlignedChildren (alignBranch Prologue.snd children ranges)) `shouldBe` sort (do
line <- children
these (pure . Prologue.fst) (pure . Prologue.fst) (\ (k1, _) (k2, _) -> [ k1, k2 ]) . runJoin $ line)
where
isOnSingleLine (a, _, _) = filter (/= '\n') (toString a) == toString a
describe "alignDiff" $ do
it "aligns identical branches on a single line" $
let sources = both (Source.fromList "[ foo ]") (Source.fromList "[ foo ]") in
align sources (pure (info 0 7) `branch` [ pure (info 2 5) `leaf` "foo" ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 7 `branch` [ info 2 5 `leaf` "foo" ])
(info 0 7 `branch` [ info 2 5 `leaf` "foo" ])) ]
combineIntoLeaves (leaves, start) char = (leaves ++ [ Free $ Annotated (Info <$> pure (Range start $ start + 1) <*> mempty) (Leaf [ char ]) ], start + 1)
it "aligns identical branches spanning multiple lines" $
let sources = both (Source.fromList "[\nfoo\n]") (Source.fromList "[\nfoo\n]") in
align sources (pure (info 0 7) `branch` [ pure (info 2 5) `leaf` "foo" ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 2 `branch` [])
(info 0 2 `branch` []))
, Join (These (info 2 6 `branch` [ info 2 5 `leaf` "foo" ])
(info 2 6 `branch` [ info 2 5 `leaf` "foo" ]))
, Join (These (info 6 7 `branch` [])
(info 6 7 `branch` []))
]
leafWithRangesInSources sources ranges = Free $ Annotated (Info <$> ranges <*> pure mempty) (Leaf $ runBothWith (++) (toString <$> sources))
it "aligns reformatted branches" $
let sources = both (Source.fromList "[ foo ]") (Source.fromList "[\nfoo\n]") in
align sources (pure (info 0 7) `branch` [ pure (info 2 5) `leaf` "foo" ]) `shouldBe` prettyDiff sources
[ Join (That (info 0 2 `branch` []))
, Join (These (info 0 7 `branch` [ info 2 5 `leaf` "foo" ])
(info 2 6 `branch` [ info 2 5 `leaf` "foo" ]))
, Join (That (info 6 7 `branch` []))
]
leafWithRangeInSource source range = Info range mempty :< Leaf source
it "aligns nodes following reformatted branches" $
let sources = both (Source.fromList "[ foo ]\nbar\n") (Source.fromList "[\nfoo\n]\nbar\n") in
align sources (pure (info 0 12) `branch` [ pure (info 0 7) `branch` [ pure (info 2 5) `leaf` "foo" ], pure (info 8 11) `leaf` "bar" ]) `shouldBe` prettyDiff sources
[ Join (That (info 0 2 `branch` [ info 0 2 `branch` [] ]))
, Join (These (info 0 8 `branch` [ info 0 7 `branch` [ info 2 5 `leaf` "foo" ] ])
(info 2 6 `branch` [ info 2 6 `branch` [ info 2 5 `leaf` "foo" ] ]))
, Join (That (info 6 8 `branch` [ info 6 7 `branch` [] ]))
, Join (These (info 8 12 `branch` [ info 8 11 `leaf` "bar" ])
(info 8 12 `branch` [ info 8 11 `leaf` "bar" ]))
, Join (These (info 12 12 `branch` [])
(info 12 12 `branch` []))
]
patchWithBoth (Insert ()) = Insert . snd
patchWithBoth (Delete ()) = Delete . fst
patchWithBoth (Replace () ()) = runBothWith Replace
it "aligns identical branches with multiple children on the same line" $
let sources = pure (Source.fromList "[ foo, bar ]") in
align sources (pure (info 0 12) `branch` [ pure (info 2 5) `leaf` "foo", pure (info 7 10) `leaf` "bar" ]) `shouldBe` prettyDiff sources
[ Join (runBothWith These (pure (info 0 12 `branch` [ info 2 5 `leaf` "foo", info 7 10 `leaf` "bar" ])) ) ]
it "aligns insertions" $
let sources = both (Source.fromList "a") (Source.fromList "a\nb") in
align sources (both (info 0 1) (info 0 3) `branch` [ pure (info 0 1) `leaf` "a", insert (info 2 3 `leaf` "b") ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 1 `branch` [ info 0 1 `leaf` "a" ])
(info 0 2 `branch` [ info 0 1 `leaf` "a" ]))
, Join (That (info 2 3 `branch` [ insert (info 2 3 `leaf` "b") ]))
]
it "aligns total insertions" $
let sources = both (Source.fromList "") (Source.fromList "a") in
align sources (insert (info 0 1 `leaf` "a")) `shouldBe` prettyDiff sources
[ Join (That (insert (info 0 1 `leaf` "a"))) ]
it "aligns insertions into empty branches" $
let sources = both (Source.fromList "[ ]") (Source.fromList "[a]") in
align sources (pure (info 0 3) `branch` [ insert (info 1 2 `leaf` "a") ]) `shouldBe` prettyDiff sources
[ Join (That (info 0 3 `branch` [ insert (info 1 2 `leaf` "a") ]))
, Join (This (info 0 3 `branch` []))
]
it "aligns symmetrically following insertions" $
let sources = both (Source.fromList "a\nc") (Source.fromList "a\nb\nc") in
align sources (both (info 0 3) (info 0 5) `branch` [ pure (info 0 1) `leaf` "a", insert (info 2 3 `leaf` "b"), both (info 2 3) (info 4 5) `leaf` "c" ])
`shouldBe` prettyDiff sources
[ Join (These (info 0 2 `branch` [ info 0 1 `leaf` "a" ])
(info 0 2 `branch` [ info 0 1 `leaf` "a" ]))
, Join (That (info 2 4 `branch` [ insert (info 2 3 `leaf` "b") ]))
, Join (These (info 2 3 `branch` [ info 2 3 `leaf` "c" ])
(info 4 5 `branch` [ info 4 5 `leaf` "c" ]))
]
it "symmetrical nodes force the alignment of asymmetrical nodes on both sides" $
let sources = both (Source.fromList "[ a, b ]") (Source.fromList "[ b, c ]") in
align sources (pure (info 0 8) `branch` [ delete (info 2 3 `leaf` "a"), both (info 5 6) (info 2 3) `leaf` "b", insert (info 5 6 `leaf` "c") ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 8 `branch` [ delete (info 2 3 `leaf` "a"), info 5 6 `leaf` "b" ])
(info 0 8 `branch` [ info 2 3 `leaf` "b", insert (info 5 6 `leaf` "c") ])) ]
it "when one of two symmetrical nodes must be split, splits the latter" $
let sources = both (Source.fromList "[ a, b ]") (Source.fromList "[ a\n, b\n]") in
align sources (both (info 0 8) (info 0 9) `branch` [ pure (info 2 3) `leaf` "a", both (info 5 6) (info 6 7) `leaf` "b" ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 8 `branch` [ info 2 3 `leaf` "a", info 5 6 `leaf` "b" ])
(info 0 4 `branch` [ info 2 3 `leaf` "a" ]))
, Join (That (info 4 8 `branch` [ info 6 7 `leaf` "b" ]))
, Join (That (info 8 9 `branch` []))
]
it "aligns deletions before insertions" $
let sources = both (Source.fromList "[ a ]") (Source.fromList "[ b ]") in
align sources (pure (info 0 5) `branch` [ delete (info 2 3 `leaf` "a"), insert (info 2 3 `leaf` "b") ]) `shouldBe` prettyDiff sources
[ Join (This (info 0 5 `branch` [ delete (info 2 3 `leaf` "a") ]))
, Join (That (info 0 5 `branch` [ insert (info 2 3 `leaf` "b") ]))
]
it "aligns context-only lines symmetrically" $
let sources = both (Source.fromList "[\n a\n,\n b\n]") (Source.fromList "[\n a, b\n\n\n]") in
align sources (both (info 0 13) (info 0 12) `branch` [ pure (info 4 5) `leaf` "a", both (info 10 11) (info 7 8) `leaf` "b" ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 2 `branch` [])
(info 0 2 `branch` []))
, Join (These (info 2 6 `branch` [ info 4 5 `leaf` "a" ])
(info 2 9 `branch` [ info 4 5 `leaf` "a", info 7 8 `leaf` "b" ]))
, Join (These (info 6 8 `branch` [])
(info 9 10 `branch` []))
, Join (This (info 8 12 `branch` [ info 10 11 `leaf` "b" ]))
, Join (These (info 12 13 `branch` [])
(info 10 11 `branch` []))
, Join (That (info 11 12 `branch` []))
]
it "aligns asymmetrical nodes preceding their symmetrical siblings conservatively" $
let sources = both (Source.fromList "[ b, c ]") (Source.fromList "[ a\n, c\n]") in
align sources (both (info 0 8) (info 0 9) `branch` [ insert (info 2 3 `leaf` "a"), delete (info 2 3 `leaf` "b"), both (info 5 6) (info 6 7) `leaf` "c" ]) `shouldBe` prettyDiff sources
[ Join (That (info 0 4 `branch` [ insert (info 2 3 `leaf` "a") ]))
, Join (These (info 0 8 `branch` [ delete (info 2 3 `leaf` "b"), info 5 6 `leaf` "c" ])
(info 4 8 `branch` [ info 6 7 `leaf` "c" ]))
, Join (That (info 8 9 `branch` []))
]
it "aligns symmetrical reformatted nodes" $
let sources = both (Source.fromList "a [ b ]\nc") (Source.fromList "a [\nb\n]\nc") in
align sources (pure (info 0 9) `branch` [ pure (info 0 1) `leaf` "a", pure (info 2 7) `branch` [ pure (info 4 5) `leaf` "b" ], pure (info 8 9) `leaf` "c" ]) `shouldBe` prettyDiff sources
[ Join (These (info 0 8 `branch` [ info 0 1 `leaf` "a", info 2 7 `branch` [ info 4 5 `leaf` "b" ] ])
(info 0 4 `branch` [ info 0 1 `leaf` "a", info 2 4 `branch` [] ]))
, Join (That (info 4 6 `branch` [ info 4 6 `branch` [ info 4 5 `leaf` "b" ] ]))
, Join (That (info 6 8 `branch` [ info 6 7 `branch` [] ]))
, Join (These (info 8 9 `branch` [ info 8 9 `leaf` "c" ])
(info 8 9 `branch` [ info 8 9 `leaf` "c" ]))
]
describe "numberedRows" $ do
prop "counts only non-empty values" $
\ xs -> counts (numberedRows (unListableF <$> xs :: [Join These Char])) `shouldBe` length . catMaybes <$> Join (unalign (runJoin . unListableF <$> xs))
data BranchElement
= Child String (Join These String)
| Margin (Join These String)
deriving Show
branchElementKey :: BranchElement -> Maybe String
branchElementKey (Child key _) = Just key
branchElementKey _ = Nothing
toAlignBranchInputs :: [BranchElement] -> (Both (Source.Source Char), [Join These (String, Range)], Both [Range])
toAlignBranchInputs elements = (sources, join . (`evalState` both 0 0) . traverse go $ elements, ranges)
where go :: BranchElement -> State (Both Int) [Join These (String, Range)]
go child@(Child key _) = do
lines <- traverse (\ (Child _ contents) -> do
prev <- get
let next = (+) <$> prev <*> modifyJoin (fromThese 0 0) (length <$> contents)
put next
pure $! modifyJoin (runBothWith bimap (const <$> (Range <$> prev <*> next))) contents) (alignBranchElement child)
pure $! fmap ((,) key) <$> lines
go (Margin contents) = do
prev <- get
put $ (+) <$> prev <*> modifyJoin (fromThese 0 0) (length <$> contents)
pure []
alignBranchElement element = case element of
Child key contents -> Child key <$> joinCrosswalk lines contents
Margin contents -> Margin <$> joinCrosswalk lines contents
where lines = fmap toList . Source.actualLines . Source.fromList
sources = foldMap Source.fromList <$> bothContents elements
ranges = fmap (filter (\ (Range start end) -> start /= end)) $ Source.actualLineRanges <$> (totalRange <$> sources) <*> sources
bothContents = foldMap (modifyJoin (fromThese [] []) . fmap (:[]) . branchElementContents)
branchElementContents (Child _ contents) = contents
branchElementContents (Margin contents) = contents
keysOfAlignedChildren :: [Join These (Range, [(String, Range)])] -> [String]
keysOfAlignedChildren lines = lines >>= these identity identity (<>) . runJoin . fmap (fmap Prologue.fst . Prologue.snd)
joinCrosswalk :: Bicrosswalk p => Align f => (a -> f b) -> Join p a -> f (Join p b)
joinCrosswalk f = fmap Join . bicrosswalk f f . runJoin
instance Listable BranchElement where
tiers = oneof [ (\ key -> Child key `mapT` joinTheseOf (contents key)) `concatMapT` key
, Margin `mapT` joinTheseOf (pure `mapT` padding '-') ]
where key = pure `mapT` [['a'..'z'] <> ['A'..'Z'] <> ['0'..'9']]
contents key = (wrap key . pure) `mapT` padding '*'
wrap key contents = "(" <> key <> contents <> ")" :: String
padding :: Char -> [Tier Char]
padding char = frequency [ (10, [[char]])
, (1, [['\n']]) ]
joinTheseOf g = oneof [ (Join . This) `mapT` g
, (Join . That) `mapT` g
, productWith ((Join .) . These) g g ]
frequency :: [(Int, [Tier a])] -> [Tier a]
frequency = concatT . foldr ((\/) . pure . uncurry replicate) []
oneof :: [[[a]]] -> [[a]]
oneof = frequency . fmap ((,) 1)
counts :: [Join These (Int, a)] -> Both Int
counts numbered = fromMaybe 0 . getLast . mconcat . fmap Last <$> Join (unalign (runJoin . fmap Prologue.fst <$> numbered))
align :: Both (Source.Source Char) -> ConstructibleFree (Patch (Term (Syntax String) (Record '[Range]))) (Both (Record '[Range])) -> PrettyDiff (SplitDiff (Syntax String) (Record '[Range]))
align sources = PrettyDiff sources . fmap (fmap (getRange &&& identity)) . alignDiff sources . deconstruct
info :: Int -> Int -> Record '[Range]
info start end = Range start end :. Nil
prettyDiff :: Both (Source.Source Char) -> [Join These (ConstructibleFree (SplitPatch (Term (Syntax String) (Record '[Range]))) (Record '[Range]))] -> PrettyDiff (SplitDiff (Syntax String) (Record '[Range]))
prettyDiff sources = PrettyDiff sources . fmap (fmap ((getRange &&& identity) . deconstruct))
data PrettyDiff a = PrettyDiff { unPrettySources :: Both (Source.Source Char), unPrettyLines :: [Join These (Range, a)] }
deriving Eq
instance Show (PrettyDiff a) where
showsPrec _ (PrettyDiff sources lines) = (prettyPrinted ++) -- . (("\n" ++ show lines) ++)
where prettyPrinted = showLine (maximum (0 : (maximum . fmap length <$> shownLines))) <$> shownLines >>= ('\n':)
shownLines = catMaybes $ toBoth <$> lines
showLine n line = uncurry ((<>) . (++ " | ")) (fromThese (replicate n ' ') (replicate n ' ') (runJoin (pad n <$> line)))
showDiff (range, _) = filter (/= '\n') . toList . Source.slice range
pad n string = (<>) (take n string) (replicate (max 0 (n - length string)) ' ')
toBoth them = showDiff <$> them `applyThese` modifyJoin (uncurry These) sources
newtype ConstructibleFree patch annotation = ConstructibleFree { deconstruct :: Free (CofreeF (Syntax String) annotation) patch }
class PatchConstructible p where
insert :: Term (Syntax String) (Record '[Range]) -> p
delete :: Term (Syntax String) (Record '[Range]) -> p
instance PatchConstructible (Patch (Term (Syntax String) (Record '[Range]))) where
insert = Insert
delete = Delete
instance PatchConstructible (SplitPatch (Term (Syntax String) (Record '[Range]))) where
insert = SplitInsert
delete = SplitDelete
instance PatchConstructible patch => PatchConstructible (ConstructibleFree patch annotation) where
insert = ConstructibleFree . pure . insert
delete = ConstructibleFree . pure . delete
class SyntaxConstructible s where
leaf :: annotation -> String -> s annotation
branch :: annotation -> [s annotation] -> s annotation
instance SyntaxConstructible (ConstructibleFree patch) where
leaf info = ConstructibleFree . free . Free . (info :<) . Leaf
branch info = ConstructibleFree . free . Free . (info :<) . Indexed . fmap deconstruct
instance SyntaxConstructible (Cofree (Syntax String)) where
info `leaf` value = cofree $ info :< Leaf value
info `branch` children = cofree $ info :< Indexed children

View File

@ -1,76 +0,0 @@
module ArbitraryTerm where
import Category
import Control.Comonad.Cofree
import Control.Monad
import Data.Functor.Both
import qualified Data.OrderedMap as Map
import qualified Data.List as List
import qualified Data.Set as Set
import Data.Text.Arbitrary ()
import Diff
import Line
import Patch
import Prelude hiding (fst, snd)
import Range
import Source hiding ((++))
import Syntax
import GHC.Generics
import Term
import Test.QuickCheck hiding (Fixed)
newtype ArbitraryTerm a annotation = ArbitraryTerm (annotation, Syntax a (ArbitraryTerm a annotation))
deriving (Show, Eq, Generic)
unTerm :: ArbitraryTerm a annotation -> Term a annotation
unTerm = unfold unpack
where unpack (ArbitraryTerm (annotation, syntax)) = (annotation, syntax)
instance (Eq a, Eq annotation, Arbitrary a, Arbitrary annotation) => Arbitrary (ArbitraryTerm a annotation) where
arbitrary = scale (`div` 2) $ sized (\ x -> boundedTerm x x) -- first indicates the cube of the max length of lists, second indicates the cube of the max depth of the tree
where boundedTerm maxLength maxDepth = ArbitraryTerm <$> ((,) <$> arbitrary <*> boundedSyntax maxLength maxDepth)
boundedSyntax _ maxDepth | maxDepth <= 0 = liftM Leaf arbitrary
boundedSyntax maxLength maxDepth = frequency
[ (12, liftM Leaf arbitrary),
(1, liftM Indexed $ take maxLength <$> listOf (smallerTerm maxLength maxDepth)),
(1, liftM Fixed $ take maxLength <$> listOf (smallerTerm maxLength maxDepth)),
(1, liftM (Keyed . Map.fromList) $ take maxLength <$> listOf (arbitrary >>= (\x -> (,) x <$> smallerTerm maxLength maxDepth))) ]
smallerTerm maxLength maxDepth = boundedTerm (div maxLength 3) (div maxDepth 3)
shrink term@(ArbitraryTerm (annotation, syntax)) = (++) (subterms term) $ filter (/= term) $
ArbitraryTerm <$> ((,) <$> shrink annotation <*> case syntax of
Leaf a -> Leaf <$> shrink a
Indexed i -> Indexed <$> (List.subsequences i >>= recursivelyShrink)
Fixed f -> Fixed <$> (List.subsequences f >>= recursivelyShrink)
Keyed k -> Keyed . Map.fromList <$> (List.subsequences (Map.toList k) >>= recursivelyShrink))
data CategorySet = A | B | C | D deriving (Eq, Show)
instance Categorizable CategorySet where
categories A = Set.fromList [ Other "a" ]
categories B = Set.fromList [ Other "b" ]
categories C = Set.fromList [ Other "c" ]
categories D = Set.fromList [ Other "d" ]
instance Arbitrary CategorySet where
arbitrary = elements [ A, B, C, D ]
instance Arbitrary a => Arbitrary (Both a) where
arbitrary = pure (curry Both) <*> arbitrary <*> arbitrary
shrink b = both <$> (shrink (fst b)) <*> (shrink (snd b))
instance Arbitrary a => Arbitrary (Line a) where
arbitrary = oneof [ Line <$> arbitrary, Closed <$> arbitrary ]
shrink line = (`lineMap` line) . const <$> shrinkList shrink (unLine line)
instance Arbitrary a => Arbitrary (Patch a) where
arbitrary = oneof [
Insert <$> arbitrary,
Delete <$> arbitrary,
Replace <$> arbitrary <*> arbitrary ]
instance Arbitrary a => Arbitrary (Source a) where
arbitrary = Source.fromList <$> arbitrary
arbitraryLeaf :: Gen (Source Char, Info, Syntax (Source Char) f)
arbitraryLeaf = toTuple <$> arbitrary
where toTuple string = (string, Info (Range 0 $ length string) mempty, Leaf string)

View File

@ -1,30 +1,32 @@
{-# LANGUAGE DataKinds, FlexibleContexts, GeneralizedNewtypeDeriving #-}
module CorpusSpec where
import Category
import Control.DeepSeq
import Data.Functor.Both
import Data.List (union)
import Data.Record
import qualified Data.Text as T
import Diffing
import GHC.Show (Show(..))
import Info
import Prologue hiding (fst, snd, lookup)
import Parse
import Renderer
import qualified Renderer.JSON as J
import qualified Renderer.Patch as P
import qualified Renderer.Split as Split
import Control.DeepSeq
import Data.Functor.Both
import qualified Data.ByteString.Lazy.Char8 as B
import Data.List as List
import Data.Map as Map
import Data.Maybe
import Data.Set as Set
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Prelude hiding (fst, snd)
import qualified Prelude
import qualified Source as S
import System.FilePath
import System.FilePath.Glob
import Test.Hspec
import Test.Hspec (Spec, describe, it, SpecWith, runIO, parallel)
import Test.Hspec.Expectations.Pretty
import Unsafe (unsafeFromJust)
spec :: Spec
spec = parallel $ do
describe "crashers crash" $ runTestsIn "test/crashers-todo/" $ \ a b -> a `deepseq` return (a == b) `shouldThrow` anyException
describe "crashers crash" . runTestsIn "test/crashers-todo/" $ \ a b ->
a `deepseq` pure (a == b) `shouldThrow` anyException
describe "crashers should not crash" $ runTestsIn "test/crashers/" shouldBe
describe "todos are incorrect" $ runTestsIn "test/diffs-todo/" shouldNotBe
describe "should produce the correct diff" $ runTestsIn "test/diffs/" shouldBe
@ -34,39 +36,37 @@ spec = parallel $ do
examples "test/diffs/" `shouldNotReturn` []
where
runTestsIn :: String -> (String -> String -> Expectation) -> SpecWith ()
runTestsIn :: FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> SpecWith ()
runTestsIn directory matcher = do
paths <- runIO $ examples directory
let tests = correctTests =<< paths
mapM_ (\ (formatName, renderer, paths, output) -> it (normalizeName (fst paths) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests
correctTests :: (Both FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath) -> [(String, Renderer a String, Both FilePath, Maybe FilePath)]
correctTests paths@(_, Nothing, Nothing, Nothing) = testsForPaths paths
correctTests paths = List.filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths
testsForPaths :: (Both FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath) -> [(String, Renderer a String, Both FilePath, Maybe FilePath)]
testsForPaths (paths, json, patch, split) = [ ("json", testJSON, paths, json), ("patch", P.patch, paths, patch), ("split", testSplit, paths, split) ]
testSplit :: Renderer a String
testSplit diff sources = TL.unpack $ Split.split diff sources
testJSON :: Renderer a String
testJSON diff sources = B.unpack $ J.json diff sources
traverse_ (\ (formatName, renderer, paths, output) ->
it (maybe "/dev/null" normalizeName (uncurry (<|>) (runJoin paths)) ++ " (" ++ formatName ++ ")") $ testDiff renderer paths output matcher) tests
correctTests paths@(_, _, Nothing, Nothing, Nothing) = testsForPaths paths
correctTests paths = filter (\(_, _, _, output) -> isJust output) $ testsForPaths paths
testsForPaths (aPath, bPath, json, patch, split) = [ ("json", J.json, paths, json), ("patch", P.patch, paths, patch), ("split", Split.split, paths, split) ]
where paths = both aPath bPath
-- | Return all the examples from the given directory. Examples are expected to
-- | have the form "foo.A.js", "foo.B.js", "foo.patch.js". Diffs are not
-- | required as the test may be verifying that the inputs don't crash.
examples :: FilePath -> IO [(Both FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath)]
examples :: FilePath -> IO [(Maybe FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath, Maybe FilePath)]
examples directory = do
as <- toDict <$> globFor "*.A.*"
bs <- toDict <$> globFor "*.B.*"
jsons <- toDict <$> globFor "*.json.*"
patches <- toDict <$> globFor "*.patch.*"
splits <- toDict <$> globFor "*.split.*"
let keys = Set.unions $ keysSet <$> [as, bs]
return $ (\name -> (Both (as ! name, bs ! name), Map.lookup name jsons, Map.lookup name patches, Map.lookup name splits)) <$> sort (Set.toList keys)
as <- globFor "*.A.*"
bs <- globFor "*.B.*"
jsons <- globFor "*.json.*"
patches <- globFor "*.patch.*"
splits <- globFor "*.split.*"
let lookupName name = (lookupNormalized name as, lookupNormalized name bs, lookupNormalized name jsons, lookupNormalized name patches, lookupNormalized name splits)
let keys = (normalizeName <$> as) `union` (normalizeName <$> bs)
pure $ lookupName <$> keys
where
globFor :: String -> IO [FilePath]
lookupNormalized name = find $ (== name) . normalizeName
globFor :: FilePath -> IO [FilePath]
globFor p = globDir1 (compile p) directory
toDict list = Map.fromList ((normalizeName <$> list) `zip` list)
-- | Given a test name like "foo.A.js", return "foo.js".
normalizeName :: FilePath -> FilePath
@ -75,14 +75,27 @@ normalizeName path = addExtension (dropExtension $ dropExtension path) (takeExte
-- | Given file paths for A, B, and, optionally, a diff, return whether diffing
-- | the files will produce the diff. If no diff is provided, then the result
-- | is true, but the diff will still be calculated.
testDiff :: Renderer T.Text String -> Both FilePath -> Maybe FilePath -> (String -> String -> Expectation) -> Expectation
testDiff :: Renderer (Record '[Cost, Range, Category, SourceSpan]) -> Both (Maybe FilePath) -> Maybe FilePath -> (Maybe Verbatim -> Maybe Verbatim -> Expectation) -> Expectation
testDiff renderer paths diff matcher = do
let parser = parserForFilepath (fst paths)
sources <- sequence $ readAndTranscodeFile <$> paths
let sourceBlobs = Both (S.SourceBlob, S.SourceBlob) <*> sources <*> pure mempty <*> paths <*> pure (Just S.defaultPlainBlob)
actual <- diffFiles parser renderer sourceBlobs
sources <- traverse (traverse readAndTranscodeFile) paths
actual <- fmap Verbatim <$> traverse ((pure . concatOutputs . pure) <=< diffFiles' sources) parser
case diff of
Nothing -> matcher actual actual
Just file -> do
expected <- readFile file
matcher actual expected
expected <- Verbatim <$> readFile file
matcher actual (Just expected)
where diffFiles' sources parser = diffFiles parser renderer (sourceBlobs sources paths)
parser = parserWithCost <$> runBothWith (<|>) paths
sourceBlobs :: Both (Maybe (S.Source Char)) -> Both (Maybe FilePath) -> Both S.SourceBlob
sourceBlobs sources paths = case runJoin paths of
(Nothing, Nothing) -> Join (S.emptySourceBlob "", S.emptySourceBlob "")
(Nothing, Just filepath) -> Join (S.emptySourceBlob "", S.sourceBlob (unsafeFromJust $ snd sources) filepath)
(Just filepath, Nothing) -> Join (S.sourceBlob (unsafeFromJust $ fst sources) filepath, S.emptySourceBlob "")
(Just path1, Just path2) -> Join (S.sourceBlob (unsafeFromJust $ fst sources) path1, S.sourceBlob (unsafeFromJust $ snd sources) path2)
-- | A wrapper around `Text` with a more readable `Show` instance.
newtype Verbatim = Verbatim Text
deriving (Eq, NFData)
instance Show Verbatim where
showsPrec _ (Verbatim text) = ('\n':) . (T.unpack text ++)

View File

@ -1,94 +0,0 @@
module Data.Adjoined.Spec (spec) where
import ArbitraryTerm ()
import Control.Applicative
import Data.Adjoined
import Data.Coalescent
import Data.Foldable
import Data.Functor.Both
import Data.Typeable
import Line
import Test.Hspec
import Test.Hspec.QuickCheck
import Test.QuickCheck
spec :: Spec
spec = do
prop "equality is reflexive" $
\ a -> a `shouldBe` (a :: Adjoined (Uncoalesced Char))
monoid (arbitrary :: Gen (Adjoined (Coalesced String)))
monoid (arbitrary :: Gen (Adjoined (Uncoalesced String)))
monoid (arbitrary :: Gen (Adjoined (Semicoalesced String)))
monoid (arbitrary :: Gen (Adjoined (Line Char)))
-- monoid (arbitrary :: Gen (Adjoined (Both (Line Char))))
monoid :: (Arbitrary a, Coalescent a, Eq a, Show a, Typeable a) => Gen (Adjoined a) -> Spec
monoid gen =
describe ("Monoid (" ++ showTypeOf (`asGeneratedTypeOf` gen) ++ ")") $ do
describe "mempty" $ do
prop "left identity" $ forAll gen $
\ a -> mempty `mappend` a `shouldBe` a
prop "right identity" $ forAll gen $
\ a -> a `mappend` mempty `shouldBe` a
describe "mappend" $ do
prop "associativity" $ forAll gen $
\ a b c -> (a `mappend` b) `mappend` c `shouldBe` a `mappend` (b `mappend` c)
instance Arbitrary a => Arbitrary (Adjoined a) where
arbitrary = fromList <$> arbitrary
shrink arbitrary = fromList <$> shrinkList shrink (toList arbitrary)
-- | A wrapper which never coalesces values.
newtype Uncoalesced a = Uncoalesced { runUncoalesced :: a }
deriving (Eq, Show)
instance Arbitrary a => Arbitrary (Uncoalesced a) where
arbitrary = Uncoalesced <$> arbitrary
instance Coalescent (Uncoalesced a) where
coalesce a b = pure a <|> pure b
-- | A wrapper which always coalesces values.
newtype Coalesced a = Coalesced { runCoalesced :: a }
deriving (Eq, Show)
instance Arbitrary a => Arbitrary (Coalesced a) where
arbitrary = Coalesced <$> arbitrary
instance Monoid a => Coalescent (Coalesced a) where
coalesce a b = pure (Coalesced (runCoalesced a `mappend` runCoalesced b))
-- | A wrapper which coalesces asymmetrically.
-- |
-- | Specifically, it coalesces only when the value at the left has `True` set.
newtype Semicoalesced a = Semicoalesced { runSemicoalesced :: (Bool, a) }
deriving (Eq, Show)
instance Arbitrary a => Arbitrary (Semicoalesced a) where
arbitrary = Semicoalesced <$> arbitrary
instance Monoid a => Coalescent (Semicoalesced a) where
Semicoalesced (True, a) `coalesce` Semicoalesced (flag, b) = pure (Semicoalesced (flag, a `mappend` b))
a `coalesce` b = pure a <|> pure b
-- | Returns a string with the name of a type.
-- |
-- | Use with `asTypeOf` or `asGeneratedTypeOf` to show type names for parameters without fighting type variable scoping:
-- |
-- | showTypeOf (`asTypeOf` someTypeParametricValue)
showTypeOf :: Typeable a => (a -> a) -> String
showTypeOf f = show (typeRep (proxyOf f))
where proxyOf :: (a -> a) -> Proxy a
proxyOf _ = Proxy
-- | Type-restricted `const`, usually written infix or as an operator section with `showTypeOf`.
asGeneratedTypeOf :: a -> Gen a -> a
asGeneratedTypeOf = const

View File

@ -1,22 +0,0 @@
module Data.Functor.Both.Spec (spec) where
import Data.Adjoined
import Data.Coalescent
import Data.Functor.Both
import Line
import Test.Hspec
spec :: Spec
spec = do
describe "Coalescent" $ do
it "should coalesce when both sides coalesce" $
(pure (Line [True]) `coalesce` pure (Line [True]) :: Adjoined (Both (Line Bool))) `shouldBe` fromList [pure (Line [True, True])]
it "should not coalesce when neither side coalesces" $
(pure (Closed [True]) `coalesce` pure (Line [True]) :: Adjoined (Both (Line Bool))) `shouldBe` fromList [pure (Closed [True]), pure (Line [True])]
it "should coalesce asymmetrically at left" $
(both (Line [True]) (Closed [True]) `coalesce` pure (Line [True]) :: Adjoined (Both (Line Bool))) `shouldBe` fromList [both (Line []) (Closed [True]), both (Line [True, True]) (Line [True])]
it "should coalesce asymmetrically at right" $
(both (Closed [True]) (Line [True]) `coalesce` pure (Line [True]) :: Adjoined (Both (Line Bool))) `shouldBe` fromList [both (Closed [True]) (Line []), both (Line [True]) (Line [True, True])]

View File

@ -0,0 +1,63 @@
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
module Data.Mergeable.Spec where
import Data.Functor.Identity
import Data.Functor.Listable
import Data.Mergeable
import Data.String (String)
import GHC.Show
import Prologue
import Syntax
import Test.Hspec
import Test.Hspec.LeanCheck
import Test.LeanCheck
spec :: Spec
spec = parallel $ do
describe "[]" $ do
withAlternativeInstances sequenceAltLaws (tiers :: [Tier String])
withAlternativeInstances mergeLaws (tiers :: [Tier String])
describe "Maybe" $ do
withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Maybe Char)])
withAlternativeInstances mergeLaws (tiers :: [Tier (Maybe Char)])
describe "Identity" $ do
withAlternativeInstances sequenceAltLaws (Identity `mapT` tiers :: [Tier (Identity Char)])
withAlternativeInstances mergeLaws (Identity `mapT` tiers :: [Tier (Identity Char)])
describe "Syntax" $ do
withAlternativeInstances sequenceAltLaws (tiers :: [Tier (Syntax Char Char)])
withAlternativeInstances mergeLaws (tiers :: [Tier (Syntax Char Char)])
prop "subsumes catMaybes/Just" $
\ a -> sequenceAlt a `shouldBe` pure (catMaybes (a :: [Maybe Char]))
mergeLaws :: forall f g a. (Mergeable f, Alternative g, Eq (g (f a)), Show (f a), Show (g (f a))) => [Tier (f a)] -> [Tier (Blind (a -> g a))] -> Spec
mergeLaws value function = describe "merge" $ do
prop "identity" . forAll value $
\ a -> merge pure a `shouldNotBe` (empty :: g (f a))
prop "relationship with sequenceAlt" . forAll (value >< function) $
\ (a, f) -> merge (getBlind f) a `shouldBe` sequenceAlt (fmap (getBlind f) a)
sequenceAltLaws :: forall f g a. (Mergeable f, Alternative g, Eq (g (f a)), Show (f a), Show (g (f a))) => [Tier (f a)] -> [Tier (Blind (a -> g a))] -> Spec
sequenceAltLaws value function = describe "sequenceAlt" $ do
prop "identity" . forAll value $
\ a -> sequenceAlt (pure <$> a) `shouldNotBe` (empty :: g (f a))
prop "relationship with merge" . forAll (productWith ((Blind .) . fmap . getBlind) function value :: [Tier (Blind (f (g a)))]) $
\ a -> sequenceAlt (getBlind a) `shouldBe` merge identity (getBlind a)
withAlternativeInstances :: forall f a. (Listable a, Eq (f a), Show (f a)) => (forall g. (Alternative g, Eq (g (f a)), Show (g (f a))) => [Tier (f a)] -> [Tier (Blind (a -> g a))] -> Spec) -> [Tier (f a)] -> Spec
withAlternativeInstances laws gen = do
describe "[]" $ laws gen (fmap const `mapT` tiers :: [Tier (Blind (a -> [a]))])
describe "Maybe" $ laws gen (fmap const `mapT` tiers :: [Tier (Blind (a -> Maybe a))])
newtype Blind a = Blind { getBlind :: a }
deriving Functor
instance Listable a => Listable (Blind a) where
tiers = Blind `mapT` tiers
instance Show (Blind a) where
showsPrec _ _ = showString "*"

View File

@ -0,0 +1,51 @@
{-# LANGUAGE DataKinds #-}
module Data.RandomWalkSimilarity.Spec where
import Category
import Data.Functor.Both
import Data.Functor.Listable
import Data.RandomWalkSimilarity
import Data.Record
import Data.String
import Diff
import Info
import Patch
import Prologue
import Syntax
import Term
import Test.Hspec
import Test.Hspec.LeanCheck
spec :: Spec
spec = parallel $ do
let positively = succ . abs
describe "pqGramDecorator" $ do
prop "produces grams with stems of the specified length" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (unListableF term :: SyntaxTerm String '[Category]) `shouldSatisfy` all ((== positively p) . length . stem . rhead)
prop "produces grams with bases of the specified width" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (unListableF term :: SyntaxTerm String '[Category]) `shouldSatisfy` all ((== positively q) . length . base . rhead)
describe "featureVectorDecorator" $ do
prop "produces a vector of the specified dimension" $
\ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (unListableF term :: SyntaxTerm String '[Category]) `shouldSatisfy` all ((== positively d) . length . rhead)
describe "rws" $ do
prop "produces correct diffs" $
\ (as, bs) -> let tas = decorate <$> (unListableF <$> as :: [SyntaxTerm String '[Category]])
tbs = decorate <$> (unListableF <$> bs :: [SyntaxTerm String '[Category]])
root = cofree . ((Program :. Nil) :<) . Indexed
diff = wrap (pure (Program :. Nil) :< Indexed (stripDiff <$> rws compare tas tbs)) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
it "produces unbiased insertions within branches" $
let (a, b) = (decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf ("a" :: Text)) ])), decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf "b") ]))) in
fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, copying b ]
where compare :: (HasField fields Category, Functor f, Eq (Cofree f Category)) => Term f (Record fields) -> Term f (Record fields) -> Maybe (Diff f (Record fields))
compare a b | (category <$> a) == (category <$> b) = Just (copying b)
| otherwise = if ((==) `on` category . extract) a b then Just (replacing a b) else Nothing
copying :: Functor f => Cofree f (Record fields) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))
copying = cata wrap . fmap pure
decorate :: SyntaxTerm leaf '[Category] -> SyntaxTerm leaf '[Maybe FeatureVector, Category]
decorate = defaultFeatureVectorDecorator (category . headF)

40
test/Diff/Spec.hs Normal file
View File

@ -0,0 +1,40 @@
{-# LANGUAGE DataKinds #-}
module Diff.Spec where
import Category
import Data.Bifunctor.Join
import Data.Functor.Listable
import Data.RandomWalkSimilarity
import Data.String
import Diff
import Info
import Interpreter
import Patch
import Prologue
import Term
import Test.Hspec
import Test.Hspec.LeanCheck
spec :: Spec
spec = parallel $ do
let decorate = defaultFeatureVectorDecorator (category . headF)
prop "equality is reflexive" $
\ a -> let diff = unListableDiff a :: SyntaxDiff String '[Category] in
diff `shouldBe` diff
prop "equal terms produce identity diffs" $
\ a -> let term = decorate (unListableF a :: SyntaxTerm String '[Category]) in
diffCost (diffTerms wrap (==) diffCost term term) `shouldBe` 0
describe "beforeTerm" $ do
prop "recovers the before term" $
\ a b -> let diff = stripDiff $ diffTerms wrap (==) diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
beforeTerm diff `shouldBe` Just (unListableF a)
describe "afterTerm" $ do
prop "recovers the after term" $
\ a b -> let diff = stripDiff $ diffTerms wrap (==) diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
afterTerm diff `shouldBe` Just (unListableF b)
unListableDiff :: Functor f => ListableF (Free (TermF f (ListableF (Join (,)) annotation))) (Patch (ListableF (Term f) annotation)) -> Diff f annotation
unListableDiff diff = transFreeT (first unListableF) $ fmap unListableF <$> unListableF diff

102
test/DiffSummarySpec.hs Normal file
View File

@ -0,0 +1,102 @@
{-# LANGUAGE DataKinds #-}
module DiffSummarySpec where
import Category
import Data.Functor.Both
import Data.Functor.Listable
import Data.List (partition)
import Data.RandomWalkSimilarity
import Data.Record
import Data.String
import Diff
import DiffSummary
import Info
import Interpreter
import Patch
import Prologue
import Source
import Syntax
import Term
import Test.Hspec (Spec, describe, it, parallel)
import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
import Data.These
sourceSpanBetween :: (Int, Int) -> (Int, Int) -> SourceSpan
sourceSpanBetween (s1, e1) (s2, e2) = SourceSpan (SourcePos s1 e1) (SourcePos s2 e2)
arrayInfo :: Record '[Category, Range, SourceSpan]
arrayInfo = ArrayLiteral :. Range 0 3 :. sourceSpanBetween (1, 1) (1, 5) :. Nil
literalInfo :: Record '[Category, Range, SourceSpan]
literalInfo = StringLiteral :. Range 1 2 :. sourceSpanBetween (1, 2) (1, 4) :. Nil
testDiff :: Diff (Syntax Text) (Record '[Category, Range, SourceSpan])
testDiff = free $ Free (pure arrayInfo :< Indexed [ free $ Pure (Insert (cofree $ literalInfo :< Leaf "\"a\"")) ])
testSummary :: DiffSummary DiffInfo
testSummary = DiffSummary { patch = Insert (LeafInfo StringLiteral "a" $ sourceSpanBetween (1,1) (1, 2)), parentAnnotation = [] }
replacementSummary :: DiffSummary DiffInfo
replacementSummary = DiffSummary { patch = Replace (LeafInfo StringLiteral "a" $ sourceSpanBetween (1, 2) (1, 4)) (LeafInfo SymbolLiteral "b" $ sourceSpanBetween (1,1) (1, 2)), parentAnnotation = [Left (Info.FunctionCall, "foo")] }
blobs :: Both SourceBlob
blobs = both (SourceBlob (fromText "[]") nullOid "a.js" (Just defaultPlainBlob)) (SourceBlob (fromText "[a]") nullOid "b.js" (Just defaultPlainBlob))
spec :: Spec
spec = parallel $ do
describe "diffSummaries" $ do
it "outputs a diff summary" $
diffSummaries blobs testDiff `shouldBe` [ JSONSummary "Added the \"a\" string" (SourceSpans . That $ sourceSpanBetween (1, 2) (1, 4)) ]
prop "equal terms produce identity diffs" $
\ a -> let term = defaultFeatureVectorDecorator (category . headF) (unListableF a :: SyntaxTerm String '[Category, Range, SourceSpan]) in
diffSummaries blobs (diffTerms wrap (==) diffCost term term) `shouldBe` []
describe "DiffInfo" $ do
prop "patches in summaries match the patches in diffs" $
\a -> let
diff = unListableDiff a :: SyntaxDiff String '[Category, Cost, Range, SourceSpan]
summaries = diffToDiffSummaries (source <$> blobs) diff
patches = toList diff
in
case (partition isBranchNode (patch <$> summaries), partition isIndexedOrFixed patches) of
((branchPatches, otherPatches), (branchDiffPatches, otherDiffPatches)) ->
(() <$ branchPatches, () <$ otherPatches) `shouldBe` (() <$ branchDiffPatches, () <$ otherDiffPatches)
prop "generates one LeafInfo for each child in an arbitrary branch patch" $
\a -> let
diff = unListableDiff a :: SyntaxDiff String '[Category, Range, SourceSpan]
diffInfoPatches = patch <$> diffToDiffSummaries (source <$> blobs) diff
syntaxPatches = toList diff
extractLeaves :: DiffInfo -> [DiffInfo]
extractLeaves (BranchInfo children _ _) = join $ extractLeaves <$> children
extractLeaves leaf = [ leaf ]
extractDiffLeaves :: SyntaxTerm String '[Category, Range, SourceSpan] -> [ SyntaxTerm String '[Category, Range, SourceSpan] ]
extractDiffLeaves term = case unwrap term of
(Indexed children) -> join $ extractDiffLeaves <$> children
(Fixed children) -> join $ extractDiffLeaves <$> children
Commented children leaf -> children <> maybeToList leaf >>= extractDiffLeaves
_ -> [ term ]
in
case (partition isBranchNode diffInfoPatches, partition isIndexedOrFixed syntaxPatches) of
((branchPatches, _), (diffPatches, _)) ->
let listOfLeaves = foldMap extractLeaves (join $ toList <$> branchPatches)
listOfDiffLeaves = foldMap extractDiffLeaves (diffPatches >>= toList)
in
length listOfLeaves `shouldBe` length listOfDiffLeaves
isIndexedOrFixed :: Patch (Term (Syntax a) annotation) -> Bool
isIndexedOrFixed = any (isIndexedOrFixed' . unwrap)
isIndexedOrFixed' :: Syntax a f -> Bool
isIndexedOrFixed' syntax = case syntax of
(Indexed _) -> True
(Fixed _) -> True
_ -> False
isBranchNode :: Patch DiffInfo -> Bool
isBranchNode = any isBranchInfo
unListableDiff :: Functor f => ListableF (Free (TermF f (ListableF (Join (,)) annotation))) (Patch (ListableF (Term f) annotation)) -> Diff f annotation
unListableDiff diff = transFreeT (first unListableF) $ fmap unListableF <$> unListableF diff

View File

@ -0,0 +1,61 @@
module IntegrationFormatSpec where
import Arguments
import Data.Aeson
import Data.List.Split
import Control.Exception
import qualified Data.ByteString.Lazy as DL
import JSONTestCase
import Test.Hspec (Spec, describe, it, SpecWith, runIO, parallel)
import Prelude
import Prologue
import Renderer
import SemanticDiff
import System.FilePath.Glob
import Data.Maybe (fromJust)
import Test.Hspec.Expectations.Pretty
catchException :: IO [Text] -> IO [Text]
catchException = handle errorHandler
where errorHandler :: (SomeException -> IO [Text])
errorHandler exception = return [toS . encode $ ["Crashed: " <> Prologue.show exception :: Text]]
assertDiffSummary :: JSONTestCase -> Format -> (Either String ExpectedResult -> Either String ExpectedResult -> Expectation) -> Expectation
assertDiffSummary JSONTestCase {..} format matcher = do
diffs <- fetchDiffs $ args gitDir (Prelude.head shas') (Prelude.last shas') filePaths format
result <- catchException . pure . pure . concatOutputs $ diffs
let actual = eitherDecode . DL.fromStrict . encodeUtf8 . fromJust . listToMaybe $ result
matcher actual (Right expectedResult)
where shas' = splitOn ".." shas
runTestsIn :: [FilePath] -> Format -> (Either String ExpectedResult -> Either String ExpectedResult -> Expectation) -> SpecWith ()
runTestsIn filePaths format matcher = do
contents <- runIO $ traverse DL.readFile filePaths
let filePathContents = zip filePaths contents
let jsonContents = (\(filePath, content) -> (filePath, eitherDecode content)) <$> filePathContents :: [(FilePath, Either String [JSONTestCase])]
traverse_ handleJSONTestCase jsonContents
where handleJSONTestCase :: (FilePath, Either String [JSONTestCase]) -> SpecWith ()
handleJSONTestCase (filePath, eitherJSONTestCase) =
case eitherJSONTestCase of
Left err -> it ("An error occurred " <> err <> " (" <> filePath <> ")") $ True `shouldBe` False
Right testCases -> traverse_ (\testCase -> it (testCaseDescription testCase) $ assertDiffSummary testCase format matcher) testCases
spec :: Maybe String -> Spec
spec maybeLanguage = parallel $ do
summaryFormatFiles <- runIO $ testCaseFiles maybeLanguage "test/corpus/diff-summaries"
summaryFormatToDoFiles <- runIO $ testCaseFiles maybeLanguage "test/corpus/diff-summaries-todo"
summaryFormatCrasherFiles <- runIO $ testCaseFiles maybeLanguage "test/corpus/diff-summary-crashers"
jsonFormatFiles <- runIO $ testCaseFiles maybeLanguage "test/corpus/json"
describe "Summary format" $ runTestsIn summaryFormatFiles Summary shouldBe
describe "Summary format todo" $ runTestsIn summaryFormatToDoFiles Summary shouldNotBe
describe "Summary format crashers todo" $ runTestsIn summaryFormatCrasherFiles Summary shouldBe
describe "JSON format" $ runTestsIn jsonFormatFiles JSON shouldBe
where
testCaseFiles :: Maybe String -> String -> IO [FilePath]
testCaseFiles maybeLanguage dir = case maybeLanguage of
Just language -> globDir1 (compile (language <> "/*.json")) dir
Nothing -> globDir1 (compile "*/*.json") dir

View File

@ -1,22 +1,44 @@
{-# LANGUAGE DataKinds #-}
module InterpreterSpec where
import qualified Interpreter as I
import Range
import Syntax
import Control.Comonad.Cofree
import Control.Monad.Free
import Patch
import Diff
import Category
import Test.Hspec
import Data.Array
import Data.Functor.Foldable hiding (Nil)
import Data.Functor.Listable
import Data.RandomWalkSimilarity
import Data.Record
import Data.String
import Diff
import Info
import Interpreter
import Patch
import Prologue
import Syntax
import Term
import Test.Hspec (Spec, describe, it, parallel)
import Test.Hspec.Expectations.Pretty
import Test.Hspec.LeanCheck
spec :: Spec
spec = parallel $ do
describe "interpret" $ do
let decorate = defaultFeatureVectorDecorator (category . headF)
let compare = (==) `on` category . extract
it "returns a replacement when comparing two unicode equivalent terms" $
I.interpret comparable (Info range mempty :< Leaf "t\776") (Info range2 mempty :< Leaf "\7831") `shouldBe`
Pure (Replace (Info range mempty :< Leaf "t\776") (Info range2 mempty :< Leaf "\7831"))
let termA = cofree $ (StringLiteral :. Nil) :< Leaf ("t\776" :: String)
termB = cofree $ (StringLiteral :. Nil) :< Leaf "\7831" in
stripDiff (diffTerms wrap compare diffCost (decorate termA) (decorate termB)) `shouldBe` replacing termA termB
where
range = Range 0 2
range2 = Range 0 1
prop "produces correct diffs" $
\ a b -> let diff = stripDiff $ diffTerms wrap compare diffCost (decorate (unListableF a)) (decorate (unListableF b :: SyntaxTerm String '[Category])) in
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (unListableF a), Just (unListableF b))
prop "constructs zero-cost diffs of equal terms" $
\ a -> let term = decorate (unListableF a :: SyntaxTerm String '[Category])
diff = diffTerms wrap compare diffCost term term in
diffCost diff `shouldBe` 0
it "produces unbiased insertions within branches" $
let term s = decorate (cofree ((StringLiteral :. Nil) :< Indexed [ cofree ((StringLiteral :. Nil) :< Leaf s) ]))
root = cofree . ((Just (listArray (0, defaultD) (repeat 0)) :. Program :. Nil) :<) . Indexed in
stripDiff (diffTerms wrap compare diffCost (root [ term "b" ]) (root [ term "a", term "b" ])) `shouldBe` wrap (pure (Program :. Nil) :< Indexed [ inserting (stripTerm (term "a")), cata wrap (fmap pure (stripTerm (term "b"))) ])

69
test/JSONTestCase.hs Normal file
View File

@ -0,0 +1,69 @@
{-# LANGUAGE DeriveAnyClass, OverloadedStrings #-}
module JSONTestCase where
import Data.Aeson
import Data.Aeson.Types
import Data.Map.Strict as Map
import Data.HashMap.Strict as HM
import Prelude
import Prologue
data JSONMetaRepo = JSONMetaRepo { repoUrl :: !String
, language :: !String
, fileExt :: !String
, syntaxes :: ![JSONMetaSyntax]
, templateText :: !(Maybe String)
} deriving (Show, Generic, FromJSON)
data JSONMetaSyntax = JSONMetaSyntax { template :: !(Maybe String)
, syntax :: !String
, insert :: !String
, replacement :: !String
} deriving (Show, Generic, FromJSON)
data JSONTestCase = JSONTestCase { gitDir :: !String
, testCaseDescription :: !String
, filePaths :: ![String]
, shas :: !String
, patch :: ![String]
, expectedResult :: !ExpectedResult
} deriving (Show, Generic, FromJSON)
data ExpectedResult = SummaryResult (Map Text (Map Text [Value]))
| JSONResult (Map Text Value)
deriving (Show, Generic, Eq)
-- | These replace the defaultOptions normally used by genericToEncoding.
-- | All options are default except for `sumEncoding`, which uses the `UntaggedValue`
-- | option to prevent the sum type `ExpectedResult` from encoding with a `tag` and `contents`
-- | fields when a JSONTestCase is encoded.
jsonTestCaseOptions :: Options
jsonTestCaseOptions = Options { fieldLabelModifier = id
, constructorTagModifier = id
, allNullaryToStringTag = False
, omitNothingFields = True
, sumEncoding = UntaggedValue
, unwrapUnaryRecords = False
}
instance ToJSON JSONTestCase where
toJSON = genericToJSON jsonTestCaseOptions
toEncoding = genericToEncoding jsonTestCaseOptions
instance ToJSON ExpectedResult where
toJSON = genericToJSON jsonTestCaseOptions
toEncoding = genericToEncoding jsonTestCaseOptions
-- | We have to parse the specific formats of the ExpectedResults based on their keys.
-- | This is how we determine which ExpectedResult constructor to use.
instance FromJSON ExpectedResult where
parseJSON = Data.Aeson.withObject "ExpectedResult" $ \o ->
SummaryResult <$> summaryResultValues o <|>
JSONResult <$> jsonResultValues o
where
jsonResultValues :: Object -> Parser (Map Text Value)
jsonResultValues o = Map.fromList <$> (fromKey "oids" <> fromKey "rows" <> fromKey "paths")
where fromKey k = (\a -> [(k, a)]) <$> o .: k
summaryResultValues :: Object -> Parser (Map Text (Map Text [Value]))
summaryResultValues o = Map.fromList <$> (fromKey "changes" <> fromKey "errors")
where fromKey k = (\a -> [(k :: Text, Map.fromList . HM.toList $ a )] ) <$> o .: k

View File

@ -1,44 +0,0 @@
module OrderedMapSpec where
import qualified Data.OrderedMap as Map
import Test.Hspec
spec :: Spec
spec = parallel $ do
describe "difference" $ do
it "should return those elements of a not in b" $
Map.difference a b `shouldBe` Map.fromList [ ("a", 1) ]
it "is asymmetrical" $
Map.difference a b `shouldNotBe` Map.difference b a
describe "union" $ do
it "should return those elements in either a or b" $
Map.union a b `shouldBe` Map.fromList (Map.toList a ++ [ ("d", -4) ])
it "is asymmetrical" $
Map.union a b `shouldNotBe` Map.union b a
describe "unions" $ do
it "is equivalent to `union` for two maps" $
Map.unions [ a, b ] `shouldBe` Map.union a b
it "does not duplicate elements" $
Map.unions [ a, b, a, b, a, b ] `shouldBe` Map.union a b
describe "intersectionWith" $ do
it "should return those elements in both a and b, combined with a function" $
Map.intersectionWith (-) a b `shouldBe` Map.fromList [ ("b", 4), ("c", 6) ]
it "is asymmetrical" $
Map.intersectionWith (-) a b `shouldNotBe` Map.intersectionWith (-) b a
describe "keys" $ do
it "should return all the keys in a map" $
Map.keys a `shouldBe` [ "a", "b", "c" ]
it "is ordered" $
Map.keys (Map.union b a) `shouldBe` [ "b", "c", "d", "a" ]
where a = Map.fromList [ ("a", 1), ("b", 2), ("c", 3) ]
b = Map.fromList [ ("b", -2), ("c", -3), ("d", -4) ]

View File

@ -1,16 +1,17 @@
module PatchOutputSpec where
import Prologue
import Data.Functor.Both
import Diff
import Renderer.Patch
import Data.Record
import Range
import Renderer.Patch
import Source
import Syntax
import Control.Monad.Free
import Test.Hspec
import Test.Hspec (Spec, describe, it, parallel)
import Test.Hspec.Expectations.Pretty
spec :: Spec
spec = parallel $
describe "hunks" $
spec = parallel $ do
describe "hunks" $ do
it "empty diffs have empty hunks" $
hunks (Free . Annotated (pure (Info (Range 0 0) mempty)) $ Leaf "") (Both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob), SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = Both (0, 0), changes = [], trailingContext = []}]
hunks (wrap $ pure (Range 0 0 :. Nil) :< Leaf ("" :: Text)) (both (SourceBlob (fromList "") "abcde" "path2.txt" (Just defaultPlainBlob)) (SourceBlob (fromList "") "xyz" "path2.txt" (Just defaultPlainBlob))) `shouldBe` [Hunk {offset = pure 0, changes = [], trailingContext = []}]

View File

@ -1,7 +1,9 @@
module RangeSpec where
import Test.Hspec
import Prologue
import Range
import Test.Hspec (Spec, describe, it, parallel)
import Test.Hspec.Expectations.Pretty
spec :: Spec
spec = parallel $ do

Some files were not shown because too many files have changed in this diff Show More