1
1
mirror of https://github.com/github/semantic.git synced 2024-11-28 10:15:55 +03:00

Merge pull request #1037 from github/cantor-keys

SES (and other) optimizations
This commit is contained in:
Timothy Clem 2017-03-13 16:46:17 -07:00 committed by GitHub
commit f0cb65eb2e
2 changed files with 22 additions and 20 deletions

View File

@ -80,15 +80,14 @@ fetchDiff' args@Arguments{..} filepath = do
let sources = fromMaybe (emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = idOrEmptySourceBlob <$> sources
let textDiff' = textDiff (parserForFilepath filepath) args sourceBlobs
text <- fetchText textDiff'
text <- liftIO . render $ textDiff (parserForFilepath filepath) args sourceBlobs
truncatedPatch <- liftIO $ truncatedDiff args sourceBlobs
pure $ fromMaybe truncatedPatch text
where
fetchText textDiff = if developmentMode
then liftIO $ Just <$> textDiff
else liftIO $ timeout timeoutInMicroseconds textDiff
render output = if developmentMode
then Just <$> output
else timeout timeoutInMicroseconds output
pathsToDiff :: Arguments -> Both String -> IO [FilePath]
pathsToDiff Arguments{..} shas = withRepository lgFactory gitDir $ do
@ -147,7 +146,8 @@ diffFiles :: HasField fields Category
-> Both SourceBlob
-> IO Output
diffFiles parse render sourceBlobs = do
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parse) sourceBlobs
terms <- Async.withTaskGroup numCapabilities . flip Async.mapTasks $
(fmap (defaultFeatureVectorDecorator getLabel) . parse) <$> sourceBlobs
pure $! render sourceBlobs (stripDiff (diffTerms' terms))
where

View File

@ -1,7 +1,8 @@
{-# LANGUAGE Strict #-}
module SES where
import qualified Data.Map as Map
import Data.Array.MArray
import Data.Array.ST
import Data.These
import Prologue
@ -14,26 +15,27 @@ type Cost term = These term term -> Int
-- | Find the shortest edit script (diff) between two terms given a function to compute the cost.
ses :: Comparable term -> Cost term -> [term] -> [term] -> [These term term]
ses canCompare cost as bs = fst <$> evalState diffState Map.empty where
diffState = diffAt canCompare cost (0, 0) as bs
ses canCompare cost as bs = runST $ do
array <- newArray ((0, 0), (length bs, length as)) Nothing
editScript <- diffAt array canCompare cost (0, 0) as bs
pure $ fst <$> editScript
-- | Find the shortest edit script between two terms at a given vertex in the edit graph.
diffAt :: Comparable term -> Cost term -> (Int, Int) -> [term] -> [term] -> State (Map.Map (Int, Int) [(These term term, Int)]) [(These term term, Int)]
diffAt canCompare cost (i, j) as bs
| (a : as) <- as, (b : bs) <- bs = do
cachedDiffs <- get
case Map.lookup (i, j) cachedDiffs of
diffAt :: STArray s (Int, Int) (Maybe [(These term term, Int)]) -> Comparable term -> Cost term -> (Int, Int) -> [term] -> [term] -> ST s [(These term term, Int)]
diffAt array canCompare cost (i, j) as bs
| (a : as') <- as, (b : bs') <- bs = do
maybeDiff <- readArray array (i, j)
case maybeDiff of
Just diffs -> pure diffs
Nothing -> do
down <- recur (i, succ j) as (b : bs)
right <- recur (succ i, j) (a : as) bs
down <- recur (i, succ j) as' bs
right <- recur (succ i, j) as bs'
nomination <- best <$> if canCompare a b
then do
diagonal <- recur (succ i, succ j) as bs
diagonal <- recur (succ i, succ j) as' bs'
pure [ delete a down, insert b right, consWithCost cost (These a b) diagonal ]
else pure [ delete a down, insert b right ]
cachedDiffs' <- get
put $ Map.insert (i, j) nomination cachedDiffs'
writeArray array (i, j) (Just nomination)
pure nomination
| null as = pure $ foldr insert [] bs
| null bs = pure $ foldr delete [] as
@ -44,7 +46,7 @@ diffAt canCompare cost (i, j) as bs
costOf [] = 0
costOf ((_, c) : _) = c
best = minimumBy (comparing costOf)
recur = diffAt canCompare cost
recur = diffAt array canCompare cost
-- | Prepend an edit script and the cumulative cost onto the edit script.
consWithCost :: Cost term -> These term term -> [(These term term, Int)] -> [(These term term, Int)]