1
1
mirror of https://github.com/github/semantic.git synced 2024-12-01 09:15:01 +03:00

Re-implement dedupe with a Map for SPEED

This commit is contained in:
Timothy Clem 2017-09-14 15:50:36 -07:00
parent d7647f4fdc
commit 44c2d7819c

View File

@ -1,4 +1,4 @@
{-# LANGUAGE DeriveAnyClass, MultiParamTypeClasses, RankNTypes, TypeOperators #-}
{-# LANGUAGE DeriveAnyClass, MultiParamTypeClasses, RankNTypes, TypeOperators, ScopedTypeVariables #-}
module Renderer.TOC
( renderToCDiff
, renderToCTerm
@ -29,6 +29,7 @@ import qualified Data.Functor.Both as Both
import Data.Functor.Foldable (cata)
import Data.Function (on)
import Data.List.NonEmpty (nonEmpty)
import Data.List (sortOn)
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Output
import Data.Record
@ -187,18 +188,33 @@ termTableOfContentsBy selector = cata termAlgebra
where termAlgebra r | Just a <- selector r = [a]
| otherwise = fold r
dedupe :: HasField fields (Maybe Declaration) => [Entry (Record fields)] -> [Entry (Record fields)]
dedupe = foldl' go []
where go xs x | (_, _:_) <- find (exactMatch `on` entryPayload) x xs = xs
| (front, similar : back) <- find (similarMatch `on` entryPayload) x xs =
front <> (Replaced (entryPayload similar) : back)
| otherwise = xs <> [x]
find p x = List.break (p x)
exactMatch = (==) `on` getDeclaration
similarMatch a b = sameCategory a b && similarDeclaration a b
sameCategory = (==) `on` fmap toCategoryName . getDeclaration
similarDeclaration = (==) `on` fmap (toLower . declarationIdentifier) . getDeclaration
newtype DedupeKey = DedupeKey (Maybe T.Text, Maybe T.Text) deriving (Eq, Ord)
-- Dedupe entries in a final pass. This catches two specific scenarios with
-- different behaviors:
-- 1. Identical entries are in the list.
-- Action: take the first one, drop all subsequent.
-- 2. Two similar entries (defined by a case insensitive comparision of their
-- identifiers) are in the list.
-- Action: Combine them into a single Replaced entry.
dedupe :: forall fields. HasField fields (Maybe Declaration) => [Entry (Record fields)] -> [Entry (Record fields)]
dedupe = let tuples = sortOn fst . Map.elems . snd . foldl' go (0, Map.empty) in (fmap . fmap) snd tuples
where
go :: HasField fields (Maybe Declaration)
=> (Int, Map.Map DedupeKey (Int, Entry (Record fields)))
-> Entry (Record fields)
-> (Int, Map.Map DedupeKey (Int, Entry (Record fields)))
go (index, m) x | Just (_, similar) <- Map.lookup (dedupeKey x) m
= if exactMatch similar x
then (succ index, m)
else
let replacement = Replaced (entryPayload similar)
in (succ index, Map.insert (dedupeKey replacement) (index, replacement) m)
| otherwise = (succ index, Map.insert (dedupeKey x) (index, x) m)
dedupeKey entry = DedupeKey ((fmap toCategoryName . getDeclaration . entryPayload) entry, (fmap (toLower . declarationIdentifier) . getDeclaration . entryPayload) entry)
exactMatch = (==) `on` (getDeclaration . entryPayload)
-- | Construct a 'JSONSummary' from an 'Entry'. Returns 'Nothing' for 'Unchanged' patches.
entrySummary :: (HasField fields (Maybe Declaration), HasField fields Span) => Entry (Record fields) -> Maybe JSONSummary