From f4863dd3e8900c149054693d12340406c4f62d31 Mon Sep 17 00:00:00 2001 From: Janek Spaderna Date: Sun, 26 Jun 2022 23:58:37 +0200 Subject: [PATCH 1/3] Collect reports in a difference list Using a difference list we get O(1) appending vs. O(n) with the built-in list type. Overall for n reports this results in O(n) operations instead of O(n^2) operations. --- diagnose.cabal | 6 +++++- package.yaml | 5 +++-- src/Error/Diagnose/Diagnostic/Internal.hs | 11 +++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/diagnose.cabal b/diagnose.cabal index 2137f7d..298813d 100644 --- a/diagnose.cabal +++ b/diagnose.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.34.6. +-- This file has been generated from package.yaml by hpack version 0.34.4. -- -- see: https://github.com/sol/hpack @@ -67,6 +67,7 @@ library build-depends: base >=4.7 && <5 , data-default >=0.7 && <1 + , dlist ==1.0.* , hashable >=1.3 && <2 , prettyprinter >=1.7.0 && <2 , prettyprinter-ansi-terminal >=1.1.0 && <2 @@ -111,6 +112,7 @@ test-suite diagnose-megaparsec-tests base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose + , dlist ==1.0.* , hashable >=1.3 && <2 , prettyprinter >=1.7.0 && <2 , prettyprinter-ansi-terminal >=1.1.0 && <2 @@ -150,6 +152,7 @@ test-suite diagnose-parsec-tests base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose + , dlist ==1.0.* , hashable >=1.3 && <2 , prettyprinter >=1.7.0 && <2 , prettyprinter-ansi-terminal >=1.1.0 && <2 @@ -188,6 +191,7 @@ test-suite diagnose-rendering-tests base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose + , dlist ==1.0.* , hashable >=1.3 && <2 , prettyprinter >=1.7.0 && <2 , prettyprinter-ansi-terminal >=1.1.0 && <2 diff --git a/package.yaml b/package.yaml index 512dff3..406bd30 100644 --- a/package.yaml +++ b/package.yaml @@ -8,11 +8,12 @@ category: "Error Reporting" dependencies: - base >= 4.7 && < 5 +- data-default >= 0.7 && < 1 +- dlist >= 1.0 && < 1.1 +- hashable >= 1.3 && < 2 - prettyprinter >= 1.7.0 && < 2 - prettyprinter-ansi-terminal >= 1.1.0 && < 2 - unordered-containers >= 0.2 && < 0.3 -- hashable >= 1.3 && < 2 -- data-default >= 0.7 && < 1 - wcwidth >= 0.0.1 && <1 - text >= 1.0.0.0 && <= 2.0 # ^^^ This is unfortunately required, but as 'prettyprinter' already depends on it, it will already have been fetched diff --git a/src/Error/Diagnose/Diagnostic/Internal.hs b/src/Error/Diagnose/Diagnostic/Internal.hs index 8a58a07..1fefd58 100644 --- a/src/Error/Diagnose/Diagnostic/Internal.hs +++ b/src/Error/Diagnose/Diagnostic/Internal.hs @@ -20,8 +20,11 @@ import Control.Monad.IO.Class (MonadIO, liftIO) import Data.Aeson (ToJSON(..), encode, object, (.=)) import Data.ByteString.Lazy (ByteString) #endif + +import Data.DList (DList) +import qualified Data.DList as DL import Data.Default (Default, def) -import Data.Foldable (fold) +import Data.Foldable (fold, toList) import Data.HashMap.Lazy (HashMap) import qualified Data.HashMap.Lazy as HashMap import Data.List (intersperse) @@ -38,7 +41,7 @@ import System.IO (Handle) -- to create a new empty diagnostic, and 'addFile' and 'addReport' to alter its internal state. data Diagnostic msg = Diagnostic - [Report msg] + (DList (Report msg)) -- ^ All the reports contained in a diagnostic. -- -- Reports are output one by one, without connections in between. @@ -85,7 +88,7 @@ prettyDiagnostic :: Diagnostic msg -> Doc Annotation prettyDiagnostic withUnicode tabSize (Diagnostic reports file) = - fold . intersperse hardline $ prettyReport file withUnicode tabSize <$> reports + fold . intersperse hardline $ prettyReport file withUnicode tabSize <$> toList reports {-# INLINE prettyDiagnostic #-} -- | Prints a 'Diagnostic' onto a specific 'Handle'. @@ -127,7 +130,7 @@ addReport :: Report msg -> Diagnostic msg addReport (Diagnostic reports files) report = - Diagnostic (reports <> [report]) files + Diagnostic (reports `DL.snoc` report) files {-# INLINE addReport #-} #ifdef USE_AESON From 4d12cc8236d0d8cae6e8200da07d8f0c4d3e60a8 Mon Sep 17 00:00:00 2001 From: Janek Spaderna Date: Sun, 26 Jun 2022 20:22:45 +0200 Subject: [PATCH 2/3] Calculate character widths per line in array Using an unboxed array has better performance than using a `HashMap Int Int`: although both data structures support O(1) access, the array has a better constant factor. Additionally, an array is suited very well since we we have a character width for every index i in [1..n]. --- diagnose.cabal | 12 ++-- package.yaml | 1 + src/Error/Diagnose/Report/Internal.hs | 87 ++++++++++++++++++--------- 3 files changed, 66 insertions(+), 34 deletions(-) diff --git a/diagnose.cabal b/diagnose.cabal index 298813d..6f7647d 100644 --- a/diagnose.cabal +++ b/diagnose.cabal @@ -65,7 +65,8 @@ library BlockArguments ghc-options: -Wall -Wextra build-depends: - base >=4.7 && <5 + array ==0.5.* + , base >=4.7 && <5 , data-default >=0.7 && <1 , dlist ==1.0.* , hashable >=1.3 && <2 @@ -109,7 +110,8 @@ test-suite diagnose-megaparsec-tests BlockArguments ghc-options: -Wall -Wextra -threaded -rtsopts -with-rtsopts=-N build-depends: - base >=4.7 && <5 + array ==0.5.* + , base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose , dlist ==1.0.* @@ -149,7 +151,8 @@ test-suite diagnose-parsec-tests BlockArguments ghc-options: -Wall -Wextra -threaded -rtsopts -with-rtsopts=-N build-depends: - base >=4.7 && <5 + array ==0.5.* + , base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose , dlist ==1.0.* @@ -188,7 +191,8 @@ test-suite diagnose-rendering-tests BlockArguments ghc-options: -Wall -Wextra -threaded -rtsopts -with-rtsopts=-N build-depends: - base >=4.7 && <5 + array ==0.5.* + , base >=4.7 && <5 , data-default >=0.7 && <1 , diagnose , dlist ==1.0.* diff --git a/package.yaml b/package.yaml index 406bd30..8546254 100644 --- a/package.yaml +++ b/package.yaml @@ -8,6 +8,7 @@ category: "Error Reporting" dependencies: - base >= 4.7 && < 5 +- array >= 0.5 && < 0.6 - data-default >= 0.7 && < 1 - dlist >= 1.0 && < 1.1 - hashable >= 1.3 && < 2 diff --git a/src/Error/Diagnose/Report/Internal.hs b/src/Error/Diagnose/Report/Internal.hs index 6b487a8..0f6568d 100644 --- a/src/Error/Diagnose/Report/Internal.hs +++ b/src/Error/Diagnose/Report/Internal.hs @@ -25,6 +25,9 @@ module Error.Diagnose.Report.Internal where import Data.Aeson (ToJSON(..), object, (.=)) #endif import Control.Applicative ((<|>)) +import Data.Array.IArray ((!)) +import qualified Data.Array.IArray as Array +import Data.Array.Unboxed (IArray, Ix, UArray, listArray) import Data.Bifunctor (bimap, first, second) import Data.Char.WCWidth (wcwidth) import Data.Default (def) @@ -33,9 +36,9 @@ import Data.Function (on) import Data.Functor ((<&>)) import Data.HashMap.Lazy (HashMap) import qualified Data.HashMap.Lazy as HashMap -import qualified Data.HashMap.Lazy as IntMap import qualified Data.List as List import qualified Data.List.Safe as List +import Data.Maybe import Data.Ord (Down (Down)) import Data.String (IsString (fromString)) import qualified Data.Text as Text @@ -44,6 +47,8 @@ import Error.Diagnose.Style (Annotation (..)) import Prettyprinter (Doc, Pretty (..), align, annotate, colon, hardline, lbracket, rbracket, space, width, (<+>)) import Prettyprinter.Internal (Doc (..)) +type WidthTable = UArray Int Int + -- | The type of diagnostic reports with abstract message type. data Report msg = Report @@ -467,34 +472,47 @@ prettyAllLines files withUnicode isError tabSize leftLen inline multiline lineNu in prefixWithBar colorOfFirstMultilineMarker <> prefix <> fold (List.intersperse prefix $ showMultilineMarkerMessages multiline) -- | -getLine_ :: HashMap FilePath [String] -> [(Position, Marker msg)] -> Int -> Int -> Bool -> (IntMap.HashMap Int Int, Doc Annotation) +getLine_ :: + HashMap FilePath [String] -> + [(Position, Marker msg)] -> + Int -> + Int -> + Bool -> + (WidthTable, Doc Annotation) getLine_ files markers line tabSize isError = case List.safeIndex (line - 1) =<< (HashMap.!?) files . file . fst =<< List.safeHead markers of - Nothing -> (mempty, annotate NoLineColor "") + Nothing -> + ( mkWidthTable "", + annotate NoLineColor "" + ) Just code -> - let (tabs, code') = indexedWithTabsReplaced code - in ( tabs, - fold $ - code' <&> \(n, c) -> - let colorizingMarkers = flip - filter - markers - \(Position (bl, bc) (el, ec) _, _) -> - if bl == el - then n >= bc && n < ec - else (bl == line && n >= bc) || (el == line && n < ec) || (bl < line && el > line) - in maybe id ((\m -> annotate (MarkerStyle $ markerColor isError m)) . snd) (List.safeHead colorizingMarkers) (pretty c) - ) + ( mkWidthTable code, + flip foldMap (zip [1 ..] code) \(n, c) -> + let cdoc = + ifTab (pretty (replicate tabSize ' ')) pretty c + colorizingMarkers = flip filter markers \case + (Position (bl, bc) (el, ec) _, _) + | bl == el -> + n >= bc && n < ec + | otherwise -> + (bl == line && n >= bc) + || (el == line && n < ec) + || (bl < line && el > line) + in maybe + id + ((\m -> annotate (MarkerStyle $ markerColor isError m)) . snd) + (List.safeHead colorizingMarkers) + cdoc + ) where - indexedWithTabsReplaced :: String -> (IntMap.HashMap Int Int, [(Int, Char)]) - indexedWithTabsReplaced = goIndexed 1 + ifTab :: a -> (Char -> a) -> Char -> a + ifTab a _ '\t' = a + ifTab _ f c = f c - goIndexed :: Int -> String -> (IntMap.HashMap Int Int, [(Int, Char)]) - goIndexed _ [] = (mempty, []) - goIndexed n ('\t' : xs) = bimap (IntMap.insert n tabSize) (replicate tabSize (n, ' ') <>) (goIndexed (n + 1) xs) - goIndexed n (x : xs) = bimap (IntMap.insert n (wcwidth x)) ((n, x) :) (goIndexed (n + 1) xs) + mkWidthTable :: String -> WidthTable + mkWidthTable s = listArray (1, length s) (ifTab tabSize wcwidth <$> s) -- | -showAllMarkersInLine :: Pretty msg => Bool -> Bool -> (Doc Annotation -> Doc Annotation) -> Bool -> Bool -> Int -> IntMap.HashMap Int Int -> [(Position, Marker msg)] -> Doc Annotation +showAllMarkersInLine :: Pretty msg => Bool -> Bool -> (Doc Annotation -> Doc Annotation) -> Bool -> Bool -> Int -> WidthTable -> [(Position, Marker msg)] -> Doc Annotation showAllMarkersInLine _ _ _ _ _ _ _ [] = mempty showAllMarkersInLine hasMultilines inSpanOfMultiline colorMultilinePrefix withUnicode isError leftLen widths ms = let maxMarkerColumn = snd $ end $ fst $ List.last $ List.sortOn (snd . end . fst) ms @@ -505,19 +523,23 @@ showAllMarkersInLine hasMultilines inSpanOfMultiline colorMultilinePrefix withUn in -- get the maximum end column, so that we know when to stop looking for other markers on the same line hardline <+> dotPrefix leftLen withUnicode <+> (if List.null ms then mempty else specialPrefix <> showMarkers 1 maxMarkerColumn <> showMessages specialPrefix ms maxMarkerColumn) where + widthAt i = 0 `fromMaybe` safeArrayIndex i widths + widthsBetween start end = + sum $ take (end - start) $ drop (start - 1) $ Array.elems widths + showMarkers n lineLen | n > lineLen = mempty -- reached the end of the line | otherwise = let allMarkers = flip filter ms \(Position (_, bc) (_, ec) _, _) -> n >= bc && n < ec in -- only consider markers which span onto the current column case allMarkers of - [] -> fold (replicate (IntMap.lookupDefault 0 n widths) space) <> showMarkers (n + 1) lineLen + [] -> fold (replicate (widthAt n) space) <> showMarkers (n + 1) lineLen (Position {..}, marker) : _ -> annotate (markerColor isError marker) ( if snd begin == n - then (if withUnicode then "┬" else "^") <> fold (replicate (IntMap.lookupDefault 0 n widths - 1) if withUnicode then "─" else "-") - else fold (replicate (IntMap.lookupDefault 0 n widths) if withUnicode then "─" else "-") + then (if withUnicode then "┬" else "^") <> fold (replicate (widthAt n - 1) if withUnicode then "─" else "-") + else fold (replicate (widthAt n) if withUnicode then "─" else "-") ) <> showMarkers (n + 1) lineLen @@ -532,15 +554,15 @@ showAllMarkersInLine hasMultilines inSpanOfMultiline colorMultilinePrefix withUn allColumns _ [] = (1, []) allColumns n ms@((Position (_, bc) _ _, col) : ms') | n == bc = bimap (+ 1) (col :) (allColumns (n + 1) ms') - | n < bc = bimap (+ 1) (replicate (IntMap.lookupDefault 0 n widths) space <>) (allColumns (n + 1) ms) - | otherwise = bimap (+ 1) (replicate (IntMap.lookupDefault 0 n widths) space <>) (allColumns (n + 1) ms') + | n < bc = bimap (+ 1) (replicate (widthAt n) space <>) (allColumns (n + 1) ms) + | otherwise = bimap (+ 1) (replicate (widthAt n) space <>) (allColumns (n + 1) ms') -- transform the list of remaining markers into a single document line hasSuccessor = length filteredPipes /= length pipes lineStart pipes = let (n, docs) = allColumns 1 $ List.sortOn (snd . begin . fst) pipes - numberOfSpaces = sum [IntMap.lookupDefault 0 x widths | x <- [n .. bc - 1]] -- bc - n + numberOfSpaces = widthsBetween n bc in dotPrefix leftLen withUnicode <+> specialPrefix <> fold docs <> pretty (replicate numberOfSpaces ' ') -- the start of the line contains the "dot"-prefix as well as all the pipes for all the still not rendered marker messages @@ -555,7 +577,7 @@ showAllMarkersInLine hasMultilines inSpanOfMultiline colorMultilinePrefix withUn lineLen = case lastBeginPosition of Nothing -> 0 - Just col -> sum [IntMap.lookupDefault 0 x widths | x <- [bc .. col - 1]] + Just col -> widthsBetween bc col currentPipe = if @@ -639,3 +661,8 @@ prettyAllHints (h : hs) leftLen withUnicode = noteMessage (Note msg) = msg noteMessage (Hint msg) = msg + +safeArrayIndex :: (Ix i, IArray a e) => i -> a i e -> Maybe e +safeArrayIndex i a + | Array.inRange (Array.bounds a) i = Just (a ! i) + | otherwise = Nothing From 7ba617130560df662814302da2d4f46d5cc52006 Mon Sep 17 00:00:00 2001 From: Janek Spaderna Date: Sun, 26 Jun 2022 20:22:41 +0200 Subject: [PATCH 3/3] Store a file's lines in an array Accessing a single line in `getLine_` using `List.safeIndex` takes O(n) time with n being the number of lines. Overall this results in O(n^2) time for formatting a diagnostic. By using an array we cut the overall time down to O(n). --- src/Error/Diagnose/Diagnostic/Internal.hs | 13 +++-- src/Error/Diagnose/Report/Internal.hs | 62 ++++++++++++----------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/Error/Diagnose/Diagnostic/Internal.hs b/src/Error/Diagnose/Diagnostic/Internal.hs index 1fefd58..d4f6cac 100644 --- a/src/Error/Diagnose/Diagnostic/Internal.hs +++ b/src/Error/Diagnose/Diagnostic/Internal.hs @@ -21,15 +21,15 @@ import Data.Aeson (ToJSON(..), encode, object, (.=)) import Data.ByteString.Lazy (ByteString) #endif +import Data.Array (listArray) import Data.DList (DList) import qualified Data.DList as DL import Data.Default (Default, def) import Data.Foldable (fold, toList) -import Data.HashMap.Lazy (HashMap) import qualified Data.HashMap.Lazy as HashMap import Data.List (intersperse) import Error.Diagnose.Report (Report) -import Error.Diagnose.Report.Internal (prettyReport) +import Error.Diagnose.Report.Internal (FileMap, prettyReport) import Error.Diagnose.Style (Annotation, Style) import Prettyprinter (Doc, Pretty, hardline, unAnnotate) import Prettyprinter.Render.Terminal (hPutDoc) @@ -45,7 +45,7 @@ data Diagnostic msg -- ^ All the reports contained in a diagnostic. -- -- Reports are output one by one, without connections in between. - (HashMap FilePath [String]) + !FileMap -- ^ A map associating files with their content as lists of lines. instance Default (Diagnostic msg) where @@ -57,7 +57,7 @@ instance Semigroup (Diagnostic msg) where #ifdef USE_AESON instance ToJSON msg => ToJSON (Diagnostic msg) where toJSON (Diagnostic reports files) = - object [ "files" .= fmap toJSONFile (HashMap.toList files) + object [ "files" .= fmap toJSONFile (fmap toList <$> (HashMap.toList files)) , "reports" .= reports ] where @@ -120,7 +120,10 @@ addFile :: String -> Diagnostic msg addFile (Diagnostic reports files) path content = - Diagnostic reports (HashMap.insert path (lines content) files) + let fileLines = lines content + lineCount = length fileLines + lineArray = listArray (0, lineCount - 1) fileLines + in Diagnostic reports (HashMap.insert path lineArray files) {-# INLINE addFile #-} -- | Inserts a new report into a diagnostic. diff --git a/src/Error/Diagnose/Report/Internal.hs b/src/Error/Diagnose/Report/Internal.hs index 0f6568d..dd0c135 100644 --- a/src/Error/Diagnose/Report/Internal.hs +++ b/src/Error/Diagnose/Report/Internal.hs @@ -25,9 +25,8 @@ module Error.Diagnose.Report.Internal where import Data.Aeson (ToJSON(..), object, (.=)) #endif import Control.Applicative ((<|>)) -import Data.Array.IArray ((!)) import qualified Data.Array.IArray as Array -import Data.Array.Unboxed (IArray, Ix, UArray, listArray) +import Data.Array.Unboxed (Array, IArray, Ix, UArray, listArray, (!)) import Data.Bifunctor (bimap, first, second) import Data.Char.WCWidth (wcwidth) import Data.Default (def) @@ -47,6 +46,8 @@ import Error.Diagnose.Style (Annotation (..)) import Prettyprinter (Doc, Pretty (..), align, annotate, colon, hardline, lbracket, rbracket, space, width, (<+>)) import Prettyprinter.Internal (Doc (..)) +type FileMap = HashMap FilePath (Array Int String) + type WidthTable = UArray Int Int -- | The type of diagnostic reports with abstract message type. @@ -158,7 +159,7 @@ err = Report True prettyReport :: Pretty msg => -- | The content of the file the reports are for - HashMap FilePath [String] -> + FileMap -> -- | Should we print paths in unicode? Bool -> -- | The number of spaces each TAB character will span @@ -315,7 +316,7 @@ groupMarkersPerFile markers = prettySubReport :: Pretty msg => -- | The content of files in the diagnostics - HashMap FilePath [String] -> + FileMap -> -- | Is the output done with Unicode characters? Bool -> -- | Is the current report an error report? @@ -372,7 +373,7 @@ splitMarkersPerLine (m@(Position {..}, _) : ms) = -- | prettyAllLines :: Pretty msg => - HashMap FilePath [String] -> + FileMap -> Bool -> Bool -> -- | The number of spaces each TAB character will span @@ -473,36 +474,37 @@ prettyAllLines files withUnicode isError tabSize leftLen inline multiline lineNu -- | getLine_ :: - HashMap FilePath [String] -> + FileMap -> [(Position, Marker msg)] -> Int -> Int -> Bool -> (WidthTable, Doc Annotation) -getLine_ files markers line tabSize isError = case List.safeIndex (line - 1) =<< (HashMap.!?) files . file . fst =<< List.safeHead markers of - Nothing -> - ( mkWidthTable "", - annotate NoLineColor "" - ) - Just code -> - ( mkWidthTable code, - flip foldMap (zip [1 ..] code) \(n, c) -> - let cdoc = - ifTab (pretty (replicate tabSize ' ')) pretty c - colorizingMarkers = flip filter markers \case - (Position (bl, bc) (el, ec) _, _) - | bl == el -> - n >= bc && n < ec - | otherwise -> - (bl == line && n >= bc) - || (el == line && n < ec) - || (bl < line && el > line) - in maybe - id - ((\m -> annotate (MarkerStyle $ markerColor isError m)) . snd) - (List.safeHead colorizingMarkers) - cdoc - ) +getLine_ files markers line tabSize isError = + case safeArrayIndex (line - 1) =<< (HashMap.!?) files . file . fst =<< List.safeHead markers of + Nothing -> + ( mkWidthTable "", + annotate NoLineColor "" + ) + Just code -> + ( mkWidthTable code, + flip foldMap (zip [1 ..] code) \(n, c) -> + let cdoc = + ifTab (pretty (replicate tabSize ' ')) pretty c + colorizingMarkers = flip filter markers \case + (Position (bl, bc) (el, ec) _, _) + | bl == el -> + n >= bc && n < ec + | otherwise -> + (bl == line && n >= bc) + || (el == line && n < ec) + || (bl < line && el > line) + in maybe + id + ((\m -> annotate (MarkerStyle $ markerColor isError m)) . snd) + (List.safeHead colorizingMarkers) + cdoc + ) where ifTab :: a -> (Char -> a) -> Char -> a ifTab a _ '\t' = a