delete old doc support from the lexer and parser

Mitchell Rosen 2024-06-21 16:24:41 -04:00
parent 2010b57e2f
commit 5ca1ef7829
2 changed files with 0 additions and 396 deletions

@@ -19,11 +19,8 @@ import Data.List qualified as List
import Data.List.Extra qualified as List.Extra
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.List.NonEmpty qualified as NonEmpty
import Data.Maybe qualified as Maybe
import Data.Sequence qualified as Sequence
import Data.Set qualified as Set
import Data.Text qualified as Text
import Data.Tuple.Extra qualified as TupleE
import Text.Megaparsec qualified as P
import U.Core.ABT qualified as ABT
import Unison.ABT qualified as ABT
@@ -56,7 +53,6 @@ import Unison.Type (Type)
import Unison.Type qualified as Type
import Unison.Typechecker.Components qualified as Components
import Unison.Util.Bytes qualified as Bytes
import Unison.Util.List (intercalateMapWith, quenchRuns)
import Unison.Var (Var)
import Unison.Var qualified as Var
import Prelude hiding (and, or, seq)
@@ -129,15 +125,6 @@ termLink' = do
      | Set.size s == 1 -> pure $ const (Set.findMin s) <$> id
      | otherwise -> customFailure $ UnknownTerm id s

link' :: (Monad m, Var v) => P v m (Either (L.Token Reference) (L.Token Referent))
link' = do
  id <- hqPrefixId
  ns <- asks names
  case (Names.lookupHQTerm Names.IncludeSuffixes (L.payload id) ns, Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns) of
    (s, s2) | Set.size s == 1 && Set.null s2 -> pure . Right $ const (Set.findMin s) <$> id
    (s, s2) | Set.size s2 == 1 && Set.null s -> pure . Left $ const (Set.findMin s2) <$> id
    (s, s2) -> customFailure $ UnknownId id s s2

link :: (Monad m, Var v) => TermP v m
link = termLink <|> typeLink
  where
@@ -452,7 +439,6 @@ termLeaf =
      delayQuote,
      (snd <$> delayBlock),
      bang,
      docBlock,
      doc2Block <&> \(spanAnn, trm) -> trm {ABT.annotation = ABT.annotation trm <> spanAnn}
    ]
@@ -610,66 +596,6 @@ doc2Block = do
        pure $ (spanAnn, Term.apps' f [addDelay tm])
      _ -> regular

docBlock :: (Monad m, Var v) => TermP v m
docBlock = do
  openTok <- openBlockWith "[:"
  segs <- many segment
  closeTok <- closeBlock
  let a = ann openTok <> ann closeTok
  pure . docNormalize $ Term.app a (Term.constructor a (ConstructorReference DD.docRef DD.docJoinId)) (Term.list a segs)
  where
    segment = blob <|> linky
    blob = do
      s <- string
      pure $
        Term.app
          (ann s)
          (Term.constructor (ann s) (ConstructorReference DD.docRef DD.docBlobId))
          (Term.text (ann s) (L.payload s))
    linky = asum [include, signature, evaluate, source, link]
    include = do
      _ <- P.try (reserved "include")
      hashQualifiedPrefixTerm
    signature = do
      _ <- P.try (reserved "signature")
      tok <- termLink'
      pure $
        Term.app
          (ann tok)
          (Term.constructor (ann tok) (ConstructorReference DD.docRef DD.docSignatureId))
          (Term.termLink (ann tok) (L.payload tok))
    evaluate = do
      _ <- P.try (reserved "evaluate")
      tok <- termLink'
      pure $
        Term.app
          (ann tok)
          (Term.constructor (ann tok) (ConstructorReference DD.docRef DD.docEvaluateId))
          (Term.termLink (ann tok) (L.payload tok))
    source = do
      _ <- P.try (reserved "source")
      l <- link''
      pure $
        Term.app
          (ann l)
          (Term.constructor (ann l) (ConstructorReference DD.docRef DD.docSourceId))
          l
    link'' = either ty t <$> link'
      where
        t tok =
          Term.app
            (ann tok)
            (Term.constructor (ann tok) (ConstructorReference DD.linkRef DD.linkTermId))
            (Term.termLink (ann tok) (L.payload tok))
        ty tok =
          Term.app
            (ann tok)
            (Term.constructor (ann tok) (ConstructorReference DD.linkRef DD.linkTypeId))
            (Term.typeLink (ann tok) (L.payload tok))
    link = d <$> link''
      where
        d tm = Term.app (ann tm) (Term.constructor (ann tm) (ConstructorReference DD.docRef DD.docLinkId)) tm
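
-- For reference, the surface syntax this parser accepted looked roughly like
-- the following (a hedged reconstruction from the combinators above, not from
-- a spec; the names List.map, example, and otherDoc are illustrative):
--
--   [: Some explanatory text. See @List.map for a link,
--      @[signature] List.map for its type, @[source] List.map for its code,
--      @[evaluate] example to run it, and @[include] otherDoc to splice in
--      another doc. :]
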
-- Used by unbreakParas within docNormalize. Doc literals are a joined sequence
-- of segments. This type describes a property of a segment.
data UnbreakCase
@@ -682,298 +608,6 @@ data UnbreakCase
  StartsUnindented
  deriving (Eq, Show)

-- Doc literal normalization
--
-- This normalization allows the pretty-printer and doc display code to do
-- indenting, and to do line-wrap of paragraphs, but without the inserted
-- newlines being then frozen into the text for ever more over subsequent
-- edit/update cycles.
--
-- The alternative would be to stop line-wrapping docs on view/display by adding
-- newlines in the pretty-printer, and instead leave wrapping to the
-- terminal/editor. Might be worth considering if this code ends up being
-- too buggy and fragile to maintain. Maybe display could add newlines,
-- and view could refrain from doing so.
--
-- Operates on the text of the Blobs within a doc (as parsed by docBlock):
--   - reduces the whitespace after all newlines so that at least one of the
--     non-initial lines has zero indent (important because the pretty-printer
--     adds indenting when displaying doc literals)
--   - removes trailing whitespace from each line
--   - removes newlines between any sequence of non-empty zero-indent lines
--     (i.e. undoes line-breaking within paragraphs).
--
-- Should be understood in tandem with Util.Pretty.paragraphyText, which
-- outputs doc text for display/edit/view.
-- See also unison-src/transcripts/doc-formatting.md.
--
-- There is some heuristic/approximate logic in here - see the comment flagged
-- with ** below.
--
-- This function is a bit painful - it's trying to act on a sequence of lines,
-- but that sequence is split up between the various blobs in the doc, which
-- are separated by the elements tracking things like @[source] etc. It
-- would be simplified if the doc representation were something like
-- [Either Char EnrichedElement].
--
-- This function has some tracing which you can enable by deleting some calls to
-- 'const id' below.
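--
-- A hedged worked example of the first and third steps (illustrative, not
-- taken from the original tests): given a blob whose text is
--
--   "First line\n    second line\n    third line\n"
--
-- unindenting drops the common four-space indent from the non-initial lines,
-- and the newlines between the resulting non-empty zero-indent lines then
-- become spaces, giving
--
--   "First line second line third line\n"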
docNormalize :: (Ord v, Show v) => Term v a -> Term v a
docNormalize tm = case tm of
  -- This pattern is just `DD.DocJoin seqs`, but exploded in order to grab
  -- the annotations. The aim is just to map `normalize` over it.
  a@(Term.App' c@(Term.Constructor' (ConstructorReference DD.DocRef DD.DocJoinId)) s@(Term.List' seqs)) ->
    join
      (ABT.annotation a)
      (ABT.annotation c)
      (ABT.annotation s)
      (normalize seqs)
    where
  _ -> error $ "unexpected doc structure: " ++ show tm
  where
    normalize =
      Sequence.fromList
        . (map TupleE.fst3)
        . (tracing "after unbreakParas")
        . unbreakParas
        . (tracing "after full preprocess")
        . preProcess
        . (tracing "after unindent")
        . unIndent
        . (tracing "initial parse")
        . miniPreProcess
    preProcess xs =
      zip3
        seqs
        (lineStarteds $ Sequence.fromList seqs)
        (followingLines $ Sequence.fromList seqs)
      where
        seqs = map fst xs
    miniPreProcess seqs = zip (toList seqs) (lineStarteds seqs)
    unIndent ::
      (Ord v) =>
      [(Term v a, UnbreakCase)] ->
      [(Term v a, UnbreakCase)]
    unIndent tms = map go tms
      where
        go (b, previous) =
          ((mapBlob $ (reduceIndent includeFirst minIndent)) b, previous)
          where
            -- Since previous was calculated before unindenting, it will often be wrongly
            -- StartsIndented instead of StartsUnindented - but that's OK just for the test
            -- below. And we'll recalculate it later in preProcess.
            includeFirst = previous == LineEnds
        concatenatedBlobs :: Text
        concatenatedBlobs = mconcat (toList (fmap (getBlob . fst) tms))
        getBlob (DD.DocBlob txt) = txt
        getBlob _ = "."
        -- Note we exclude the first line when calculating the minimum indent - the lexer
        -- already stripped leading spaces from it, and anyway it would have been sharing
        -- its line with the [: and maybe other stuff.
        nonInitialNonEmptyLines =
          filter (not . Text.null) $
            map Text.stripEnd $
              drop 1 $
                Text.lines
                  concatenatedBlobs
        minIndent =
          minimumOrZero $
            map
              (Text.length . (Text.takeWhile Char.isSpace))
              nonInitialNonEmptyLines
        minimumOrZero xs = if length xs == 0 then 0 else minimum xs
    reduceIndent :: Bool -> Int -> Text -> Text
    reduceIndent includeFirst n t =
      fixup $
        Text.unlines $
          mapExceptFirst reduceLineIndent onFirst $
            Text.lines t
      where
        onFirst = if includeFirst then reduceLineIndent else id
        reduceLineIndent l = result
          where
            currentIndent = Text.length $ (Text.takeWhile Char.isSpace) l
            remainder = (Text.dropWhile Char.isSpace) l
            newIndent = maximum [0, currentIndent - n]
            result = Text.replicate newIndent " " `mappend` remainder
        -- unlines . lines adds a trailing newline if one was not present: undo that.
        fixup = if Text.takeEnd 1 t == "\n" then id else Text.dropEnd 1
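        -- A hedged worked example (illustrative, not from the original tests):
        -- with includeFirst = False the first line keeps its indent and each
        -- later line loses up to n leading spaces:
        --
        --   reduceIndent False 4 "  foo\n      bar\n" == "  foo\n  bar\n"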
    -- Remove newlines between any sequence of non-empty zero-indent lines.
    -- This is made more complicated by Doc elements (e.g. links) which break up a
    -- blob but don't break a line of output text**. We sometimes need to refer back to the
    -- previous blob to see whether a newline is between two zero-indented lines.
    -- For example...
    --   "This link to @foo makes it harder to see\n
    --    that the newline should be removed."
    -- Whether an element does this (breaks a blob but not a line of output text) really
    -- depends on some things we don't know here: does an @[include] target doc occupy
    -- just one line or several; whether this doc is going to be viewed or displayed.
    -- So we'll get it wrong sometimes. The impact of this is that we may sometimes
    -- misjudge whether a newline is separating two non-indented lines, and should therefore
    -- be removed.
    unbreakParas ::
      (Show v, Ord v) =>
      [(Term v a, UnbreakCase, Bool)] ->
      [(Term v a, UnbreakCase, Bool)]
    unbreakParas = map go
      where
        -- 'candidate' means 'candidate to be joined with an adjacent line as part of a
        -- paragraph'.
        go (b, previous, nextIsCandidate) =
          (mapBlob go b, previous, nextIsCandidate)
          where
            go txt = if Text.null txt then txt else tr result'
              where
                tr =
                  const id $
                    trace $
                      "\nprocessElement on blob "
                        ++ (show txt)
                        ++ ", result' = "
                        ++ (show result')
                        ++ ", lines: "
                        ++ (show ls)
                        ++ ", candidates = "
                        ++ (show candidates)
                        ++ ", previous = "
                        ++ (show previous)
                        ++ ", firstIsCandidate = "
                        ++ (show firstIsCandidate)
                        ++ "\n\n"
                -- remove trailing whitespace
                -- ls is non-empty thanks to the Text.null check above
                -- Don't cut the last line's trailing whitespace - there's an assumption here
                -- that it's followed by something which will put more text on the same line.
                ls = mapExceptLast Text.stripEnd id $ Text.lines txt
                -- Work out which lines are candidates to be joined as part of a paragraph, i.e.
                -- are not indented.
                candidate l = case Text.uncons l of
                  Just (initial, _) -> not . Char.isSpace $ initial
                  Nothing -> False -- empty line
                -- The segment of this blob that runs up to the first newline may not itself
                -- be the start of a line of the doc - for example if it's preceded by a link.
                -- So work out whether the line of which it is a part is a candidate.
                firstIsCandidate = case previous of
                  LineEnds -> candidate (head ls)
                  StartsIndented -> False
                  StartsUnindented -> True
                candidates = firstIsCandidate : (tail (map candidate ls))
                result = mconcat $ intercalateMapWith sep fst (zip ls candidates)
                sep (_, candidate1) (_, candidate2) =
                  if candidate1 && candidate2 then " " else "\n"
                -- Text.lines forgets whether there was a trailing newline.
                -- If there was one, then either add it back or convert it to a space.
                result' =
                  if (Text.takeEnd 1 txt) == "\n"
                    then
                      if (last candidates) && nextIsCandidate
                        then result `Text.append` " "
                        else result `Text.append` "\n"
                    else result
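                -- A hedged worked example (illustrative): for a blob "one\ntwo\n"
                -- with previous == LineEnds and nextIsCandidate == False, both
                -- lines are candidates, so the inner newline becomes a space and
                -- the trailing one is kept: the result is "one two\n".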
    -- A list whose entries match those of tms. `Nothing` is used for elements
    -- which just continue a line, and so need to be ignored when looking back
    -- for how the last line started. Otherwise describes whether the last
    -- line of this entry is indented (or maybe terminated by a newline.)
    -- A value of `Nothing` protects ensuing text from having its leading
    -- whitespace removed by `unindent`.
    -- Note that some elements render over multiple lines when displayed.
    -- See test2 in transcript doc-formatting.md for an example of how
    -- this looks when there is whitespace immediately following @[source]
    -- or @[evaluate].
    lastLines :: (Show v) => Sequence.Seq (Term v a) -> [Maybe UnbreakCase]
    lastLines tms = (flip fmap) (toList tms) $ \case
      DD.DocBlob txt -> unbreakCase txt
      DD.DocLink _ -> Nothing
      DD.DocSource _ -> Nothing
      DD.DocSignature _ -> Nothing
      DD.DocEvaluate _ -> Nothing
      Term.Var' _ -> Nothing -- @[include]
      e@_ -> error ("unexpected doc element: " ++ show e)
    -- Work out whether the last line of this blob is indented (or maybe
    -- terminated by a newline.)
    unbreakCase :: Text -> Maybe UnbreakCase
    unbreakCase txt =
      let (startAndNewline, afterNewline) = Text.breakOnEnd "\n" txt
       in if Text.null startAndNewline
            then Nothing
            else
              if Text.null afterNewline
                then Just LineEnds
                else
                  if Char.isSpace (Text.head afterNewline)
                    then Just StartsIndented
                    else Just StartsUnindented
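    -- Hedged examples (illustrative): unbreakCase "no newline" == Nothing;
    -- unbreakCase "ends with\n" == Just LineEnds;
    -- unbreakCase "a\n  b" == Just StartsIndented;
    -- unbreakCase "a\nb" == Just StartsUnindented.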
    -- A list whose entries match those of tms. Describes how the current
    -- line started (the line including the start of this entry) - or LineEnds
    -- if this entry is starting a line itself.
    -- Calculated as the UnbreakCase of the previous entry that included a newline.
    -- Really there's a function of type (a -> Bool) -> a -> [a] -> [a] in here
    -- fighting to break free - overwriting elements that are 'shadowed' by
    -- a preceding element for which the predicate is true, with a copy of
    -- that element. (See the sketch at the end of this definition.)
    lineStarteds :: (Show v) => Sequence.Seq (Term v a) -> [UnbreakCase]
    lineStarteds tms = tr $ quenchRuns LineEnds StartsUnindented $ xs''
      where
        tr =
          const id $
            trace $
              "lineStarteds: xs = "
                ++ (show xs)
                ++ ", xss = "
                ++ (show xss)
                ++ ", xs' = "
                ++ (show xs')
                ++ ", xs'' = "
                ++ (show xs'')
                ++ "\n\n"
        -- Make sure there's a Just at the start of the list so we always find
        -- one when searching back.
        -- Example: xs = [J1,N2,J3]
        xs :: [Maybe UnbreakCase]
        xs = Just LineEnds : (lastLines tms)
        -- Example: xss = [[J1],[J1,N2],[J1,N2,J3]]
        xss :: [[Maybe UnbreakCase]]
        xss = drop 1 $ List.inits xs
        -- Example: after each step of the map...
        --   [[J1],[N2,J1],[J3,N2,J1]]   -- after reverse
        --   [Just J1, Just J1, Just J3] -- after find
        --   ...
        --   result = [1,1,3]
        xs' =
          map (Maybe.fromJust . Maybe.fromJust . (List.find isJust) . reverse) xss
        xs'' = List.Extra.dropEnd 1 xs'
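        -- A sketch of that combinator (hypothetical - it does not exist in the
        -- codebase): each element not satisfying the predicate is overwritten
        -- by the nearest preceding element that does satisfy it (or by the
        -- seed):
        --
        --   shadow :: (a -> Bool) -> a -> [a] -> [a]
        --   shadow p seed = go seed
        --     where
        --       go _ [] = []
        --       go prev (x : xs)
        --         | p x = x : go x xs
        --         | otherwise = prev : go prev xs
        --
        -- With p = isJust and seed = Just LineEnds, shadow over [J1,N2,J3]
        -- gives [J1,J1,J3] - matching the [1,1,3] example above.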
    -- For each element, can it be a line-continuation of a preceding blob?
    continuesLine :: Sequence.Seq (Term v a) -> [Bool]
    continuesLine tms = (flip fmap) (toList tms) \case
      DD.DocBlob _ -> False -- value doesn't matter - you don't get adjacent blobs
      DD.DocLink _ -> True
      DD.DocSource _ -> False
      DD.DocSignature _ -> False
      DD.DocEvaluate _ -> False
      Term.Var' _ -> False -- @[include]
      _ -> error ("unexpected doc element" ++ show tm)
    -- A list whose entries match those of tms. Can the subsequent entry be a
    -- line continuation of this one?
    followingLines tms = drop 1 ((continuesLine tms) ++ [False])
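    -- A hedged worked example (illustrative): for elements [blob, link, blob],
    -- continuesLine gives [False, True, False], so followingLines gives
    -- [True, False, False] - i.e. only the first blob is followed by an
    -- element that could continue its line.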
    mapExceptFirst :: (a -> b) -> (a -> b) -> [a] -> [b]
    mapExceptFirst fRest fFirst = \case
      [] -> []
      x : rest -> (fFirst x) : (map fRest rest)
    mapExceptLast fRest fLast = reverse . (mapExceptFirst fRest fLast) . reverse
    tracing :: (Show a) => [Char] -> a -> a
    tracing when x =
      (const id $ trace ("at " ++ when ++ ": " ++ (show x) ++ "\n")) x
    blob aa ac at txt =
      Term.app aa (Term.constructor ac (ConstructorReference DD.docRef DD.docBlobId)) (Term.text at txt)
    join aa ac as segs =
      Term.app aa (Term.constructor ac (ConstructorReference DD.docRef DD.docJoinId)) (Term.list' as segs)
    mapBlob :: (Ord v) => (Text -> Text) -> Term v a -> Term v a
    -- this pattern is just `DD.DocBlob txt` but exploded to capture the annotations as well
    mapBlob f (aa@(Term.App' ac@(Term.Constructor' (ConstructorReference DD.DocRef DD.DocBlobId)) at@(Term.Text' txt))) =
      blob (ABT.annotation aa) (ABT.annotation ac) (ABT.annotation at) (f txt)
    mapBlob _ t = t

delayQuote :: (Monad m, Var v) => TermP v m
delayQuote = P.label "quote" do
  start <- reserved "'"

@@ -406,7 +406,6 @@ lexemes' eof =
    toks :: P [Token Lexeme]
    toks =
      doc2
        <|> doc
        <|> token numeric
        <|> token character
        <|> reserved
@@ -835,35 +834,6 @@ lexemes' eof =
      where
        final = last ts

    doc :: P [Token Lexeme]
    doc = open <+> (CP.space *> fmap fixup body) <+> (close <* space)
      where
        open = token'' (\t _ _ -> t) $ tok (Open <$> lit "[:")
        close = tok (Close <$ lit ":]")
        at = lit "@"
        -- this removes some trailing whitespace from the final textual segment
        fixup [] = []
        fixup (Token (Textual (reverse -> txt)) start stop : []) =
          [Token (Textual txt') start stop]
          where
            txt' = reverse (dropWhile (\c -> isSpace c && not (c == '\n')) txt)
        fixup (h : t) = h : fixup t
        body :: P [Token Lexeme]
        body = txt <+> (atk <|> pure [])
          where
            ch = (":]" <$ lit "\\:]") <|> ("@" <$ lit "\\@") <|> (pure <$> P.anySingle)
            txt = tok (Textual . join <$> P.manyTill ch (P.lookAhead sep))
            sep = void at <|> void close
            ref = at *> (tok identifierLexemeP <|> docTyp)
            atk = (ref <|> docTyp) <+> body
            docTyp = do
              _ <- lit "["
              typ <- tok (P.manyTill P.anySingle (P.lookAhead (lit "]")))
              _ <- lit "]" *> CP.space
              t <- tok identifierLexemeP
              pure $ (fmap Reserved <$> typ) <> t
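            -- A hedged example (reconstructed from the grammar above; foo is
            -- an illustrative name): the literal
            --
            --   [: see @foo and @[signature] foo :]
            --
            -- would lex roughly as Open "[:", Textual "see ",
            -- <identifier foo>, Textual " and ", Reserved "signature",
            -- <identifier foo>, and finally Close.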

    blank =
      separated wordySep do
        _ <- char '_'