diff --git a/.gitignore b/.gitignore index 9280f5786..5175da69e 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,5 @@ bin/ .licenses/log/ codex.tags + +vendor/proto3-suite diff --git a/.hlint.yaml b/.hlint.yaml index b60478f4c..ed0e78a92 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -48,6 +48,7 @@ # Terms - warning: {lhs: termFAnnotation . unTerm, rhs: termAnnotation, name: Use termAnnotation} - warning: {lhs: termFOut . unTerm, rhs: termOut, name: Use termOut} +- warning: {lhs: project . termOut, rhs: projectTerm, name: Use projectTerm} # Conveniences - warning: {lhs: maybe a pure, rhs: maybeM a, name: Use maybeM} diff --git a/.licenses/semantic/cabal/machines.txt b/.licenses/semantic/cabal/machines.txt new file mode 100644 index 000000000..2f231d750 --- /dev/null +++ b/.licenses/semantic/cabal/machines.txt @@ -0,0 +1,38 @@ +--- +type: cabal +name: machines +version: 0.6.4 +summary: Networked stream transducers +homepage: https://github.com/ekmett/machines/ +license: bsd-3-clause +--- +Copyright 2012-2015 Edward Kmett, Runar Bjarnason, Paul Chiusano + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/.licenses/semantic/cabal/prettyprinter.txt b/.licenses/semantic/cabal/prettyprinter.txt new file mode 100644 index 000000000..55fd581be --- /dev/null +++ b/.licenses/semantic/cabal/prettyprinter.txt @@ -0,0 +1,31 @@ +--- +type: cabal +name: prettyprinter +version: 1.2.1 +summary: A modern, easy to use, well-documented, extensible pretty-printer. +homepage: https://github.com/quchen/prettyprinter +license: bsd-2-clause +--- +Copyright 2008, Daan Leijen and Max Bolingbroke, 2016 David Luposchainsky. All +rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +This software is provided by the copyright holders "as is" and any express or +implied warranties, including, but not limited to, the implied warranties of +merchantability and fitness for a particular purpose are disclaimed. In no event +shall the copyright holders be liable for any direct, indirect, incidental, +special, exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or profits; or +business interruption) however caused and on any theory of liability, whether in +contract, strict liability, or tort (including negligence or otherwise) arising +in any way out of the use of this software, even if advised of the possibility +of such damage. \ No newline at end of file diff --git a/proto/code_analysis.proto b/proto/code_analysis.proto index 500d60be8..de7a55c58 100644 --- a/proto/code_analysis.proto +++ b/proto/code_analysis.proto @@ -2,13 +2,13 @@ syntax = "proto3"; package github.semantic; -import "ruby-terms.proto"; -import "ruby-diffs.proto"; -import "json-terms.proto"; -import "typescript-terms.proto"; -import "typescript-diffs.proto"; -import "python-terms.proto"; -import "python-diffs.proto"; +import "ruby_term.proto"; +import "ruby_diff.proto"; +import "json_term.proto"; +import "typescript_term.proto"; +import "typescript_diff.proto"; +import "python_term.proto"; +import "python_diff.proto"; import "types.proto"; import "error_details.proto"; @@ -36,8 +36,11 @@ service CodeAnalysis { // Calculate an import graph for a project. rpc GraphImports (ImportGraphRequest) returns (ImportGraphResponse); + // Calculate a call graph for a project. rpc GraphCalls (CallGraphRequest) returns (CallGraphResponse); + // Status and Health + // // Check health & status of the service. rpc CheckHealth (HealthCheckRequest) returns (HealthCheckResponse); } @@ -57,19 +60,23 @@ message ParseTreeResponse { } message RubyResponse { - repeated ruby_terms.RubyTerm terms = 1; + repeated rubyterm.RubyTerm terms = 1; + repeated DebugInfo errors = 2; } message JSONResponse { - repeated json_terms.JSONTerm terms = 1; + repeated jsonterm.JSONTerm terms = 1; + repeated DebugInfo errors = 2; } message TypeScriptResponse { - repeated typescript_terms.TypeScriptTerm terms = 1; + repeated typescriptterm.TypeScriptTerm terms = 1; + repeated DebugInfo errors = 2; } message PythonResponse { - repeated python_terms.PythonTerm terms = 1; + repeated pythonterm.PythonTerm terms = 1; + repeated DebugInfo errors = 2; } message SummarizeDiffRequest { @@ -96,15 +103,15 @@ message DiffTreeResponse { } message RubyDiffResponse { - repeated ruby_diffs.RubyDiff diffs = 1; + repeated rubydiff.RubyDiff diffs = 1; } message PythonDiffResponse { - repeated python_diffs.PythonDiff diffs = 1; + repeated pythondiff.PythonDiff diffs = 1; } message TypeScriptDiffResponse { - repeated typescript_diffs.TypeScriptDiff diffs = 1; + repeated typescriptdiff.TypeScriptDiff diffs = 1; } message CallGraphRequest { diff --git a/proto/json-terms.proto b/proto/json_term.proto similarity index 85% rename from proto/json-terms.proto rename to proto/json_term.proto index be75adeb2..8c4bfa408 100644 --- a/proto/json-terms.proto +++ b/proto/json_term.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.json_terms; +package github.semantic.jsonterm; import "types.proto"; -option java_package = "com.github.semantic.json_terms"; -option go_package = "github.com/semantic/json_terms;json"; +option java_package = "com.github.semantic.jsonterm"; +option go_package = "github.com/semantic/jsonterm;json"; message JSONTerm { JSONSyntax syntax = 1; diff --git a/proto/python-diffs.proto b/proto/python_diff.proto similarity index 98% rename from proto/python-diffs.proto rename to proto/python_diff.proto index 2ff33b136..75495f7c0 100644 --- a/proto/python-diffs.proto +++ b/proto/python_diff.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.python_diffs; +package github.semantic.pythondiff; import "types.proto"; -option java_package = "com.github.semantic.python_diffs"; -option go_package = "github.com/semantic/python_diffs;python"; +option java_package = "com.github.semantic.pythondiff"; +option go_package = "github.com/semantic/pythondiff;python"; message PythonDiff { oneof diff { diff --git a/proto/python-terms.proto b/proto/python_term.proto similarity index 98% rename from proto/python-terms.proto rename to proto/python_term.proto index 3991795d1..dd8a77d6d 100644 --- a/proto/python-terms.proto +++ b/proto/python_term.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.python_terms; +package github.semantic.pythonterm; import "types.proto"; -option java_package = "com.github.semantic.python_terms"; -option go_package = "github.com/semantic/python_terms;python"; +option java_package = "com.github.semantic.pythonterm"; +option go_package = "github.com/semantic/pythonterm;python"; message PythonTerm { PythonSyntax syntax = 1; diff --git a/proto/ruby-diffs.proto b/proto/ruby_diff.proto similarity index 98% rename from proto/ruby-diffs.proto rename to proto/ruby_diff.proto index b863e52b4..e6ebcd436 100644 --- a/proto/ruby-diffs.proto +++ b/proto/ruby_diff.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.ruby_diffs; +package github.semantic.rubydiff; import "types.proto"; -option java_package = "com.github.semantic.ruby_diffs"; -option go_package = "github.com/semantic/ruby_diffs;ruby"; +option java_package = "com.github.semantic.rubydiff"; +option go_package = "github.com/semantic/rubydiff;ruby"; message RubyDiff { oneof diff { diff --git a/proto/ruby-terms.proto b/proto/ruby_term.proto similarity index 98% rename from proto/ruby-terms.proto rename to proto/ruby_term.proto index 9dbc2764f..21c2d7e00 100644 --- a/proto/ruby-terms.proto +++ b/proto/ruby_term.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.ruby_terms; +package github.semantic.rubyterm; import "types.proto"; -option java_package = "com.github.semantic.ruby_terms"; -option go_package = "github.com/semantic/ruby_terms;ruby"; +option java_package = "com.github.semantic.rubyterm"; +option go_package = "github.com/semantic/rubyterm;ruby"; message RubyTerm { RubySyntax syntax = 1; diff --git a/proto/typescript-diffs.proto b/proto/typescript_diff.proto similarity index 99% rename from proto/typescript-diffs.proto rename to proto/typescript_diff.proto index 1d7c4f33e..b19b920e5 100644 --- a/proto/typescript-diffs.proto +++ b/proto/typescript_diff.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.typescript_diffs; +package github.semantic.typescriptdiff; import "types.proto"; -option java_package = "com.github.semantic.typescript_diffs"; -option go_package = "github.com/semantic/typescript_diffs;typescript"; +option java_package = "com.github.semantic.typescriptdiff"; +option go_package = "github.com/semantic/typescriptdiff;typescript"; message TypeScriptDiff { oneof diff { diff --git a/proto/typescript-terms.proto b/proto/typescript_term.proto similarity index 99% rename from proto/typescript-terms.proto rename to proto/typescript_term.proto index aa59df4b0..d1202331b 100644 --- a/proto/typescript-terms.proto +++ b/proto/typescript_term.proto @@ -1,12 +1,12 @@ // This file was generated by proto-gen. Do not edit by hand. syntax = "proto3"; -package github.semantic.typescript_terms; +package github.semantic.typescriptterm; import "types.proto"; -option java_package = "com.github.semantic.typescript_terms"; -option go_package = "github.com/semantic/typescript_terms;typescript"; +option java_package = "com.github.semantic.typescriptterm"; +option go_package = "github.com/semantic/typescriptterm;typescript"; message TypeScriptTerm { TypeScriptSyntax syntax = 1; diff --git a/semantic.cabal b/semantic.cabal index 4ff1943c4..8937bb30e 100644 --- a/semantic.cabal +++ b/semantic.cabal @@ -86,6 +86,7 @@ library , Data.Graph , Data.Graph.Adjacency , Data.Graph.Vertex + , Data.History , Data.JSON.Fields , Data.Language , Data.Map.Monoidal @@ -94,6 +95,9 @@ library , Data.Quieterm , Data.Range , Data.Record + , Data.Reprinting.Errors + , Data.Reprinting.Token + , Data.Reprinting.Splice , Data.Semigroup.App , Data.Scientific.Exts , Data.Source @@ -127,8 +131,12 @@ library , Language.Haskell.Syntax , Language.JSON.Grammar , Language.JSON.Assignment + , Language.JSON.PrettyPrint + , Language.MiniRuby.Assignment + , Language.MiniPython.Assignment , Language.Ruby.Grammar , Language.Ruby.Assignment + , Language.Ruby.PrettyPrint , Language.Ruby.Syntax , Language.TypeScript.Assignment , Language.TypeScript.Grammar @@ -142,10 +150,12 @@ library , Language.PHP.Syntax , Language.Python.Assignment , Language.Python.Grammar + , Language.Python.PrettyPrint , Language.Python.Syntax , Language.Java.Assignment , Language.Java.Grammar , Language.Java.Syntax + , Matching.Core , Numeric.Exts -- Parser glue , Parsing.CMark @@ -158,6 +168,10 @@ library , Rendering.Renderer , Rendering.Symbol , Rendering.TOC + , Reprinting.Tokenize + , Reprinting.Translate + , Reprinting.Typeset + , Reprinting.Pipeline -- High-level flow & operational functionality (logging, stats, etc.) , Semantic.AST , Semantic.CLI @@ -177,6 +191,7 @@ library , Semantic.Telemetry.Log , Semantic.Telemetry.Stat , Semantic.Util + , Semantic.Util.Rewriting , Semantic.Version -- Serialization , Serializing.DOT @@ -213,6 +228,7 @@ library , http-client-tls , http-types , kdt + , machines , mersenne-random-pure64 , mtl , network @@ -220,6 +236,7 @@ library , optparse-applicative , parallel , parsers + , prettyprinter , pretty-show , recursion-schemes , reducers @@ -308,6 +325,7 @@ test-suite test , Matching.Go.Spec , Numeric.Spec , Proto3.Roundtrip + , Reprinting.Spec , Rendering.TOC.Spec , Semantic.Spec , Semantic.CLI.Spec diff --git a/src/Control/Abstract/Matching.hs b/src/Control/Abstract/Matching.hs index af41cedd1..eeb4b8b4e 100644 --- a/src/Control/Abstract/Matching.hs +++ b/src/Control/Abstract/Matching.hs @@ -12,6 +12,7 @@ module Control.Abstract.Matching , succeeds , fails , runMatcher + , stepMatcher ) where import Data.Algebra @@ -92,11 +93,11 @@ match :: (f :< fs) => (f (Term (Sum fs) ann) -> b) -> Matcher b a -> Matcher (Term (Sum fs) ann) a -match f = Match (fmap f . project . termOut) +match f = Match (fmap f . projectTerm) -- | @narrow'@ attempts to project a union-type target to a more specific type. narrow' :: (f :< fs) => Matcher (Term (Sum fs) ann) (Maybe (f (Term (Sum fs) ann))) -narrow' = fmap (project . termOut) Target +narrow' = fmap projectTerm Target -- | 'narrow' behaves as @narrow'@, but fails if the target cannot be thus projected. narrow :: (f :< fs) => Matcher (Term (Sum fs) ann) (f (Term (Sum fs) ann)) @@ -113,13 +114,14 @@ runMatcher :: (Alternative m, Monad m, Corecursive t, Recursive t, Foldable (Bas runMatcher m = para (paraMatcher m) paraMatcher :: (Alternative m, Monad m, Corecursive t, Foldable (Base t)) => Matcher t a -> RAlgebra (Base t) t (m a) -paraMatcher m t = interp (embedTerm t) m <|> foldMapA snd t +paraMatcher m t = stepMatcher (embedTerm t) m <|> foldMapA snd t --- Simple interpreter. -interp :: (Alternative m, Monad m) => t -> Matcher t a -> m a -interp t (Choice a b) = interp t a <|> interp t b -interp t Target = pure t -interp t (Match f m) = foldMapA (`interp` m) (f t) -interp _ (Pure a) = pure a -interp _ Empty = empty -interp t (Then m f) = interp t m >>= interp t . f +-- | Run one step of a 'Matcher' computation. Look at 'runMatcher' if you want something +-- that folds over subterms. +stepMatcher :: (Alternative m, Monad m) => t -> Matcher t a -> m a +stepMatcher t (Choice a b) = stepMatcher t a <|> stepMatcher t b +stepMatcher t Target = pure t +stepMatcher t (Match f m) = foldMapA (`stepMatcher` m) (f t) +stepMatcher _ (Pure a) = pure a +stepMatcher _ Empty = empty +stepMatcher t (Then m f) = stepMatcher t m >>= stepMatcher t . f diff --git a/src/Data/History.hs b/src/Data/History.hs new file mode 100644 index 000000000..2602d0a7c --- /dev/null +++ b/src/Data/History.hs @@ -0,0 +1,38 @@ +{-# LANGUAGE TypeOperators, GADTs #-} + +module Data.History + ( History (..) + , mark + , remark + ) where + +import Data.Record +import Data.Range + +-- | 'History' values, when attached to a given 'Term', describe the ways in +-- which that term was modified during a refactoring pass, if any. +data History + = Refactored Range + -- ^ A 'Refactored' node was changed by a refactor but still has + -- (possibly-inaccurate) position information. + | Unmodified Range + -- ^ An 'Unmodified' node was not changed, but may have 'Refactored' children. + deriving (Show, Eq) + +-- | Convert a 'Term' annotated with a 'Range' to one annotated with a 'History'. +mark :: Functor f + => (Range -> History) + -> f (Record (Range ': fields)) + -> f (Record (History ': fields)) +mark f = fmap go where go (r :. a) = f r :. a + +-- | Change the 'History' annotation on a 'Term'. +remark :: Functor f + => (Range -> History) + -> f (Record (History ': fields)) + -> f (Record (History ': fields)) +remark f = fmap go where + go (r :. a) = x :. a where + x = case r of + Refactored r -> f r + Unmodified r -> f r diff --git a/src/Data/Reprinting/Errors.hs b/src/Data/Reprinting/Errors.hs new file mode 100644 index 000000000..c887c5b2d --- /dev/null +++ b/src/Data/Reprinting/Errors.hs @@ -0,0 +1,12 @@ +module Data.Reprinting.Errors ( TranslationError (..) ) where + +import Data.Reprinting.Token + +-- | Represents failure occurring in a 'Concrete' machine during the translation +-- phases of the reprinting pipeline. +data TranslationError + = UnbalancedPair Context [Context] + -- ^ Thrown if an unbalanced 'Enter'/'Exit' pair is encountered. + | NoTranslation Element [Context] + -- ^ Thrown if no translation found for a given element. + deriving (Eq, Show) diff --git a/src/Data/Reprinting/Splice.hs b/src/Data/Reprinting/Splice.hs new file mode 100644 index 000000000..7aaf470e6 --- /dev/null +++ b/src/Data/Reprinting/Splice.hs @@ -0,0 +1,73 @@ +module Data.Reprinting.Splice + ( Fragment(..) + , copy + , insert + , defer + , Splice(..) + , emit + , layout + , layouts + , space + , Whitespace(..) + , Indentation(..) + ) where + +import Data.Reprinting.Token +import Data.Sequence (singleton, fromList) +import Prologue hiding (Element) + +-- | An intermediate representation of concrete syntax in the reprinting pipeline. +data Fragment + = Verbatim Text + -- ^ Verbatim copy of original 'Text' (un-refactored). + | New Element [Context] Text + -- ^ New 'Text' to be inserted, along with original 'Element' and `Context` + -- allowing later steps to re-write. + | Defer Element [Context] + -- ^ To be handled further down the pipeline. + deriving (Eq, Show) + +-- | Copy along some original, un-refactored 'Text'. +copy :: Text -> Seq Fragment +copy = singleton . Verbatim + +-- | Insert some new 'Text'. +insert :: Element -> [Context] -> Text -> Seq Fragment +insert el c = singleton . New el c + +-- | Defer processing an element to a later stage. +defer :: Element -> [Context] -> Seq Fragment +defer el = singleton . Defer el + +-- | The final representation of concrete syntax in the reprinting pipeline. +data Splice + = Emit Text + | Layout Whitespace + deriving (Eq, Show) + +-- | Emit some 'Text' as a 'Splice'. +emit :: Text -> Seq Splice +emit = singleton . Emit + +-- | Construct a layout 'Splice'. +layout :: Whitespace -> Seq Splice +layout = singleton . Layout + +-- | Construct multiple layouts. +layouts :: [Whitespace] -> Seq Splice +layouts = fromList . fmap Layout + +-- | Single space. +space :: Seq Splice +space = layout Space + +-- | Indentation, spacing, and other whitespace. +data Whitespace + = HardWrap + | SoftWrap + | Space + | Indent Int Indentation + deriving (Eq, Show) + +data Indentation = Tabs | Spaces + deriving (Eq, Show) diff --git a/src/Data/Reprinting/Token.hs b/src/Data/Reprinting/Token.hs new file mode 100644 index 000000000..519eadcfd --- /dev/null +++ b/src/Data/Reprinting/Token.hs @@ -0,0 +1,71 @@ +module Data.Reprinting.Token + ( Token (..) + , Element (..) + , Control (..) + , Context (..) + , Operator (..) + ) where + +import Data.Text (Text) +import Data.Source (Source) + +-- | 'Token' encapsulates 'Element' and 'Control' tokens, as well as sliced +-- portions of the original 'Source' for a given AST. +data Token + = Chunk Source -- ^ Verbatim 'Source' from AST, unmodified. + | TElement Element -- ^ Content token to be rendered. + | TControl Control -- ^ AST's context. + deriving (Show, Eq) + +-- | 'Element' tokens describe atomic pieces of source code to be +-- output to a rendered document. These tokens are language-agnostic +-- and are interpreted into language-specific representations at a +-- later point in the reprinting pipeline. +data Element + = Fragment Text -- ^ A literal chunk of text. + | Truth Bool -- ^ A boolean value. + | Nullity -- ^ @null@ or @nil@ or some other zero value. + | TSep -- ^ Some sort of delimiter, interpreted in some 'Context'. + | TSym -- ^ Some sort of symbol, interpreted in some 'Context'. + | TThen + | TElse + | TOpen -- ^ The beginning of some 'Context', such as an @[@ or @{@. + | TClose -- ^ The opposite of 'TOpen'. + deriving (Eq, Show) + +-- | 'Control' tokens describe information about some AST's context. +-- Though these are ultimately rendered as whitespace (or nothing) on +-- the page, they are needed to provide information as to how deeply +-- subsequent entries in the pipeline should indent. +data Control + = Enter Context + | Exit Context + | Log String + deriving (Eq, Show) + +-- | A 'Context' represents a scope in which other tokens can be +-- interpreted. For example, in the 'Imperative' context a 'TSep' +-- could be a semicolon or newline, whereas in a 'List' context a +-- 'TSep' is probably going to be a comma. +data Context + = TList + | THash + | TPair + | TMethod + | TFunction + | TCall + | TParams + | TReturn + | TIf + | TInfixL Operator Int + | Imperative + deriving (Show, Eq) + +-- | A sum type representing every concievable infix operator a +-- language can define. These are handled by instances of 'Concrete' +-- and given appropriate precedence. +data Operator + = Add + | Multiply + | Subtract + deriving (Show, Eq) diff --git a/src/Data/Syntax.hs b/src/Data/Syntax.hs index 75be7e1a3..1865d9c24 100644 --- a/src/Data/Syntax.hs +++ b/src/Data/Syntax.hs @@ -18,6 +18,7 @@ import GHC.TypeLits import Diffing.Algorithm hiding (Empty) import Prelude import Prologue +import Reprinting.Tokenize hiding (Context, Element) import qualified Assigning.Assignment as Assignment import qualified Data.Error as Error import Proto3.Suite.Class @@ -165,6 +166,9 @@ instance Show1 Identifier where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Identifier where eval (Identifier name) = pure (LvalLocal name) +instance Tokenize Identifier where + tokenize = yield . Fragment . formatName . Data.Syntax.name + instance FreeVariables1 Identifier where liftFreeVariables _ (Identifier x) = Set.singleton x @@ -197,6 +201,9 @@ instance Show1 Empty where liftShowsPrec _ _ _ _ = showString "Empty" instance Evaluatable Empty where eval _ = rvalBox unit +instance Tokenize Empty where + tokenize = ignore + -- | Syntax representing a parsing or assignment error. data Error a = Error { errorCallStack :: ErrorStack, errorExpected :: [String], errorActual :: Maybe String, errorChildren :: [a] } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Message1, Named1, Ord, Show, ToJSONFields1, Traversable) @@ -207,6 +214,10 @@ instance Show1 Error where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Error +instance Tokenize Error where + -- TODO: Considering producing comments like "ERROR: due to.." instead of ignoring. + tokenize = ignore + instance Named String where nameOf _ = "string" @@ -295,3 +306,6 @@ instance Show1 Context where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Context where eval Context{..} = subtermRef contextSubject + +instance Tokenize Context where + tokenize Context{..} = sequenceA_ (sepTrailing contextTerms) *> contextSubject diff --git a/src/Data/Syntax/Comment.hs b/src/Data/Syntax/Comment.hs index 338060d43..0d547983a 100644 --- a/src/Data/Syntax/Comment.hs +++ b/src/Data/Syntax/Comment.hs @@ -7,6 +7,7 @@ import Data.JSON.Fields import Diffing.Algorithm import Prologue import Proto3.Suite.Class +import Reprinting.Tokenize as Token -- | An unnested comment (line or block). newtype Comment a = Comment { commentContent :: Text } @@ -19,6 +20,9 @@ instance Show1 Comment where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Comment where eval _ = rvalBox unit +instance Tokenize Comment where + tokenize = yield . Fragment . commentContent + -- TODO: nested comment types -- TODO: documentation comment types -- TODO: literate programming comment types? alternatively, consider those as markup diff --git a/src/Data/Syntax/Declaration.hs b/src/Data/Syntax/Declaration.hs index 7fc63c9c7..c41a14f94 100644 --- a/src/Data/Syntax/Declaration.hs +++ b/src/Data/Syntax/Declaration.hs @@ -9,6 +9,7 @@ import qualified Data.Set as Set import Diffing.Algorithm import Prologue import Proto3.Suite.Class +import Reprinting.Tokenize data Function a = Function { functionContext :: ![a], functionName :: !a, functionParameters :: ![a], functionBody :: !a } deriving (Eq, Ord, Show, Foldable, Traversable, Functor, Generic1, Hashable1, ToJSONFields1, Named1, Message1) @@ -31,6 +32,12 @@ instance Evaluatable Function where pure (Rval addr) where paramNames = foldMap (maybeToList . declaredName . subterm) +instance Tokenize Function where + tokenize Function{..} = within' TFunction $ do + functionName + within' TParams $ sequenceA_ (sep functionParameters) + functionBody + instance Declarations1 Function where liftDeclaredName declaredName = declaredName . functionName @@ -58,6 +65,12 @@ instance Evaluatable Method where pure (Rval addr) where paramNames = foldMap (maybeToList . declaredName . subterm) +instance Tokenize Method where + tokenize Method{..} = within' TMethod $ do + methodName + within' TParams $ sequenceA_ (sep methodParameters) + methodBody + instance Declarations1 Method where liftDeclaredName declaredName = declaredName . methodName diff --git a/src/Data/Syntax/Expression.hs b/src/Data/Syntax/Expression.hs index 3b3696be5..d5655d313 100644 --- a/src/Data/Syntax/Expression.hs +++ b/src/Data/Syntax/Expression.hs @@ -11,6 +11,7 @@ import Diffing.Algorithm hiding (Delete) import Prologue hiding (index, Member, This, null) import Prelude hiding (null) import Proto3.Suite.Class +import Reprinting.Tokenize -- | Typical prefix function application, like `f(x)` in many languages, or `f x` in Haskell. data Call a = Call { callContext :: ![a], callFunction :: !a, callParams :: ![a], callBlock :: !a } @@ -27,6 +28,13 @@ instance Evaluatable Call where args <- traverse subtermAddress callParams Rval <$> call op recv args +instance Tokenize Call where + tokenize Call{..} = within TCall $ do + -- TODO: callContext + callFunction + within' TParams $ sequenceA_ (sep callParams) + callBlock + data LessThan a = LessThan { lhs :: a, rhs :: a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -126,6 +134,9 @@ instance Evaluatable Plus where eval t = rvalBox =<< (traverse subtermValue t >>= go) where go (Plus a b) = liftNumeric2 add a b where add = liftReal (+) +instance Tokenize Plus where + tokenize Plus{..} = within' (TInfixL Add 6) $ lhs *> yield TSym <* rhs + data Minus a = Minus { lhs :: a, rhs :: a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -137,6 +148,9 @@ instance Evaluatable Minus where eval t = rvalBox =<< (traverse subtermValue t >>= go) where go (Minus a b) = liftNumeric2 sub a b where sub = liftReal (-) +instance Tokenize Minus where + tokenize Minus{..} = within' (TInfixL Subtract 6) $ lhs *> yield TSym <* rhs + data Times a = Times { lhs :: a, rhs :: a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -148,6 +162,9 @@ instance Evaluatable Times where eval t = rvalBox =<< (traverse subtermValue t >>= go) where go (Times a b) = liftNumeric2 mul a b where mul = liftReal (*) +instance Tokenize Times where + tokenize Times{..} = within' (TInfixL Multiply 7) $ lhs *> yield TSym <* rhs + data DividedBy a = DividedBy { lhs :: a, rhs :: a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) diff --git a/src/Data/Syntax/Literal.hs b/src/Data/Syntax/Literal.hs index 3770e1abd..1cb67378f 100644 --- a/src/Data/Syntax/Literal.hs +++ b/src/Data/Syntax/Literal.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE DeriveAnyClass, ViewPatterns, ScopedTypeVariables, DuplicateRecordFields #-} +{-# LANGUAGE DeriveAnyClass, DuplicateRecordFields, ScopedTypeVariables, ViewPatterns #-} {-# OPTIONS_GHC -Wno-missing-export-lists #-} module Data.Syntax.Literal where -import Data.Abstract.Evaluatable +import Data.Abstract.Evaluatable as Eval import Data.JSON.Fields import Data.Scientific.Exts import qualified Data.Text as T @@ -11,6 +11,7 @@ import Numeric.Exts import Prelude hiding (Float, null) import Prologue hiding (Set, hash, null) import Proto3.Suite.Class +import Reprinting.Tokenize as Tok import Text.Read (readMaybe) -- Boolean @@ -31,6 +32,9 @@ instance Show1 Boolean where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Boolean where eval (Boolean x) = boolean x >>= rvalBox +instance Tokenize Boolean where + tokenize = yield . Truth . booleanContent + -- Numeric -- | A literal integer of unspecified width. No particular base is implied. @@ -46,6 +50,9 @@ instance Evaluatable Data.Syntax.Literal.Integer where eval (Data.Syntax.Literal.Integer x) = rvalBox =<< (integer <$> either (const (throwEvalError (IntegerFormatError x))) pure (parseInteger x)) +instance Tokenize Data.Syntax.Literal.Integer where + tokenize = yield . Fragment . integerContent + -- | A literal float of unspecified width. newtype Float a = Float { floatContent :: Text } @@ -59,6 +66,9 @@ instance Evaluatable Data.Syntax.Literal.Float where eval (Float s) = rvalBox =<< (float <$> either (const (throwEvalError (FloatFormatError s))) pure (parseScientific s)) +instance Tokenize Data.Syntax.Literal.Float where + tokenize = yield . Fragment . floatContent + -- Rational literals e.g. `2/3r` newtype Rational a = Rational { value :: Text } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -130,6 +140,9 @@ instance Show1 TextElement where liftShowsPrec = genericLiftShowsPrec instance Evaluatable TextElement where eval (TextElement x) = rvalBox (string x) +instance Tokenize TextElement where + tokenize = yield . Fragment . textElementContent + -- | A sequence of textual contents within a string literal. newtype EscapeSequence a = EscapeSequence { value :: Text } deriving (Diffable, Eq, Foldable, Functor, Generic1, Hashable1, Ord, Show, Traversable, FreeVariables1, Declarations1, ToJSONFields1, Named1, Message1) @@ -150,6 +163,9 @@ instance Show1 Null where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Null where eval _ = rvalBox null +instance Tokenize Null where + tokenize _ = yield Nullity + newtype Symbol a = Symbol { symbolElements :: [a] } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -195,6 +211,9 @@ instance Show1 Array where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Array where eval (Array a) = rvalBox =<< array =<< traverse subtermAddress a +instance Tokenize Array where + tokenize = list . arrayElements + newtype Hash a = Hash { hashElements :: [a] } deriving (Eq, Ord, Show, Foldable, Traversable, Functor, Generic1, Hashable1, Diffable, FreeVariables1, Declarations1, ToJSONFields1, Named1, Message1) @@ -203,7 +222,10 @@ instance Ord1 Hash where liftCompare = genericLiftCompare instance Show1 Hash where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Hash where - eval t = rvalBox =<< (hash <$> traverse (subtermValue >=> asPair) (hashElements t)) + eval t = rvalBox =<< (Eval.hash <$> traverse (subtermValue >=> asPair) (hashElements t)) + +instance Tokenize Hash where + tokenize = Tok.hash . hashElements data KeyValue a = KeyValue { key :: !a, value :: !a } deriving (Eq, Ord, Show, Foldable, Traversable, Functor, Generic1, Hashable1, Diffable, FreeVariables1, Declarations1, ToJSONFields1, Named1, Message1) @@ -216,6 +238,9 @@ instance Evaluatable KeyValue where eval (fmap subtermValue -> KeyValue{..}) = rvalBox =<< (kvPair <$> key <*> value) +instance Tokenize KeyValue where + tokenize (KeyValue k v) = pair k v + newtype Tuple a = Tuple { tupleContents :: [a] } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) diff --git a/src/Data/Syntax/Statement.hs b/src/Data/Syntax/Statement.hs index 166d8811f..d53828879 100644 --- a/src/Data/Syntax/Statement.hs +++ b/src/Data/Syntax/Statement.hs @@ -11,6 +11,7 @@ import Diffing.Algorithm import Prelude import Prologue import Proto3.Suite.Class +import Reprinting.Tokenize -- | Imperative sequence of statements/declarations s.t.: -- @@ -28,6 +29,9 @@ instance ToJSON1 Statements instance Evaluatable Statements where eval (Statements xs) = maybe (rvalBox unit) (runApp . foldMap1 (App . subtermRef)) (nonEmpty xs) +instance Tokenize Statements where + tokenize = imperative + -- | Conditional. This must have an else block, which can be filled with some default value when omitted in the source, e.g. 'pure ()' for C-style if-without-else or 'pure Nothing' for Ruby-style, in both cases assuming some appropriate Applicative context into which the If will be lifted. data If a = If { ifCondition :: !a, ifThenBody :: !a, ifElseBody :: !a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -41,6 +45,14 @@ instance Evaluatable If where bool <- subtermValue cond Rval <$> ifthenelse bool (subtermAddress if') (subtermAddress else') +instance Tokenize If where + tokenize If{..} = within' TIf $ do + ifCondition + yield TThen + ifThenBody + yield TElse + ifElseBody + -- | Else statement. The else condition is any term, that upon successful completion, continues evaluation to the elseBody, e.g. `for ... else` in Python. data Else a = Else { elseCondition :: !a, elseBody :: !a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -191,6 +203,9 @@ instance Show1 Return where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Return where eval (Return x) = Rval <$> (subtermAddress x >>= earlyReturn) +instance Tokenize Return where + tokenize (Return x) = within' TReturn x + newtype Yield a = Yield { value :: a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) diff --git a/src/Data/Syntax/Type.hs b/src/Data/Syntax/Type.hs index 3366b7b37..edaf110cb 100644 --- a/src/Data/Syntax/Type.hs +++ b/src/Data/Syntax/Type.hs @@ -8,6 +8,7 @@ import Diffing.Algorithm import Prelude hiding (Bool, Float, Int, Double) import Prologue hiding (Map) import Proto3.Suite.Class +import Reprinting.Tokenize data Array a = Array { arraySize :: !(Maybe a), arrayElementType :: !a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) @@ -32,6 +33,11 @@ instance Show1 Annotation where liftShowsPrec = genericLiftShowsPrec instance Evaluatable Annotation where eval Annotation{annotationSubject = Subterm _ action} = action +instance Tokenize Annotation where + -- FIXME: This ignores annotationType. + -- TODO: Not sure what this should look like yet + tokenize Annotation{..} = annotationSubject + data Function a = Function { functionParameters :: ![a], functionReturn :: !a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) diff --git a/src/Data/Term.hs b/src/Data/Term.hs index acbe2312e..22faabefb 100644 --- a/src/Data/Term.hs +++ b/src/Data/Term.hs @@ -1,14 +1,17 @@ -{-# LANGUAGE RankNTypes, TypeFamilies, TypeOperators, ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes, TypeFamilies, TypeOperators, ScopedTypeVariables, FunctionalDependencies #-} module Data.Term ( Term(..) , termIn , termAnnotation , termOut +, injectTerm +, projectTerm , TermF(..) , termSize , hoistTerm , hoistTermF , stripTerm +, Annotated (..) ) where import Prologue @@ -16,6 +19,7 @@ import Data.Aeson import Data.JSON.Fields import Data.Record import Text.Show +import qualified Data.Sum as Sum import Proto3.Suite.Class import Proto3.Suite.DotProto import qualified Proto3.Wire.Encode as Encode @@ -30,10 +34,23 @@ termAnnotation = termFAnnotation . unTerm termOut :: Term syntax ann -> syntax (Term syntax ann) termOut = termFOut . unTerm +projectTerm :: forall f syntax ann . (f :< syntax) => Term (Sum syntax) ann -> Maybe (f (Term (Sum syntax) ann)) +projectTerm = Sum.project . termOut data TermF syntax ann recur = In { termFAnnotation :: ann, termFOut :: syntax recur } deriving (Eq, Ord, Foldable, Functor, Show, Traversable) +-- | A convenience typeclass to get the annotation out of a 'Term' or 'TermF'. +-- Useful in term-rewriting algebras. +class Annotated t ann | t -> ann where + annotation :: t -> ann + +instance Annotated (TermF syntax ann recur) ann where + annotation = termFAnnotation + +instance Annotated (Term syntax ann) ann where + annotation = termAnnotation + -- | Return the node count of a term. termSize :: (Foldable f, Functor f) => Term f annotation -> Int @@ -44,6 +61,9 @@ termSize = cata size where termIn :: ann -> syntax (Term syntax ann) -> Term syntax ann termIn = (Term .) . In +injectTerm :: (f :< syntax) => ann -> f (Term (Sum syntax) ann) -> Term (Sum syntax) ann +injectTerm a = termIn a . Sum.inject + hoistTerm :: Functor f => (forall a. f a -> g a) -> Term f a -> Term g a hoistTerm f = go where go (Term r) = Term (hoistTermF f (fmap go r)) diff --git a/src/Language/JSON/PrettyPrint.hs b/src/Language/JSON/PrettyPrint.hs new file mode 100644 index 000000000..b8aa7edc5 --- /dev/null +++ b/src/Language/JSON/PrettyPrint.hs @@ -0,0 +1,78 @@ +module Language.JSON.PrettyPrint + ( defaultBeautyOpts + , defaultJSONPipeline + , printingJSON + , beautifyingJSON + , minimizingJSON + ) where + +import Prologue hiding (throwError) + +import Control.Arrow +import Control.Monad.Effect +import Control.Monad.Effect.Exception (Exc, throwError) +import Data.Machine +import Data.Reprinting.Errors +import Data.Reprinting.Splice +import Data.Reprinting.Token + +-- | Default printing pipeline for JSON. +defaultJSONPipeline :: (Member (Exc TranslationError) effs) + => ProcessT (Eff effs) Fragment Splice +defaultJSONPipeline + = printingJSON + ~> beautifyingJSON defaultBeautyOpts + +-- | Print JSON syntax. +printingJSON :: Monad m => ProcessT m Fragment Fragment +printingJSON = auto step ~> flattened where + step :: Fragment -> Seq Fragment + step s@(Defer el cs) = + let ins = insert el cs + in case (el, listToMaybe cs) of + (Truth True, _) -> ins "true" + (Truth False, _) -> ins "false" + (Nullity, _) -> ins "null" + + (TOpen, Just TList) -> ins "[" + (TClose, Just TList) -> ins "]" + (TOpen, Just THash) -> ins "{" + (TClose, Just THash) -> ins "}" + + (TSep, Just TList) -> ins "," + (TSep, Just TPair) -> ins ":" + (TSep, Just THash) -> ins "," + + _ -> pure s + + step x = pure x + +-- TODO: Fill out and implement configurable options like indentation count, +-- tabs vs. spaces, etc. +data JSONBeautyOpts = JSONBeautyOpts { jsonIndent :: Int, jsonUseTabs :: Bool } + deriving (Eq, Show) + +defaultBeautyOpts :: JSONBeautyOpts +defaultBeautyOpts = JSONBeautyOpts 2 False + +-- | Produce JSON with configurable whitespace and layout. +beautifyingJSON :: (Member (Exc TranslationError) effs) + => JSONBeautyOpts -> ProcessT (Eff effs) Fragment Splice +beautifyingJSON _ = autoT (Kleisli step) ~> flattened where + step (Defer el cs) = throwError (NoTranslation el cs) + step (Verbatim txt) = pure $ emit txt + step (New el cs txt) = pure $ case (el, listToMaybe cs) of + (TOpen, Just THash) -> emit txt <> layouts [HardWrap, Indent 2 Spaces] + (TClose, Just THash) -> layout HardWrap <> emit txt + (TSep, Just TList) -> emit txt <> space + (TSep, Just TPair) -> emit txt <> space + (TSep, Just THash) -> emit txt <> layouts [HardWrap, Indent 2 Spaces] + _ -> emit txt + +-- | Produce whitespace minimal JSON. +minimizingJSON :: (Member (Exc TranslationError) effs) + => ProcessT (Eff effs) Fragment Splice +minimizingJSON = autoT (Kleisli step) ~> flattened where + step (Defer el cs) = throwError (NoTranslation el cs) + step (Verbatim txt) = pure $ emit txt + step (New _ _ txt) = pure $ emit txt diff --git a/src/Language/MiniPython/Assignment.hs b/src/Language/MiniPython/Assignment.hs new file mode 100644 index 000000000..8bdb820f3 --- /dev/null +++ b/src/Language/MiniPython/Assignment.hs @@ -0,0 +1,156 @@ +{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} -- FIXME +module Language.MiniPython.Assignment +( +-- Small version of Python to enable internal framework development. + assignment +, Syntax +, Grammar +, Term +) where + +import Assigning.Assignment hiding (Assignment, Error) +import qualified Assigning.Assignment as Assignment +import Data.Abstract.Name (name) +import Data.Record +import Data.Sum +import Data.Syntax + ( contextualize + , emptyTerm + , handleError + , infixContext + , makeTerm + , makeTerm' + , makeTerm'' + , makeTerm1 + , parseError + , postContextualize + ) +import qualified Data.Syntax as Syntax +import qualified Data.Syntax.Comment as Comment +import qualified Data.Syntax.Declaration as Declaration +import qualified Data.Syntax.Expression as Expression +import qualified Data.Syntax.Literal as Literal +import qualified Data.Syntax.Statement as Statement +import qualified Data.Syntax.Type as Type +import qualified Data.Term as Term +import Language.Python.Grammar as Grammar +import Prologue + + +-- | The type of Python syntax. +type Syntax = + '[ Declaration.Function + , Expression.Call + , Expression.Minus + , Expression.Plus + , Expression.Times + , Literal.Integer + , Literal.Boolean + , Statement.If + , Statement.Return + , Statement.Statements + , Syntax.Context + , Syntax.Empty + , Syntax.Error + , Syntax.Identifier + , Type.Annotation + , Comment.Comment + , [] + ] + +type Term = Term.Term (Sum Syntax) (Record Location) +type Assignment = Assignment.Assignment [] Grammar + +-- | Assignment from AST in Python's grammar onto a program in Python's syntax. +assignment :: Assignment Term +assignment = handleError $ makeTerm <$> symbol Module <*> children (Statement.Statements <$> manyTerm expression) <|> parseError + +expression :: Assignment Term +expression = handleError (choice expressionChoices) + +expressionChoices :: [Assignment Term] +expressionChoices = + [ binaryOperator + , boolean + , call + , expressionStatement + , functionDefinition + , identifier + , integer + , returnStatement + , ifStatement + ] + +-- NOTE: Important that we don't flatten out the Imperative for single item lists +expressions :: Assignment Term +expressions = makeTerm <$> location <*> manyTerm expression + +expressionStatement :: Assignment Term +expressionStatement = makeTerm'' <$> symbol ExpressionStatement <*> children (someTerm expression) + +expressionList :: Assignment Term +expressionList = makeTerm'' <$> symbol ExpressionList <*> children (someTerm expression) + +functionDefinition :: Assignment Term +functionDefinition = + makeFunctionDeclaration <$> symbol FunctionDefinition <*> children ((,,,) <$> term expression <* symbol Parameters <*> children (manyTerm expression) <*> optional (symbol Type *> children (term expression)) <*> expressions) + <|> makeFunctionDeclaration <$> (symbol Lambda' <|> symbol Lambda) <*> children ((,,,) <$ token AnonLambda <*> emptyTerm <*> (symbol LambdaParameters *> children (manyTerm expression) <|> pure []) <*> optional (symbol Type *> children (term expression)) <*> expressions) + where + makeFunctionDeclaration loc (functionName', functionParameters, ty, functionBody) = makeTerm loc $ Type.Annotation (makeTerm loc $ Declaration.Function [] functionName' functionParameters functionBody) (fromMaybe (makeTerm loc Syntax.Empty) ty) + +binaryOperator :: Assignment Term +binaryOperator = makeTerm' <$> symbol BinaryOperator <*> children (infixTerm expression (term expression) + [ (inject .) . Expression.Plus <$ symbol AnonPlus + , (inject .) . Expression.Minus <$ symbol AnonMinus + , (inject .) . Expression.Times <$ symbol AnonStar + ]) + +identifier :: Assignment Term +identifier = makeTerm <$> (symbol Identifier <|> symbol Identifier' <|> symbol DottedName) <*> (Syntax.Identifier . name <$> source) + +integer :: Assignment Term +integer = makeTerm <$> symbol Integer <*> (Literal.Integer <$> source) + +comment :: Assignment Term +comment = makeTerm <$> symbol Comment <*> (Comment.Comment <$> source) + +returnStatement :: Assignment Term +returnStatement = makeTerm <$> symbol ReturnStatement <*> children (Statement.Return <$> term (expressionList <|> emptyTerm)) + +call :: Assignment Term +call = makeTerm <$> symbol Call <*> children (Expression.Call [] <$> term (identifier <|> expression) <*> (symbol ArgumentList *> children (manyTerm expression)) <*> emptyTerm) + +boolean :: Assignment Term +boolean = makeTerm <$> token Grammar.True <*> pure Literal.true + <|> makeTerm <$> token Grammar.False <*> pure Literal.false + +ifStatement :: Assignment Term +ifStatement = makeTerm <$> symbol IfStatement <*> children (Statement.If <$> term expression <*> term (makeTerm <$> location <*> manyTermsTill expression (void (symbol ElseClause) <|> void (symbol ElifClause) <|> eof)) <*> (flip (foldr makeElif) <$> many elifClause <*> (symbol ElseClause *> children expressions <|> emptyTerm))) + where elifClause = (,) <$> symbol ElifClause <*> children (Statement.If <$> term expression <*> expressions) + makeElif (loc, makeIf) rest = makeTerm loc (makeIf rest) + +-- Helpers + +-- | Match a term optionally preceded by comment(s), or a sequence of comments if the term is not present. +manyTerm :: Assignment Term -> Assignment [Term] +manyTerm term = many (contextualize comment term <|> makeTerm1 <$> (Syntax.Context <$> some1 comment <*> emptyTerm)) + +someTerm :: Assignment Term -> Assignment [Term] +someTerm term = some (contextualize comment term <|> makeTerm1 <$> (Syntax.Context <$> some1 comment <*> emptyTerm)) + +term :: Assignment Term -> Assignment Term +term term = contextualize comment (postContextualize comment term) + +-- | Match a series of terms or comments until a delimiter is matched. +manyTermsTill :: Assignment Term -> Assignment b -> Assignment [Term] +manyTermsTill step end = manyTill (step <|> comment) end + +-- | Match infix terms separated by any of a list of operators, assigning any comments following each operand. +infixTerm :: Assignment Term + -> Assignment Term + -> [Assignment (Term -> Term -> Sum Syntax Term)] + -> Assignment (Sum Syntax Term) +infixTerm = infixContext comment + +{-# ANN module ("HLint: ignore Eta reduce" :: String) #-} diff --git a/src/Language/MiniRuby/Assignment.hs b/src/Language/MiniRuby/Assignment.hs new file mode 100644 index 000000000..820010b2b --- /dev/null +++ b/src/Language/MiniRuby/Assignment.hs @@ -0,0 +1,221 @@ +{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} -- FIXME +module Language.MiniRuby.Assignment +( +-- Small version of Ruby to enable internal framework development. + assignment +, Syntax +, Term +) where + +import Assigning.Assignment hiding (Assignment, Error) +import qualified Assigning.Assignment as Assignment +import Data.Abstract.Name (name) +import Data.List (elem) +import Data.Record +import Data.Sum +import Data.Syntax + ( contextualize + , emptyTerm + , handleError + , infixContext + , makeTerm + , makeTerm' + , makeTerm'' + , makeTerm1 + , parseError + , postContextualize + ) +import qualified Data.Syntax as Syntax +import qualified Data.Syntax.Comment as Comment +import qualified Data.Syntax.Declaration as Declaration +import qualified Data.Syntax.Expression as Expression +import qualified Data.Syntax.Literal as Literal +import qualified Data.Syntax.Statement as Statement +import qualified Data.Term as Term +import Language.Ruby.Grammar as Grammar +import qualified Language.Ruby.Syntax as Ruby.Syntax +import Prologue hiding (for) + +-- | Small version of Ruby syntax for testing the code rewriting pipeline. +type Syntax = + '[ Comment.Comment + , Declaration.Function + , Declaration.Method + , Expression.Minus + , Expression.Plus + , Expression.Times + , Ruby.Syntax.Send + , Statement.Statements + , Syntax.Context + , Syntax.Empty + , Syntax.Error + , Syntax.Identifier + , Literal.Integer + , [] + ] + +type Term = Term.Term (Sum Syntax) (Record Location) +type Assignment = Assignment.Assignment [] Grammar + +assignment :: Assignment Term +assignment = handleError $ makeTerm <$> symbol Program <*> children (Statement.Statements <$> many expression) <|> parseError + +expression :: Assignment Term +expression = term . handleError $ + choice [ binary + , identifier + , number + , method + , methodCall + , parenthesizedExpressions ] + +-- NOTE: Important that we don't flatten out the Imperative for single item lists +expressions :: Assignment Term +expressions = makeTerm <$> location <*> many expression + +parenthesizedExpressions :: Assignment Term +parenthesizedExpressions = makeTerm'' <$> symbol ParenthesizedStatements <*> children (many expression) + +number :: Assignment Term +number = makeTerm <$> symbol Grammar.Integer <*> (Literal.Integer <$> source) + +identifier :: Assignment Term +identifier = + vcallOrLocal + <|> mk Constant + <|> mk InstanceVariable + <|> mk ClassVariable + <|> mk GlobalVariable + <|> mk Operator + <|> mk Super + <|> mk Setter + <|> mk SplatArgument + <|> mk HashSplatArgument + <|> mk BlockArgument + <|> mk Uninterpreted + where + mk s = makeTerm <$> symbol s <*> (Syntax.Identifier . name <$> source) + vcallOrLocal = do + (loc, ident, locals) <- identWithLocals + let identTerm = makeTerm loc (Syntax.Identifier (name ident)) + if ident `elem` locals + then pure identTerm + else pure $ makeTerm loc (Ruby.Syntax.Send Nothing (Just identTerm) [] Nothing) + +method :: Assignment Term +method = makeTerm <$> symbol Method <*> (withNewScope . children) (Declaration.Method [] <$> emptyTerm <*> methodSelector <*> params <*> expressions') + where params = symbol MethodParameters *> children (many parameter) <|> pure [] + expressions' = makeTerm <$> location <*> many expression + +methodSelector :: Assignment Term +methodSelector = makeTerm <$> symbols <*> (Syntax.Identifier <$> (name <$> source)) + where + symbols = symbol Identifier + <|> symbol Constant + <|> symbol Operator + <|> symbol Setter + <|> symbol Super -- TODO(@charliesome): super calls are *not* method calls and need to be assigned into their own syntax terms + +parameter :: Assignment Term +parameter = postContextualize comment (term uncontextualizedParameter) + where + uncontextualizedParameter = + lhsIdent + <|> splatParameter + <|> hashSplatParameter + <|> blockParameter + <|> keywordParameter + <|> optionalParameter + <|> makeTerm <$> symbol DestructuredParameter <*> children (many parameter) + -- splat and hash splat arguments can be unnamed. we don't currently + -- support unnamed arguments in the term syntax, so the use of emptyTerm + -- here is a huge hack. what we should be able to do is return a Nothing + -- for the argument name for splats and hash splats. TODO fix me: + mkSplat s = symbol s *> children (lhsIdent <|> emptyTerm) + splatParameter = mkSplat SplatParameter + hashSplatParameter = mkSplat HashSplatParameter + blockParameter = symbol BlockParameter *> children lhsIdent + -- we don't yet care about default expressions for optional (including + -- keyword) parameters, but we need to match on them to prevent errors: + keywordParameter = symbol KeywordParameter *> children (lhsIdent <* optional expression) + optionalParameter = symbol OptionalParameter *> children (lhsIdent <* expression) + +lhsIdent :: Assignment Term +lhsIdent = do + (loc, ident, locals) <- identWithLocals + putLocals (ident : locals) + pure $ makeTerm loc (Syntax.Identifier (name ident)) + +methodCall :: Assignment Term +methodCall = makeTerm' <$> symbol MethodCall <*> children send -- (require <|> load <|> send) + where + send = inject <$> ((regularCall <|> funcCall <|> scopeCall <|> dotCall) <*> optional block) + + funcCall = Ruby.Syntax.Send Nothing <$> selector <*> args + regularCall = symbol Call *> children (Ruby.Syntax.Send <$> (Just <$> expression) <*> selector) <*> args + scopeCall = symbol ScopeResolution *> children (Ruby.Syntax.Send <$> (Just <$> expression) <*> selector) <*> args + dotCall = symbol Call *> children (Ruby.Syntax.Send <$> (Just <$> term expression) <*> pure Nothing <*> args) + + selector = Just <$> term methodSelector + -- require = inject <$> (symbol Identifier *> do + -- s <- rawSource + -- guard (s `elem` ["require", "require_relative"]) + -- Ruby.Syntax.Require (s == "require_relative") <$> nameExpression) + -- load = inject <$ symbol Identifier <*> do + -- s <- rawSource + -- guard (s == "load") + -- (symbol ArgumentList <|> symbol ArgumentListWithParens) *> children (Ruby.Syntax.Load <$> expression <*> optional expression) + -- nameExpression = (symbol ArgumentList <|> symbol ArgumentListWithParens) *> children expression + +args :: Assignment [Term] +args = (symbol ArgumentList <|> symbol ArgumentListWithParens) *> children (many expression) <|> many expression + +block :: Assignment Term +block = makeTerm <$> symbol DoBlock <*> scopedBlockChildren + <|> makeTerm <$> symbol Block <*> scopedBlockChildren + where scopedBlockChildren = withExtendedScope blockChildren + blockChildren = children (Declaration.Function [] <$> emptyTerm <*> params <*> expressions) + params = symbol BlockParameters *> children (many parameter) <|> pure [] + +binary :: Assignment Term +binary = makeTerm' <$> symbol Binary <*> children (infixTerm expression expression + [ (inject .) . Expression.Plus <$ symbol AnonPlus + , (inject .) . Expression.Minus <$ symbol AnonMinus' + , (inject .) . Expression.Times <$ symbol AnonStar' + ]) + +comment :: Assignment Term +comment = makeTerm <$> symbol Comment <*> (Comment.Comment <$> source) + +term :: Assignment Term -> Assignment Term +term term = contextualize comment term <|> makeTerm1 <$> (Syntax.Context <$> some1 comment <*> emptyTerm) + +-- | Match infix terms separated by any of a list of operators, assigning any comments following each operand. +infixTerm :: Assignment Term + -> Assignment Term + -> [Assignment (Term -> Term -> Sum Syntax Term)] + -> Assignment (Sum Syntax Term) +infixTerm = infixContext comment + +withExtendedScope :: Assignment a -> Assignment a +withExtendedScope inner = do + locals <- getLocals + result <- inner + putLocals locals + pure result + +withNewScope :: Assignment a -> Assignment a +withNewScope inner = withExtendedScope $ do + putLocals [] + inner + +identWithLocals :: Assignment (Record Location, Text, [Text]) +identWithLocals = do + loc <- symbol Identifier + -- source advances, so it's important we call getLocals first + locals <- getLocals + ident <- source + pure (loc, ident, locals) + +{-# ANN module ("HLint: ignore Eta reduce" :: String) #-} diff --git a/src/Language/Python/PrettyPrint.hs b/src/Language/Python/PrettyPrint.hs new file mode 100644 index 000000000..16957a53d --- /dev/null +++ b/src/Language/Python/PrettyPrint.hs @@ -0,0 +1,84 @@ +module Language.Python.PrettyPrint ( printingPython ) where + +import Control.Arrow +import Control.Monad.Effect +import Control.Monad.Effect.Exception (Exc, throwError) +import Data.Machine +import Data.Reprinting.Errors +import Data.Reprinting.Splice +import Data.Reprinting.Token as Token +import Data.Semigroup (stimes) +import Data.Sequence (Seq) + +-- | Print Python syntax. +printingPython :: (Member (Exc TranslationError) effs) => ProcessT (Eff effs) Fragment Splice +printingPython = autoT (Kleisli step) ~> flattened + +step :: (Member (Exc TranslationError) effs) => Fragment -> Eff effs (Seq Splice) +step (Verbatim txt) = pure $ emit txt +step (New _ _ txt) = pure $ emit txt +step (Defer el cs) = case (el, cs) of + -- Function declarations + (TOpen, TFunction:_) -> pure $ emit "def" <> space + (TOpen, TParams:TFunction:_) -> pure $ emit "(" + (TClose, TParams:TFunction:_) -> pure $ emit "):" + (TClose, TFunction:xs) -> pure $ endContext (depth xs) + + -- Return statements + (TOpen, TReturn:_) -> pure $ emit "return" <> space + (TClose, TReturn:_) -> pure mempty + (TOpen, Imperative:TReturn:_) -> pure mempty + (TSep, Imperative:TReturn:_) -> pure $ emit "," <> space + (TClose, Imperative:TReturn:_) -> pure mempty -- Don't hardwarp or indent for return statements + + -- If statements + (TOpen, TIf:_) -> pure $ emit "if" <> space + (TThen, TIf:_) -> pure $ emit ":" + (TElse, TIf:xs) -> pure $ endContext (depth xs) <> emit "else:" + (TClose, TIf:_) -> pure mempty + + -- Booleans + (Truth True, _) -> pure $ emit "True" + (Truth False, _) -> pure $ emit "False" + + -- Infix binary operators + (TOpen, TInfixL _ p:xs) -> emitIf (p < prec xs) "(" + (TSym, TInfixL Add _:_) -> pure $ space <> emit "+" <> space + (TSym, TInfixL Multiply _:_) -> pure $ space <> emit "*" <> space + (TSym, TInfixL Subtract _:_) -> pure $ space <> emit "-" <> space + (TClose, TInfixL _ p:xs) -> emitIf (p < prec xs) ")" + + -- General params handling + (TOpen, TParams:_) -> pure $ emit "(" + (TSep, TParams:_) -> pure $ emit "," <> space + (TClose, TParams:_) -> pure $ emit ")" + + -- Imperative context and whitespace handling + (TOpen, [Imperative]) -> pure mempty -- Don't indent at the top-level imperative context... + (TClose, [Imperative]) -> pure $ layout HardWrap -- but end the program with a newline. + (TOpen, Imperative:xs) -> pure $ layout HardWrap <> indent (depth xs) + (TSep, Imperative:xs) -> pure $ layout HardWrap <> indent (depth xs) + (TClose, Imperative:_) -> pure mempty + + _ -> throwError (NoTranslation el cs) + + where + emitIf predicate txt = pure $ if predicate then emit txt else mempty + endContext times = layout HardWrap <> indent (pred times) + +prec :: [Context] -> Int +prec cs = case filter isInfix cs of + (TInfixL _ n:_) -> n + _ -> 0 + where isInfix (TInfixL _ _) = True + isInfix _ = False + +-- | Depth of imperative scope. +depth :: [Context] -> Int +depth = length . filter (== Imperative) + +-- | Indent n times. +indent :: Integral b => b -> Seq Splice +indent times + | times > 0 = stimes times (layout (Indent 4 Spaces)) + | otherwise = mempty diff --git a/src/Language/Ruby/PrettyPrint.hs b/src/Language/Ruby/PrettyPrint.hs new file mode 100644 index 000000000..424f0f7b0 --- /dev/null +++ b/src/Language/Ruby/PrettyPrint.hs @@ -0,0 +1,70 @@ +module Language.Ruby.PrettyPrint ( printingRuby ) where + +import Control.Arrow +import Control.Monad.Effect +import Control.Monad.Effect.Exception (Exc, throwError) +import Data.Machine +import Data.Sequence (Seq) +import Data.Reprinting.Errors +import Data.Reprinting.Splice +import Data.Reprinting.Token as Token +import Data.Semigroup (stimes) + +-- | Print Ruby syntax. +printingRuby :: (Member (Exc TranslationError) effs) => ProcessT (Eff effs) Fragment Splice +printingRuby = autoT (Kleisli step) ~> flattened + +step :: (Member (Exc TranslationError) effs) => Fragment -> Eff effs (Seq Splice) +step (Verbatim txt) = pure $ emit txt +step (New _ _ txt) = pure $ emit txt +step (Defer el cs) = case (el, cs) of + (TOpen, TMethod:_) -> pure $ emit "def" <> space + (TClose, TMethod:xs) -> pure $ endContext (depth xs) <> emit "end" + + -- TODO: do..end vs {..} should be configurable. + (TOpen, TFunction:_) -> pure $ space <> emit "do" <> space + (TOpen, TParams:TFunction:_) -> pure $ emit "|" + (TClose, TParams:TFunction:_) -> pure $ emit "|" + (TClose, TFunction:xs) -> pure $ endContext (depth xs) <> emit "end" + + -- TODO: Parens for calls are a style choice, make configurable. + (TOpen, TParams:_) -> pure $ emit "(" + (TSep, TParams:_) -> pure $ emit "," <> space + (TClose, TParams:_) -> pure $ emit ")" + + (TOpen, TInfixL _ p:xs) -> emitIf (p < prec xs) "(" + (TSym, TInfixL Add _:_) -> pure $ space <> emit "+" <> space + (TSym, TInfixL Multiply _:_) -> pure $ space <> emit "*" <> space + (TSym, TInfixL Subtract _:_) -> pure $ space <> emit "-" <> space + (TClose, TInfixL _ p:xs) -> emitIf (p < prec xs) ")" + + (TOpen, [Imperative]) -> pure mempty + (TOpen, Imperative:xs) -> pure $ layout HardWrap <> indent (depth xs) + (TSep, Imperative:xs) -> pure $ layout HardWrap <> indent (depth xs) + (TClose, [Imperative]) -> pure $ layout HardWrap + (TClose, Imperative:xs) -> pure $ indent (pred (depth xs)) + + (TSep, TCall:_) -> pure $ emit "." + + _ -> throwError (NoTranslation el cs) + + where + emitIf predicate txt = pure $ if predicate then emit txt else mempty + endContext times = layout HardWrap <> indent (pred times) + +prec :: [Context] -> Int +prec cs = case filter isInfix cs of + (TInfixL _ n:_) -> n + _ -> 0 + where isInfix (TInfixL _ _) = True + isInfix _ = False + +-- | Depth of imperative scope. +depth :: [Context] -> Int +depth = length . filter (== Imperative) + +-- | Indent n times. +indent :: Integral b => b -> Seq Splice +indent times + | times > 0 = stimes times (layout (Indent 2 Spaces)) + | otherwise = mempty diff --git a/src/Language/Ruby/Syntax.hs b/src/Language/Ruby/Syntax.hs index 2190ff5ec..6a7a9a840 100644 --- a/src/Language/Ruby/Syntax.hs +++ b/src/Language/Ruby/Syntax.hs @@ -14,6 +14,7 @@ import qualified Data.Text as T import Diffing.Algorithm import Prologue import Proto3.Suite.Class +import Reprinting.Tokenize import System.FilePath.Posix @@ -66,6 +67,13 @@ instance Evaluatable Send where args <- traverse subtermAddress sendArgs Rval <$> call func recv args -- TODO pass through sendBlock +instance Tokenize Send where + tokenize Send{..} = within TCall $ do + maybe (pure ()) (\r -> r *> yield TSep) sendReceiver + fromMaybe (pure ()) sendSelector + within' TParams $ sequenceA_ (sep sendArgs) + fromMaybe (pure ()) sendBlock + data Require a = Require { requireRelative :: Bool, requirePath :: !a } deriving (Declarations1, Diffable, Eq, Foldable, FreeVariables1, Functor, Generic1, Hashable1, Ord, Show, ToJSONFields1, Traversable, Named1, Message1) diff --git a/src/Matching/Core.hs b/src/Matching/Core.hs new file mode 100644 index 000000000..c0b08e747 --- /dev/null +++ b/src/Matching/Core.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE ScopedTypeVariables, TypeFamilies, TypeOperators #-} + +module Matching.Core +( matchHash +, matchArray +, matchFloat +) where + +import Prologue + +import Control.Abstract.Matching +import qualified Data.Syntax.Literal as Literal +import Data.Term + +matchHash :: (Literal.Hash :< fs, term ~ Term (Sum fs) ann) => Matcher term (Literal.Hash term) +matchHash = matchM projectTerm target + +matchArray :: (Literal.Array :< fs, term ~ Term (Sum fs) ann) => Matcher term (Literal.Array term) +matchArray = matchM projectTerm target + +matchFloat :: (Literal.Float :< fs, term ~ Term (Sum fs) ann) => Matcher term (Literal.Float term) +matchFloat = matchM projectTerm target diff --git a/src/Parsing/Parser.hs b/src/Parsing/Parser.hs index 191e1a657..019e832fa 100644 --- a/src/Parsing/Parser.hs +++ b/src/Parsing/Parser.hs @@ -20,7 +20,9 @@ module Parsing.Parser , jsonASTParser , markdownParser , pythonParser +, miniPythonParser , rubyParser +, miniRubyParser , typescriptParser , phpParser , haskellParser @@ -47,6 +49,8 @@ import qualified Language.Markdown.Assignment as Markdown import qualified Language.PHP.Assignment as PHP import qualified Language.Python.Assignment as Python import qualified Language.Ruby.Assignment as Ruby +import qualified Language.MiniRuby.Assignment as MiniRuby +import qualified Language.MiniPython.Assignment as MiniPython import qualified Language.TypeScript.Assignment as TypeScript import Prologue import TreeSitter.Go @@ -87,7 +91,7 @@ someAnalysisParser :: ( ApplyAll' typeclasses Go.Syntax ) => proxy typeclasses -- ^ A proxy for the list of typeclasses required, e.g. @(Proxy :: Proxy '[Show1])@. -> Language -- ^ The 'Language' to select. - -> SomeAnalysisParser typeclasses (Record Location) -- ^ A 'SomeAnalysisParser abstracting the syntax type to be produced. + -> SomeAnalysisParser typeclasses (Record Location) -- ^ A 'SomeAnalysisParser' abstracting the syntax type to be produced. someAnalysisParser _ Go = SomeAnalysisParser goParser (Proxy :: Proxy 'Go) someAnalysisParser _ Haskell = SomeAnalysisParser haskellParser (Proxy :: Proxy 'Haskell) someAnalysisParser _ Java = SomeAnalysisParser javaParser (Proxy :: Proxy 'Java) @@ -105,9 +109,9 @@ data Parser term where ASTParser :: (Bounded grammar, Enum grammar, Show grammar) => Ptr TS.Language -> Parser (AST [] grammar) -- | A parser producing an à la carte term given an 'AST'-producing parser and an 'Assignment' onto 'Term's in some syntax type. AssignmentParser :: (Enum grammar, Ix grammar, Show grammar, TS.Symbol grammar, Syntax.Error :< fs, Eq1 ast, Apply Foldable fs, Apply Functor fs, Foldable ast, Functor ast) - => Parser (Term ast (Node grammar)) -- A parser producing AST. - -> Assignment ast grammar (Term (Sum fs) (Record Location)) -- An assignment from AST onto 'Term's. - -> Parser (Term (Sum fs) (Record Location)) -- A parser producing 'Term's. + => Parser (Term ast (Node grammar)) -- ^ A parser producing AST. + -> Assignment ast grammar (Term (Sum fs) (Record Location)) -- ^ An assignment from AST onto 'Term's. + -> Parser (Term (Sum fs) (Record Location)) -- ^ A parser producing 'Term's. DeterministicParser :: (Enum grammar, Ord grammar, Show grammar, Element Syntax.Error syntaxes, Apply Foldable syntaxes, Apply Functor syntaxes) => Parser (AST [] grammar) -> Deterministic.Assignment grammar (Term (Sum syntaxes) (Record Location)) @@ -159,12 +163,18 @@ goParser = AssignmentParser (ASTParser tree_sitter_go) Go.assignment rubyParser :: Parser Ruby.Term rubyParser = AssignmentParser (ASTParser tree_sitter_ruby) Ruby.assignment +miniRubyParser :: Parser MiniRuby.Term +miniRubyParser = AssignmentParser (ASTParser tree_sitter_ruby) MiniRuby.assignment + phpParser :: Parser PHP.Term phpParser = AssignmentParser (ASTParser tree_sitter_php) PHP.assignment pythonParser :: Parser Python.Term pythonParser = AssignmentParser (ASTParser tree_sitter_python) Python.assignment +miniPythonParser :: Parser MiniPython.Term +miniPythonParser = AssignmentParser (ASTParser tree_sitter_python) MiniPython.assignment + javaParser :: Parser Java.Term javaParser = AssignmentParser javaASTParser Java.assignment diff --git a/src/Rendering/JSON.hs b/src/Rendering/JSON.hs index 7ecaf84aa..c8a2b090d 100644 --- a/src/Rendering/JSON.hs +++ b/src/Rendering/JSON.hs @@ -5,6 +5,7 @@ module Rendering.JSON , renderJSONTerm , renderJSONAST , renderSymbolTerms +, renderJSONError , SomeJSON(..) ) where @@ -43,7 +44,6 @@ instance ToJSON JSONStat where toJSON JSONStat{..} = object ("path" .= pathKeyForBlobPair jsonStatBlobs : toJSONFields (these Delete Insert Replace (runJoin jsonStatBlobs))) toEncoding JSONStat{..} = pairs (fold ("path" .= pathKeyForBlobPair jsonStatBlobs : toJSONFields (these Delete Insert Replace (runJoin jsonStatBlobs)))) - -- | Render a term to a value representing its JSON. renderJSONTerm :: ToJSON a => Blob -> a -> JSON "trees" SomeJSON renderJSONTerm blob content = JSON [ SomeJSON (JSONTerm blob content) ] @@ -71,6 +71,11 @@ instance ToJSON a => ToJSON (JSONAST a) where renderSymbolTerms :: ToJSON a => [a] -> JSON "files" SomeJSON renderSymbolTerms = JSON . map SomeJSON +renderJSONError :: Blob -> String -> JSON "trees" SomeJSON +renderJSONError Blob{..} e = JSON [ SomeJSON (object [ "error" .= err ]) ] + where err = object [ "message" .= e + , "path" .= blobPath + , "language" .= blobLanguage ] data SomeJSON where SomeJSON :: ToJSON a => a -> SomeJSON diff --git a/src/Rendering/Renderer.hs b/src/Rendering/Renderer.hs index 423db1aa2..3884a7e9c 100644 --- a/src/Rendering/Renderer.hs +++ b/src/Rendering/Renderer.hs @@ -11,6 +11,7 @@ module Rendering.Renderer , renderSymbolTerms , renderToSymbols , renderTreeGraph +, renderJSONError , Summaries(..) , TOCSummary(..) , SymbolFields(..) diff --git a/src/Reprinting/Pipeline.hs b/src/Reprinting/Pipeline.hs new file mode 100644 index 000000000..3cfa49155 --- /dev/null +++ b/src/Reprinting/Pipeline.hs @@ -0,0 +1,192 @@ +{- | + +This module represents the top-level interface for @semantic@'s +reprinting functionality. Reprinting here is defined as the +conversion, given some 'Source' code, of the parsed (and possibly +modified) syntax tree corresponding to that source, back into a +document representing that source code. + +The approach is based on techniques drawn from: + +* /A Pretty Good Formatting Pipeline/ by Bagge and Hasu (2010) +* /Scrap Your Reprinter/ by Orchard et al (2017) + +The reprinter was designed with the following goals in mind: + +* Laziness: a node that was unmodified in a refactoring pass + should draw its representation from the original source file, + rather than being explicitly pretty-printed. The only nodes + that do not consult the original document are those that were + synthesized during a refactoring operation. +* Generality: each syntax node should have one and only one + declaration that describes how reprinting takes place. No node + should be concerned with language-specific syntactic issues. +* Precedence-sensitivity: semantic syntax nodes do not contain + information about parenthesization of binary operators. + Binary operators should report their precedence and the + pipeline should insert parentheses as necessary. +* Modularity: each stage in the reprinting pipeline + should be testable independently. +* Time/space efficiency: the reprinting algorithm should scale + to trees with hundreds of thousands of nodes without linear + space usage. +* Roundtrip correctness: reprinting an unmodified syntax tree + should produce source text exactly corresponding to the original + file. + +The reprinter takes the form of a pipeline operating over a stream of +tokens. Each stage in the pipeline converts a given token to a +lower-level representation, ultimately resulting in a 'Doc' data type +from the @prettyprinter@ library (to which we defer the nitty-gritty +details of actually pretty-printing text). A representation of the +stages of the pipeline follows: + +@ + +[Start] + The Pipeline starts with a tree, where terms are annotated with 'History' to + denote what's been refactored. + (Language-agnostic) + | + | AST + | + v +[Tokenize] + A subterm algebra converting a tree (terms) to a stream of tokens. + (Language-agnostic) + | + | Seq Token + | + v +[Translate] + A stack machine interface through which tokens are interpreted to splices + (with context). A splice is a concrete representation of syntax, to which + additional language specific transformations can be applied. + (Language-agnostic) + | + | Seq Fragment + | + v +[PrettyPrint] --> --> --> <...> + A language specific stack machine interface allowing further refinement of the + sequence of splices. Language machines should emit specific keywords, + punctutation, and layout rules. Additional steps can be added for project + specific style, formatting, and even post-processing (minimizers, etc). + (Language-specific, Project-specific) + | + | Seq Splice + | + v +[Typeset] + A stack machine that converts splices to a Doc. (Language-agnostic) + | + | Doc + | + v +[Print] + A simple function that produces 'Text' or 'Source' with the desired layout + settings from a 'Doc'. (Language-agnostic) + | + | Text + | + v + +@ + +-} + +{-# LANGUAGE AllowAmbiguousTypes, ScopedTypeVariables, RankNTypes #-} +module Reprinting.Pipeline + ( runReprinter + , runTokenizing + , runContextualizing + , runTranslating + ) where + +import Control.Monad.Effect as Effect +import qualified Control.Monad.Effect.Exception as Exc +import Control.Monad.Effect.State +import Data.Machine hiding (Source) +import Data.Machine.Runner +import Data.Record +import Data.Reprinting.Errors +import Data.Reprinting.Splice +import Data.Reprinting.Token +import qualified Data.Source as Source +import Data.Term +import Data.Text.Prettyprint.Doc +import Data.Text.Prettyprint.Doc.Render.Text +import Reprinting.Tokenize +import Reprinting.Translate +import Reprinting.Typeset + + +-- | Run the reprinting pipeline given the original 'Source', a language +-- specific machine (`ProcessT`) and the provided 'Term'. +runReprinter :: + ( Show (Record fields) + , Tokenize a + , HasField fields History + ) + => Source.Source + -> ProcessT Translator Fragment Splice + -> Term a (Record fields) + -> Either TranslationError Source.Source +runReprinter src translating tree + = fmap go + . Effect.run + . Exc.runError + . fmap snd + . runState (mempty :: [Context]) + . foldT $ source (tokenizing src tree) + ~> contextualizing + ~> translating + ~> typesetting + where go = Source.fromText . renderStrict . layoutPretty defaultLayoutOptions + +-- | Run the reprinting pipeline up to tokenizing. +runTokenizing :: + ( Show (Record fields) + , Tokenize a + , HasField fields History + ) + => Source.Source + -> Term a (Record fields) + -> [Token] +runTokenizing src tree + = Data.Machine.run $ source (tokenizing src tree) + +-- | Run the reprinting pipeline up to contextualizing. +runContextualizing :: + ( Show (Record fields) + , Tokenize a + , HasField fields History + ) + => Source.Source + -> Term a (Record fields) + -> Either TranslationError [Fragment] +runContextualizing src tree + = Effect.run + . Exc.runError + . fmap snd + . runState (mempty :: [Context]) + . runT $ source (tokenizing src tree) + ~> contextualizing + +runTranslating :: + ( Show (Record fields) + , Tokenize a + , HasField fields History + ) + => Source.Source + -> ProcessT Translator Fragment Splice + -> Term a (Record fields) + -> Either TranslationError [Splice] +runTranslating src translating tree + = Effect.run + . Exc.runError + . fmap snd + . runState (mempty :: [Context]) + . runT $ source (tokenizing src tree) + ~> contextualizing + ~> translating diff --git a/src/Reprinting/Tokenize.hs b/src/Reprinting/Tokenize.hs new file mode 100644 index 000000000..849c4fd94 --- /dev/null +++ b/src/Reprinting/Tokenize.hs @@ -0,0 +1,191 @@ +{-# LANGUAGE GADTs, RankNTypes, TypeOperators, UndecidableInstances #-} + +module Reprinting.Tokenize + ( module Data.Reprinting.Token + , History (..) + , mark + , remark + -- * The Reprinter monad + , Tokenizer + , yield + , control + , within + , within' + , log + , ignore + , sep + , sepTrailing + , list + , hash + , pair + , imperative + -- * Tokenize interface + , Tokenize (..) + -- * Invocation/results + , tokenizing + ) where + +import Prelude hiding (fail, log) +import Prologue hiding (hash, Element) + +import Control.Monad.Effect +import Control.Monad.Effect.Reader +import Control.Monad.Effect.State +import Control.Monad.Effect.Writer +import Data.History +import Data.List (intersperse) +import Data.Range +import Data.Record +import Data.Reprinting.Token +import Data.Sequence (singleton) +import Data.Source +import Data.Term + +-- | The 'Tokenizer' monad represents a context in which 'Control' +-- tokens and 'Element' tokens can be sent to some downstream +-- consumer. Its primary interface is through the 'Tokenize' +-- typeclass. +type Tokenizer = Eff '[Reader RPContext, State RPState, Writer (Seq Token)] + +-- | Yield an 'Element' token in a 'Tokenizer' context. +yield :: Element -> Tokenizer () +yield = tell . singleton . TElement + +-- | Yield a 'Control' token in a 'Tokenizer' context. +control :: Control -> Tokenizer () +control = tell . singleton . TControl + +-- | Emit a log message to the token stream. Useful for debugging. +log :: String -> Tokenizer () +log = control . Log + +-- | Emit an Enter for the given context, then run the provided +-- action, then emit a corresponding Exit. +within :: Context -> Tokenizer () -> Tokenizer () +within c r = control (Enter c) *> r <* control (Exit c) + +-- | Like 'within', but adds 'TOpen' and 'TClose' elements around the action. +within' :: Context -> Tokenizer () -> Tokenizer () +within' c x = within c $ yield TOpen *> x <* yield TClose + +-- | Emit a sequence of tokens interspersed with 'TSep'. +sep :: Foldable t => t (Tokenizer ()) -> [Tokenizer ()] +sep = intersperse (yield TSep) . toList + +-- | Emit a sequence of tokens each with trailing 'TSep'. +sepTrailing :: Foldable t => t (Tokenizer ()) -> [Tokenizer ()] +sepTrailing = foldr (\x acc -> x : yield TSep : acc) mempty + +-- | Emit a sequence of tokens within a 'TList' Context with appropriate 'TOpen', +-- 'TClose' tokens surrounding. +list :: Foldable t => t (Tokenizer ()) -> Tokenizer () +list = within' TList . sequenceA_ . sep + +-- | Emit a sequence of tokens within a 'THash' Context with appropriate +-- 'TOpen', 'TClose' tokens surrounding and interspersing 'TSep'. +hash :: Foldable t => t (Tokenizer ()) -> Tokenizer () +hash = within' THash . sequenceA_ . sep + +-- | Emit key value tokens with a 'TSep' within an TPair Context +pair :: Tokenizer () -> Tokenizer () -> Tokenizer () +pair k v = within TPair $ k *> yield TSep <* v + +-- | Emit a sequence of tokens within an Imperative Context with appropriate +-- 'TOpen', 'TClose' tokens surrounding and interspersing 'TSep'. +imperative :: Foldable t => t (Tokenizer ()) -> Tokenizer () +imperative = within' Imperative . sequenceA_ . sep + +-- | Shortcut for @const (pure ())@, useful for when no action +-- should be taken. +ignore :: a -> Tokenizer () +ignore = const (pure ()) + +-- | An instance of the 'Tokenize' typeclass describes how to emit tokens to +-- pretty print the value of the supplied constructor in its AST context. +class (Show1 constr, Traversable constr) => Tokenize constr where + -- | Should emit control and data tokens. + tokenize :: FAlgebra constr (Tokenizer ()) + +-- | Sums of reprintable terms are reprintable. +instance (Apply Show1 fs, Apply Functor fs, Apply Foldable fs, Apply Traversable fs, Apply Tokenize fs) => Tokenize (Sum fs) where + tokenize = apply @Tokenize tokenize + +-- | Annotated terms are reprintable and operate in a context derived from the annotation. +instance (HasField fields History, Show (Record fields), Tokenize a) => Tokenize (TermF a (Record fields)) where + tokenize t = withHistory t (tokenize (termFOut t)) + +instance Tokenize [] where + tokenize = imperative + +-- | The top-level function. Pass in a 'Source' and a 'Term' and +-- you'll get out a 'Seq' of 'Token's for later processing. +tokenizing :: (Show (Record fields), Tokenize a, HasField fields History) => Source -> Term a (Record fields) -> Seq Token +tokenizing s t = let h = getField (termAnnotation t) in + run + . fmap fst + . runWriter + . fmap snd + . runState (RPState 0) + . runReader (RPContext s h Reprinting) + $ foldSubterms descend t *> finish + +-- Private interfaces + +newtype RPState = RPState + { _cursor :: Int -- from SYR, used to slice and dice a 'Source' (mutates) + } deriving (Show, Eq) + +setCursor :: Int -> RPState -> RPState +setCursor c s = s { _cursor = c } + +data RPContext = RPContext + { _source :: Source + , _history :: History + , _strategy :: Strategy + } deriving (Show, Eq) + +data Strategy + = Reprinting + | PrettyPrinting + deriving (Eq, Show) + +setStrategy :: Strategy -> RPContext -> RPContext +setStrategy s c = c { _strategy = s } + +setHistory :: History -> RPContext -> RPContext +setHistory h c = c { _history = h } + +chunk :: Source -> Tokenizer () +chunk = tell . singleton . Chunk + +finish :: Tokenizer () +finish = do + crs <- gets _cursor + src <- asks _source + chunk (dropSource crs src) + +withHistory :: (Annotated t (Record fields), HasField fields History) => t -> Tokenizer a -> Tokenizer a +withHistory x = local (setHistory (getField (annotation x))) + +withStrategy :: Strategy -> Tokenizer a -> Tokenizer a +withStrategy x = local (setStrategy x) + +-- | A subterm algebra inspired by the /Scrap Your Reprinter/ algorithm. +descend :: (Tokenize constr, HasField fields History) => SubtermAlgebra constr (Term a (Record fields)) (Tokenizer ()) +descend t = do + -- log (showsPrec1 0 (() <$ t) "") + hist <- asks _history + strat <- asks _strategy + let into s = withHistory (subterm s) (subtermRef s) + case (hist, strat) of + (Unmodified _, _) -> traverse_ into t + (Refactored _, PrettyPrinting) -> tokenize (fmap into t) + (Refactored r, Reprinting) -> do + crs <- gets _cursor + src <- asks _source + let delimiter = Range crs (start r) + log ("slicing: " <> show delimiter) + chunk (slice delimiter src) + modify' (setCursor (start r)) + tokenize (fmap (withStrategy PrettyPrinting . into) t) + modify' (setCursor (end r)) diff --git a/src/Reprinting/Translate.hs b/src/Reprinting/Translate.hs new file mode 100644 index 000000000..e44e73436 --- /dev/null +++ b/src/Reprinting/Translate.hs @@ -0,0 +1,55 @@ +{-# LANGUAGE AllowAmbiguousTypes, OverloadedLists, ScopedTypeVariables, TypeFamilyDependencies, TypeOperators #-} + +module Reprinting.Translate + ( Translator + , contextualizing + ) where + +import Prologue hiding (Element) + +import Control.Arrow +import Control.Monad.Effect +import Control.Monad.Effect.Exception (Exc) +import qualified Control.Monad.Effect.Exception as Exc +import Control.Monad.Effect.State +import Data.Machine +import Data.Reprinting.Splice +import Data.Reprinting.Token +import Data.Reprinting.Errors +import qualified Data.Source as Source + +type Translator = Eff '[State [Context], Exc TranslationError] + +-- | Prepare for language specific translation by contextualizing 'Token's to +-- 'Fragment's. +contextualizing :: + ( Member (State [Context]) effs + , Member (Exc TranslationError) effs + ) + => ProcessT (Eff effs) Token Fragment +contextualizing = autoT (Kleisli step) ~> flattened where + step t = case t of + Chunk source -> pure $ copy (Source.toText source) + TElement el -> toFragment el <$> get + TControl ctl -> case ctl of + Log _ -> pure mempty + Enter c -> enterContext c $> mempty + Exit c -> exitContext c $> mempty + + toFragment el cs = case el of + Fragment f -> insert el cs f + _ -> defer el cs + + enterContext :: (Member (State [Context]) effs) => Context -> Eff effs () + enterContext c = modify' (c :) + + exitContext :: + ( Member (State [Context]) effs + , Member (Exc TranslationError) effs + ) + => Context -> Eff effs () + exitContext c = do + current <- get + case current of + (x:xs) | x == c -> modify' (const xs) + cs -> Exc.throwError (UnbalancedPair c cs) diff --git a/src/Reprinting/Typeset.hs b/src/Reprinting/Typeset.hs new file mode 100644 index 000000000..2da1d797c --- /dev/null +++ b/src/Reprinting/Typeset.hs @@ -0,0 +1,40 @@ +module Reprinting.Typeset + ( typeset + , typesetting + , typesettingWithVisualWhitespace + ) where + +import Prologue + +import Data.Machine +import Data.Reprinting.Splice hiding (space) +import Data.Text.Prettyprint.Doc + +typeset :: Seq Splice -> Doc a +typeset = foldMap step + +typesetting :: Monad m => ProcessT m Splice (Doc a) +typesetting = auto step + +step :: Splice -> Doc a +step (Emit t) = pretty t +step (Layout SoftWrap) = softline +step (Layout HardWrap) = hardline +step (Layout Space) = space +step (Layout (Indent 0 Spaces)) = mempty +step (Layout (Indent n Spaces)) = stimes n space +step (Layout (Indent 0 Tabs)) = mempty +step (Layout (Indent n Tabs)) = stimes n "\t" + +-- | Typeset, but show whitespace with printable characters for debugging purposes. +typesettingWithVisualWhitespace :: Monad m => ProcessT m Splice (Doc a) +typesettingWithVisualWhitespace = auto step where + step :: Splice -> Doc a + step (Emit t) = pretty t + step (Layout SoftWrap) = softline + step (Layout HardWrap) = "\\n" <> hardline + step (Layout Space) = "." + step (Layout (Indent 0 Spaces)) = mempty + step (Layout (Indent n Spaces)) = stimes n "." + step (Layout (Indent 0 Tabs)) = mempty + step (Layout (Indent n Tabs)) = stimes n "\t" diff --git a/src/Semantic/Parse.hs b/src/Semantic/Parse.hs index 998ba8c48..bdda0155a 100644 --- a/src/Semantic/Parse.hs +++ b/src/Semantic/Parse.hs @@ -10,6 +10,7 @@ module Semantic.Parse import Analysis.ConstructorName (ConstructorName) import Analysis.Declaration (HasDeclaration, declarationAlgebra) import Analysis.PackageDef (HasPackageDef) +import Control.Monad.Effect.Exception import Data.AST import Data.Blob import Data.JSON.Fields @@ -29,34 +30,57 @@ import qualified Language.JSON.Assignment as JSON import qualified Language.Python.Assignment as Python runParse :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs) => TermRenderer output -> [Blob] -> Eff effs Builder -runParse JSONTermRenderer = withParsedBlobs (render . renderJSONTerm) >=> serialize JSON -runParse SExpressionTermRenderer = withParsedBlobs (const (serialize (SExpression ByConstructorName))) -runParse ShowTermRenderer = withParsedBlobs (const (serialize Show . quieterm)) -runParse (SymbolsTermRenderer fields) = withParsedBlobs (\ blob -> decorate (declarationAlgebra blob) >=> render (renderSymbolTerms . renderToSymbols fields blob)) >=> serialize JSON -runParse DOTTermRenderer = withParsedBlobs (const (render renderTreeGraph)) >=> serialize (DOT (termStyle "terms")) +runParse JSONTermRenderer = withParsedBlobs renderJSONError (render . renderJSONTerm) >=> serialize JSON +runParse SExpressionTermRenderer = withParsedBlobs (\_ _ -> mempty) (const (serialize (SExpression ByConstructorName))) +runParse ShowTermRenderer = withParsedBlobs (\_ _ -> mempty) (const (serialize Show . quieterm)) +runParse (SymbolsTermRenderer fields) = withParsedBlobs (\_ _ -> mempty) (\ blob -> decorate (declarationAlgebra blob) >=> render (renderSymbolTerms . renderToSymbols fields blob)) >=> serialize JSON +runParse DOTTermRenderer = withParsedBlobs (\_ _ -> mempty) (const (render renderTreeGraph)) >=> serialize (DOT (termStyle "terms")) -runRubyParse :: (Member Distribute effs, Member Task effs) => [Blob] -> Eff effs [Term (Sum Ruby.Syntax) ()] -runRubyParse = flip distributeFor (\ blob -> do - term <- parse rubyParser blob - pure (() <$ term)) +-- NB: Our gRPC interface requires concrete 'Term's for each language to know +-- how to encode messages, so we have dedicated functions for parsing each +-- supported language. +runRubyParse :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs) + => [Blob] -> Eff effs [Either SomeException (Term (Sum Ruby.Syntax) ())] +runRubyParse = flip distributeFor $ \blob -> + (Right . (() <$) <$> parse rubyParser blob) `catchError` (pure . Left) -runTypeScriptParse :: (Member Distribute effs, Member Task effs) => [Blob] -> Eff effs [Term (Sum TypeScript.Syntax) ()] -runTypeScriptParse = flip distributeFor (\ blob -> do - term <- parse typescriptParser blob - pure (() <$ term)) +runTypeScriptParse :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs) + => [Blob] -> Eff effs [Either SomeException (Term (Sum TypeScript.Syntax) ())] +runTypeScriptParse = flip distributeFor $ \blob -> do + (Right . (() <$) <$> parse typescriptParser blob) `catchError` (pure . Left) -runPythonParse :: (Member Distribute effs, Member Task effs) => [Blob] -> Eff effs [Term (Sum Python.Syntax) ()] -runPythonParse = flip distributeFor (\ blob -> do - term <- parse pythonParser blob - pure (() <$ term)) +runPythonParse :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs) + => [Blob] -> Eff effs [Either SomeException (Term (Sum Python.Syntax) ())] +runPythonParse = flip distributeFor $ \blob -> do + (Right . (() <$) <$> parse pythonParser blob) `catchError` (pure . Left) -runJSONParse :: (Member Distribute effs, Member Task effs) => [Blob] -> Eff effs [Term (Sum JSON.Syntax) ()] -runJSONParse = flip distributeFor (\ blob -> do - term <- parse jsonParser blob - pure (() <$ term)) +runJSONParse :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs) + => [Blob] -> Eff effs [Either SomeException (Term (Sum JSON.Syntax) ())] +runJSONParse = flip distributeFor $ \blob -> do + (Right . (() <$) <$> parse jsonParser blob) `catchError` (pure . Left) -withParsedBlobs :: (Member Distribute effs, Member (Exc SomeException) effs, Member Task effs, Monoid output) => (forall syntax . (ConstructorName syntax, Foldable syntax, Functor syntax, HasDeclaration syntax, HasPackageDef syntax, Show1 syntax, ToJSONFields1 syntax) => Blob -> Term syntax (Record Location) -> Eff effs output) -> [Blob] -> Eff effs output -withParsedBlobs render = distributeFoldMap (\ blob -> parseSomeBlob blob >>= withSomeTerm (render blob)) +withParsedBlobs :: + ( Member Distribute effs + , Member (Exc SomeException) effs + , Member Task effs + , Monoid output + ) + => (Blob -> String -> output) + -> ( forall syntax . + ( ConstructorName syntax + , Foldable syntax + , Functor syntax + , HasDeclaration syntax + , HasPackageDef syntax + , Show1 syntax + , ToJSONFields1 syntax + ) => Blob -> Term syntax (Record Location) -> Eff effs output + ) + -> [Blob] + -> Eff effs output +withParsedBlobs onError render = distributeFoldMap $ \blob -> + (parseSomeBlob blob >>= withSomeTerm (render blob)) `catchError` \(SomeException e) -> + pure (onError blob (show e)) parseSomeBlob :: (Member (Exc SomeException) effs, Member Task effs) => Blob -> Eff effs (SomeTerm '[ConstructorName, Foldable, Functor, HasDeclaration, HasPackageDef, Show1, ToJSONFields1] (Record Location)) parseSomeBlob blob@Blob{..} = maybe (noLanguageForBlob blobPath) (`parse` blob) (someParser blobLanguage) diff --git a/src/Semantic/Task.hs b/src/Semantic/Task.hs index 491b059b1..d85e1de09 100644 --- a/src/Semantic/Task.hs +++ b/src/Semantic/Task.hs @@ -65,6 +65,7 @@ import Data.Bool import Data.ByteString.Builder import Data.Diff import qualified Data.Error as Error +import Data.Language (Language) import Data.Record import Data.Source (Source) import Data.Sum @@ -195,7 +196,8 @@ runTaskF = interpret $ \ task -> case task of logError :: Member Telemetry effs => Config -> Level -> Blob -> Error.Error String -> [(String, String)] -> Eff effs () logError Config{..} level blob err = writeLog level (Error.formatError configLogPrintSource configIsTerminal blob err) -data ParserCancelled = ParserTimedOut deriving (Show, Typeable) +data ParserCancelled = ParserTimedOut FilePath Language + deriving (Show, Typeable) instance Exception ParserCancelled @@ -206,7 +208,7 @@ runParser blob@Blob{..} parser = case parser of time "parse.tree_sitter_ast_parse" languageTag $ do config <- ask parseToAST (configTreeSitterParseTimeout config) language blob - >>= maybeM (throwError (SomeException ParserTimedOut)) + >>= maybeM (throwError (SomeException (ParserTimedOut blobPath blobLanguage))) AssignmentParser parser assignment -> runAssignment Assignment.assign parser assignment DeterministicParser parser assignment -> runAssignment Deterministic.assign parser assignment diff --git a/src/Semantic/Util.hs b/src/Semantic/Util.hs index 824895de1..39e186616 100644 --- a/src/Semantic/Util.hs +++ b/src/Semantic/Util.hs @@ -1,12 +1,13 @@ -{-# LANGUAGE TypeFamilies, TypeOperators #-} +{-# LANGUAGE ScopedTypeVariables, TypeFamilies, TypeOperators #-} {-# OPTIONS_GHC -Wno-missing-signatures -Wno-missing-export-lists #-} module Semantic.Util where -import Prelude hiding (readFile) +import Prelude hiding (id, (.), readFile) import Analysis.Abstract.Caching import Analysis.Abstract.Collecting import Control.Abstract +import Control.Category import Control.Exception (displayException) import Control.Monad.Effect.Trace (runPrintingTrace) import Data.Abstract.Address.Monovariant as Monovariant @@ -138,7 +139,6 @@ parseFile parser = runTask . (parse parser <=< readBlob . file) blob :: FilePath -> IO Blob blob = runTask . readBlob . file - mergeExcs :: Either (SomeExc (Sum excs)) (Either (SomeExc exc) result) -> Either (SomeExc (Sum (exc ': excs))) result mergeExcs = either (\ (SomeExc sum) -> Left (SomeExc (weaken sum))) (either (\ (SomeExc exc) -> Left (SomeExc (inject exc))) Right) diff --git a/src/Semantic/Util/Rewriting.hs b/src/Semantic/Util/Rewriting.hs new file mode 100644 index 000000000..7108ef1e6 --- /dev/null +++ b/src/Semantic/Util/Rewriting.hs @@ -0,0 +1,207 @@ +{-# LANGUAGE ScopedTypeVariables, TypeFamilies, TypeOperators #-} +{-# OPTIONS_GHC -Wno-missing-signatures -Wno-missing-export-lists #-} +module Semantic.Util.Rewriting where + +import Prelude hiding (id, (.), readFile) + +import Control.Abstract +import Control.Abstract.Matching +import Control.Category +import Data.Blob +import qualified Data.ByteString.Char8 as BC +import Data.History +import qualified Data.Language as Language +import Data.Machine +import Data.Machine.Runner +import Data.Project hiding (readFile) +import Data.Record +import qualified Data.Source as Source +import qualified Data.Sum as Sum +import qualified Data.Syntax.Literal as Literal +import Data.Term +import Language.JSON.PrettyPrint +import Language.Ruby.PrettyPrint +import Language.Python.PrettyPrint +import Matching.Core +import Parsing.Parser +import Prologue hiding (weaken) +import Reprinting.Pipeline +import Semantic.IO as IO +import Semantic.Task + +testPythonFile = do + let path = "test/fixtures/python/reprinting/function.py" + src <- blobSource <$> readBlobFromPath (File path Language.Python) + tree <- parseFile miniPythonParser path + pure (src, tree) + +testPythonPipeline = do + (src, tree) <- testPythonFile + printToTerm $ runReprinter src printingPython (mark Refactored tree) + +testPythonPipeline' = do + (src, tree) <- testPythonFile + pure $ runTokenizing src (mark Refactored tree) + +testPythonPipeline'' = do + (src, tree) <- testPythonFile + pure $ runContextualizing src (mark Refactored tree) + +testPythonPipeline''' = do + (src, tree) <- testPythonFile + pure $ runTranslating src printingPython (mark Refactored tree) + +testRubyFile = do + let path = "test/fixtures/ruby/reprinting/infix.rb" + src <- blobSource <$> readBlobFromPath (File path Language.Ruby) + tree <- parseFile miniRubyParser path + pure (src, tree) + +testRubyPipeline = do + (src, tree) <- testRubyFile + printToTerm $ runReprinter src printingRuby (mark Refactored tree) + +testRubyPipeline' = do + (src, tree) <- testRubyFile + pure $ runTokenizing src (mark Refactored tree) + +testRubyPipeline'' = do + (src, tree) <- testRubyFile + pure $ runContextualizing src (mark Refactored tree) + +testJSONPipeline = do + (src, tree) <- testJSONFile + printToTerm $ runReprinter src defaultJSONPipeline (mark Refactored tree) + +printToTerm = either (putStrLn . show) (BC.putStr . Source.sourceBytes) + +testJSONFile = do + let path = "test/fixtures/javascript/reprinting/map.json" + src <- blobSource <$> readBlobFromPath (File path Language.JSON) + tree <- parseFile jsonParser path + pure (src, tree) + +renameKey :: (Literal.TextElement :< fs, Literal.KeyValue :< fs, Apply Functor fs) => Term (Sum fs) (Record (History ': fields)) -> Term (Sum fs) (Record (History ': fields)) +renameKey p = case projectTerm p of + Just (Literal.KeyValue k v) + | Just (Literal.TextElement x) <- Sum.project (termOut k) + , x == "\"foo\"" + -> let newKey = termIn (termAnnotation k) (inject (Literal.TextElement "\"fooA\"")) + in remark Refactored (termIn (termAnnotation p) (inject (Literal.KeyValue newKey v))) + _ -> Term (fmap renameKey (unTerm p)) + +testRenameKey = do + (src, tree) <- testJSONFile + let tagged = renameKey (mark Unmodified tree) + printToTerm $ runReprinter src defaultJSONPipeline tagged + +increaseNumbers :: (Literal.Float :< fs, Apply Functor fs) => Term (Sum fs) (Record (History ': fields)) -> Term (Sum fs) (Record (History ': fields)) +increaseNumbers p = case Sum.project (termOut p) of + Just (Literal.Float t) -> remark Refactored (termIn (termAnnotation p) (inject (Literal.Float (t <> "0")))) + Nothing -> Term (fmap increaseNumbers (unTerm p)) + +addKVPair :: forall effs syntax ann fields term . + ( Apply Functor syntax + , Literal.Hash :< syntax + , Literal.Array :< syntax + , Literal.TextElement :< syntax + , Literal.KeyValue :< syntax + , ann ~ Record (History ': fields) + , term ~ Term (Sum syntax) ann + ) => + ProcessT (Eff effs) (Either term (term, Literal.Hash term)) term +addKVPair = repeatedly $ do + t <- await + Data.Machine.yield (either id injKVPair t) + where + injKVPair :: (term, Literal.Hash term) -> term + injKVPair (origTerm, Literal.Hash xs) = + remark Refactored (injectTerm ann (Literal.Hash (xs <> [newItem]))) + where + newItem = termIn ann (inject (Literal.KeyValue k v)) + k = termIn ann (inject (Literal.TextElement "\"added\"")) + v = termIn ann (inject (Literal.Array [])) + ann = termAnnotation origTerm + +testAddKVPair = do + (src, tree) <- testJSONFile + tagged <- runM $ cata (toAlgebra (fromMatcher matchHash ~> addKVPair)) (mark Unmodified tree) + printToTerm $ runReprinter src defaultJSONPipeline tagged + +overwriteFloats :: forall effs syntax ann fields term . + ( Apply Functor syntax + , Literal.Float :< syntax + , ann ~ Record (History ': fields) + , term ~ Term (Sum syntax) ann + ) => + ProcessT (Eff effs) (Either term (term, Literal.Float term)) term +overwriteFloats = repeatedly $ do + t <- await + Data.Machine.yield (either id injFloat t) + where injFloat :: (term, Literal.Float term) -> term + injFloat (term, _) = remark Refactored (termIn (termAnnotation term) (inject (Literal.Float "0"))) + +testOverwriteFloats = do + (src, tree) <- testJSONFile + tagged <- runM $ cata (toAlgebra (fromMatcher matchFloat ~> overwriteFloats)) (mark Unmodified tree) + printToTerm $ runReprinter src defaultJSONPipeline tagged + +findKV :: + ( Literal.KeyValue :< syntax + , Literal.TextElement :< syntax + , term ~ Term (Sum syntax) ann + ) => + Text -> ProcessT (Eff effs) term (Either term (term, Literal.KeyValue term)) +findKV name = fromMatcher (kvMatcher name) + +kvMatcher :: forall fs ann term . + ( Literal.KeyValue :< fs + , Literal.TextElement :< fs + , term ~ Term (Sum fs) ann + ) => + Text -> Matcher term (Literal.KeyValue term) +kvMatcher name = matchM projectTerm target <* matchKey where + matchKey + = match Literal.key . + match Literal.textElementContent $ + ensure (== name) + +changeKV :: forall effs syntax ann fields term . + ( Apply Functor syntax + , Literal.KeyValue :< syntax + , Literal.Array :< syntax + , Literal.Float :< syntax + , ann ~ Record (History ': fields) + , term ~ Term (Sum syntax) ann + ) => + ProcessT (Eff effs) (Either term (term, Literal.KeyValue term)) term +changeKV = auto $ either id injKV + where + injKV :: (term, Literal.KeyValue term) -> term + injKV (term, Literal.KeyValue k v) = case projectTerm v of + Just (Literal.Array elems) -> remark Refactored (termIn ann (inject (Literal.KeyValue k (newArray elems)))) + _ -> term + where newArray xs = termIn ann (inject (Literal.Array (xs <> [float]))) + float = termIn ann (inject (Literal.Float "4")) + ann = termAnnotation term + +testChangeKV = do + (src, tree) <- testJSONFile + tagged <- runM $ cata (toAlgebra (findKV "\"bar\"" ~> changeKV)) (mark Unmodified tree) + printToTerm $ runReprinter src defaultJSONPipeline tagged + +-- Temporary, until new KURE system lands. +fromMatcher :: Matcher from to -> ProcessT (Eff effs) from (Either from (from, to)) +fromMatcher m = auto go where go x = maybe (Left x) (\y -> Right (x, y)) (stepMatcher x m) + +-- Turn a 'ProccessT' into an FAlgebra. +toAlgebra :: (Traversable (Base t), Corecursive t) + => ProcessT (Eff effs) t t + -> FAlgebra (Base t) (Eff effs t) +toAlgebra m t = do + inner <- sequenceA t + res <- runT1 (source (Just (embed inner)) ~> m) + pure (fromMaybe (embed inner) res) + +parseFile :: Parser term -> FilePath -> IO term +parseFile parser = runTask . (parse parser <=< readBlob . file) diff --git a/test/Reprinting/Spec.hs b/test/Reprinting/Spec.hs new file mode 100644 index 000000000..e09aaaaad --- /dev/null +++ b/test/Reprinting/Spec.hs @@ -0,0 +1,60 @@ +{-# LANGUAGE OverloadedLists, TypeOperators #-} + +module Reprinting.Spec where + +import SpecHelpers hiding (project, inject) + +import Data.Functor.Foldable (embed, cata) +import qualified Data.Language as Language +import qualified Data.Syntax.Literal as Literal +import Data.Algebra +import Reprinting.Tokenize +import Reprinting.Pipeline +import Data.Sum +import Data.Foldable +import Semantic.IO +import Semantic.Util.Rewriting hiding (parseFile) +import Data.Blob +import Language.JSON.PrettyPrint +import Language.Ruby.PrettyPrint +import Language.Python.PrettyPrint + +spec :: Spec +spec = describe "reprinting" $ do + context "JSON" $ do + let path = "test/fixtures/javascript/reprinting/map.json" + (src, tree) <- runIO $ do + src <- blobSource <$> readBlobFromPath (File path Language.JSON) + tree <- parseFile jsonParser path + pure (src, tree) + + describe "tokenization" $ do + + it "should pass over a pristine tree" $ do + let tagged = mark Unmodified tree + let toks = tokenizing src tagged + toks `shouldBe` [Chunk src] + + it "should emit control tokens but only 1 chunk for a wholly-modified tree" $ do + let toks = tokenizing src (mark Refactored tree) + for_ @[] [TList, THash] $ \t -> do + toks `shouldSatisfy` elem (TControl (Enter t)) + toks `shouldSatisfy` elem (TControl (Exit t)) + + describe "pipeline" $ do + + it "should roundtrip exactly over a pristine tree" $ do + let tagged = mark Unmodified tree + let printed = runReprinter src defaultJSONPipeline tagged + printed `shouldBe` Right src + + it "should roundtrip exactly over a wholly-modified tree" $ do + let tagged = mark Refactored tree + let printed = runReprinter src defaultJSONPipeline tagged + printed `shouldBe` Right src + + it "should be able to parse the output of a refactor" $ do + let tagged = increaseNumbers (mark Refactored tree) + let (Right printed) = runReprinter src defaultJSONPipeline tagged + tree' <- runTask (parse jsonParser (Blob printed path Language.JSON)) + length tree' `shouldSatisfy` (/= 0) diff --git a/test/Semantic/Spec.hs b/test/Semantic/Spec.hs index 80c5fb014..c487ba1ae 100644 --- a/test/Semantic/Spec.hs +++ b/test/Semantic/Spec.hs @@ -11,10 +11,13 @@ import SpecHelpers spec :: Spec spec = parallel $ do describe "parseBlob" $ do - it "throws if given an unknown language" $ do - runTask (runParse SExpressionTermRenderer [methodsBlob { blobLanguage = Unknown }]) `shouldThrow` (\ code -> case code of - ExitFailure 1 -> True - _ -> False) + it "returns error if given an unknown language (json)" $ do + output <- fmap runBuilder . runTask $ runParse JSONTermRenderer [ methodsBlob { blobLanguage = Unknown } ] + output `shouldBe` "{\"trees\":[{\"error\":{\"path\":\"methods.rb\",\"language\":\"Unknown\",\"message\":\"NoLanguageForBlob \\\"methods.rb\\\"\"}}]}\n" + + it "drops results for sexpression output" $ do + output <- fmap runBuilder . runTask $ runParse SExpressionTermRenderer [ methodsBlob { blobLanguage = Unknown } ] + output `shouldBe` "" it "renders with the specified renderer" $ do output <- fmap runBuilder . runTask $ runParse SExpressionTermRenderer [methodsBlob] diff --git a/test/Spec.hs b/test/Spec.hs index b19d99eb7..319f7be57 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -21,6 +21,7 @@ import qualified Integration.Spec import qualified Matching.Go.Spec import qualified Numeric.Spec import qualified Rendering.TOC.Spec +import qualified Reprinting.Spec import qualified Semantic.Spec import qualified Semantic.CLI.Spec import qualified Semantic.IO.Spec @@ -57,6 +58,7 @@ main = do describe "Matching" Matching.Go.Spec.spec describe "Numeric" Numeric.Spec.spec describe "Rendering.TOC" Rendering.TOC.Spec.spec + describe "Reprinting.Spec" Reprinting.Spec.spec describe "Semantic" Semantic.Spec.spec describe "Semantic.CLI" Semantic.CLI.Spec.spec describe "Semantic.IO" Semantic.IO.Spec.spec diff --git a/test/fixtures/javascript/reprinting/map.json b/test/fixtures/javascript/reprinting/map.json new file mode 100644 index 000000000..e948665c6 --- /dev/null +++ b/test/fixtures/javascript/reprinting/map.json @@ -0,0 +1,5 @@ +{ + "foo": 100, + "bar": [1, 2, 3], + "baz": true +} diff --git a/test/fixtures/python/reprinting/function.out.py b/test/fixtures/python/reprinting/function.out.py new file mode 100644 index 000000000..f84c04cfd --- /dev/null +++ b/test/fixtures/python/reprinting/function.out.py @@ -0,0 +1,7 @@ +def Foo(x): + if True: + return x + else: + return 0 + +Foo(1) diff --git a/test/fixtures/python/reprinting/function.py b/test/fixtures/python/reprinting/function.py new file mode 100644 index 000000000..f84c04cfd --- /dev/null +++ b/test/fixtures/python/reprinting/function.py @@ -0,0 +1,7 @@ +def Foo(x): + if True: + return x + else: + return 0 + +Foo(1) diff --git a/test/fixtures/ruby/reprinting/function.out.rb b/test/fixtures/ruby/reprinting/function.out.rb new file mode 100644 index 000000000..f22641e7b --- /dev/null +++ b/test/fixtures/ruby/reprinting/function.out.rb @@ -0,0 +1,6 @@ +def foo(x) + x +end +5.times() do |i| + puts(i) +end diff --git a/test/fixtures/ruby/reprinting/function.rb b/test/fixtures/ruby/reprinting/function.rb new file mode 100644 index 000000000..f93e797d1 --- /dev/null +++ b/test/fixtures/ruby/reprinting/function.rb @@ -0,0 +1,7 @@ +def foo(x) + x +end + +5.times do |i| + puts i +end diff --git a/test/fixtures/ruby/reprinting/infix.rb b/test/fixtures/ruby/reprinting/infix.rb new file mode 100644 index 000000000..c9cc31066 --- /dev/null +++ b/test/fixtures/ruby/reprinting/infix.rb @@ -0,0 +1,4 @@ +3 - 4 + 10 +1 * 2 + 3 +(1 * 2) + 3 +1 * (2 + 3)