1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 23:11:50 +03:00

Streamline manually language parsing

This commit is contained in:
Timothy Clem 2019-08-16 14:06:49 -07:00
parent 0aab5ebd55
commit 147604e59d
3 changed files with 56 additions and 78 deletions

View File

@ -1,14 +1,15 @@
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures, LambdaCase #-}
module Data.Language
( Language (..)
, SLanguage (..)
, extensionsForLanguage
, parseLanguage
, knownLanguage
, languageForFilePath
, pathIsMinified
, supportedExts
, codeNavLanguages
, textToLanguage
, languageToText
) where
import Data.Aeson
@ -78,63 +79,25 @@ instance SLanguage 'PHP where
instance FromJSON Language where
parseJSON = withText "Language" $ \l ->
pure $ fromMaybe Unknown (parseLanguage l)
parseLanguage :: Text -> Maybe Language
parseLanguage l = case T.toLower l of
"go" -> Just Go
"haskell" -> Just Haskell
"java" -> Just Java
"javascript" -> Just JavaScript
"json" -> Just JSON
"jsx" -> Just JSX
"markdown" -> Just Markdown
"python" -> Just Python
"ruby" -> Just Ruby
"typescript" -> Just TypeScript
"php" -> Just PHP
_ -> Nothing
pure $ textToLanguage l
-- | Predicate failing on 'Unknown' and passing in all other cases.
knownLanguage :: Language -> Bool
knownLanguage = (/= Unknown)
extensionsForLanguage :: Language -> [String]
extensionsForLanguage language = case language of
Go -> [".go"]
Haskell -> [".hs"]
JavaScript -> [".js", ".mjs"]
PHP -> [".php", ".phpt"]
Python -> [".py"]
Ruby -> [".rb"]
TypeScript -> [".ts"]
TSX -> [".tsx", ".d.tsx"]
JSX -> [".jsx"]
_ -> []
extensionsForLanguage language = T.unpack <$> maybe mempty Lingo.languageExtensions (Map.lookup (languageToText language) Lingo.languages)
-- | Return a language based on a FilePath's extension.
languageForFilePath :: FilePath -> Language
languageForFilePath path = case Lingo.languageName <$> Lingo.languageForPath path of
Just "Go" -> Go
Just "Haskell" -> Haskell
Just "Java" -> Java
Just "JavaScript" -> JavaScript
Just "JSON" -> JSON
Just "JSX" -> JSX
Just "Markdown" -> Markdown
Just "PHP" -> PHP
Just "Python" -> Python
Just "Ruby" -> Ruby
Just "TSX" -> TSX
Just "TypeScript" -> TypeScript
_ -> Unknown
languageForFilePath path = maybe Unknown (textToLanguage . Lingo.languageName) (Lingo.languageForPath path)
supportedExts :: [String]
supportedExts = foldr append mempty supportedLanguages
where
append (Just l) b = fmap T.unpack (Lingo.languageExtensions l) <> b
append Nothing b = b
supportedLanguages = fmap lookup ["Go", "Ruby", "Python", "JavaScript", "TypeScript", "PHP"]
supportedLanguages = fmap lookup (languageToText <$> codeNavLanguages)
lookup k = Map.lookup k Lingo.languages
codeNavLanguages :: [Language]
@ -142,3 +105,35 @@ codeNavLanguages = [Go, Ruby, Python, JavaScript, TypeScript, PHP]
pathIsMinified :: FilePath -> Bool
pathIsMinified = isExtensionOf ".min.js"
languageToText :: Language -> T.Text
languageToText = \case
Unknown -> "Unknown"
Go -> "Go"
Haskell -> "Haskell"
Java -> "Java"
JavaScript -> "JavaScript"
JSON -> "JSON"
JSX -> "JSX"
Markdown -> "Markdown"
Python -> "Python"
Ruby -> "Ruby"
TypeScript -> "TypeScript"
TSX -> "TSX"
PHP -> "PHP"
textToLanguage :: T.Text -> Language
textToLanguage = \case
"Go" -> Go
"Haskell" -> Haskell
"Java" -> Java
"JavaScript" -> JavaScript
"JSON" -> JSON
"JSX" -> JSX
"Markdown" -> Markdown
"Python" -> Python
"Ruby" -> Ruby
"TypeScript" -> TypeScript
"TSX" -> TSX
"PHP" -> PHP
_ -> Unknown

View File

@ -64,38 +64,7 @@ instance APIConvert Legacy.Span Data.Span where
fromAPI Legacy.Span {..} = Data.Span <$> (start >>= preview bridging) <*> (end >>= preview bridging)
instance APIBridge T.Text Data.Language where
bridging = iso apiLanguageToLanguage languageToApiLanguage where
languageToApiLanguage :: Data.Language -> T.Text
languageToApiLanguage = \case
Data.Unknown -> "Unknown"
Data.Go -> "Go"
Data.Haskell -> "Haskell"
Data.Java -> "Java"
Data.JavaScript -> "JavaScript"
Data.JSON -> "JSON"
Data.JSX -> "JSX"
Data.Markdown -> "Markdown"
Data.Python -> "Python"
Data.Ruby -> "Ruby"
Data.TypeScript -> "TypeScript"
Data.TSX -> "TSX"
Data.PHP -> "PHP"
apiLanguageToLanguage :: T.Text -> Data.Language
apiLanguageToLanguage = \case
"Go" -> Data.Go
"Haskell" -> Data.Haskell
"Java" -> Data.Java
"JavaScript" -> Data.JavaScript
"JSON" -> Data.JSON
"JSX" -> Data.JSX
"Markdown" -> Data.Markdown
"Python" -> Data.Python
"Ruby" -> Data.Ruby
"TypeScript" -> Data.TypeScript
"TSX" -> Data.TSX
"PHP" -> Data.PHP
_ -> Data.Unknown
bridging = iso Data.textToLanguage Data.languageToText
instance APIBridge API.Blob Data.Blob where
bridging = iso apiBlobToBlob blobToApiBlob where

View File

@ -5,7 +5,7 @@ import Control.Exception as Exc (displayException)
import Data.Blob
import Data.Blob.IO
import Data.Handle
import Data.Language (languageForFilePath, parseLanguage)
import qualified Data.Language as Language
import Data.List (intercalate, uncons)
import Data.List.Split (splitWhen)
import Data.Project
@ -180,8 +180,22 @@ filePathReader = eitherReader parseFilePath
parseFilePath arg = case splitWhen (== ':') arg of
[a, b] | Just lang <- parseLanguage (T.pack b) -> Right (File a lang)
| Just lang <- parseLanguage (T.pack a) -> Right (File b lang)
[path] -> Right (File path (languageForFilePath path))
[path] -> Right (File path (Language.languageForFilePath path))
_ -> Left ("cannot parse `" <> arg <> "`\nexpecting FILE:LANGUAGE or just FILE")
parseLanguage :: Text -> Maybe Language.Language
parseLanguage l = case T.toLower l of
"go" -> Just Language.Go
"haskell" -> Just Language.Haskell
"java" -> Just Language.Java
"javascript" -> Just Language.JavaScript
"json" -> Just Language.JSON
"jsx" -> Just Language.JSX
"markdown" -> Just Language.Markdown
"python" -> Just Language.Python
"ruby" -> Just Language.Ruby
"typescript" -> Just Language.TypeScript
"php" -> Just Language.PHP
_ -> Nothing
options :: Eq a => [(String, a)] -> Mod OptionFields a -> Parser a
options options fields = option (optionsReader options) (fields <> showDefaultWith (findOption options) <> metavar (intercalate "|" (fmap fst options)))