1
1
mirror of https://github.com/github/semantic.git synced 2025-01-05 14:11:33 +03:00

Merge pull request #230 from github/lingo

Switch over to using lingo for language detection
This commit is contained in:
Timothy Clem 2019-08-26 11:05:18 -07:00 committed by GitHub
commit c1486db42d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 82 additions and 87 deletions

View File

@ -73,6 +73,7 @@ common dependencies
, unix ^>= 2.7.2.2
, proto3-suite
, proto3-wire
, lingo >= 0.1.0.1
common executable-flags
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m"
@ -360,6 +361,7 @@ test-suite test
, Data.Functor.Listable
, Data.Graph.Spec
, Data.Mergeable
, Data.Language.Spec
, Data.Range.Spec
, Data.Scientific.Spec
, Data.Semigroup.App.Spec

View File

@ -38,7 +38,7 @@ readBlobsFromDir :: MonadIO m => FilePath -> m [Blob]
readBlobsFromDir path = liftIO . fmap catMaybes $
findFilesInDir path supportedExts mempty >>= Async.mapConcurrently (readBlobFromFile . fileForPath)
-- | Read all blobs from the Git repo with Language.supportedExts
-- | Read all blobs from a git repo
readBlobsFromGitRepo :: MonadIO m => FilePath -> Git.OID -> [FilePath] -> [FilePath] -> m [Blob]
readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybes $
Git.lsTree path oid >>= Async.mapConcurrently (blobFromTreeEntry path)

View File

@ -1,19 +1,21 @@
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures, LambdaCase #-}
module Data.Language
( Language (..)
, SLanguage (..)
, extensionsForLanguage
, parseLanguage
, knownLanguage
, languageForFilePath
, pathIsMinified
, languageForType
, supportedExts
, codeNavLanguages
, textToLanguage
, languageToText
) where
import Data.Aeson
import qualified Data.Languages as Lingo
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
import Prologue
import System.FilePath.Posix
@ -77,68 +79,61 @@ instance SLanguage 'PHP where
instance FromJSON Language where
parseJSON = withText "Language" $ \l ->
pure $ fromMaybe Unknown (parseLanguage l)
parseLanguage :: Text -> Maybe Language
parseLanguage l = case T.toLower l of
"go" -> Just Go
"haskell" -> Just Haskell
"java" -> Just Java
"javascript" -> Just JavaScript
"json" -> Just JSON
"jsx" -> Just JSX
"markdown" -> Just Markdown
"python" -> Just Python
"ruby" -> Just Ruby
"typescript" -> Just TypeScript
"php" -> Just PHP
_ -> Nothing
pure $ textToLanguage l
-- | Predicate failing on 'Unknown' and passing in all other cases.
knownLanguage :: Language -> Bool
knownLanguage = (/= Unknown)
-- | Returns a Language based on the file extension (including the ".").
languageForType :: String -> Language
languageForType mediaType = case mediaType of
".java" -> Java
".json" -> JSON
".hs" -> Haskell
".md" -> Markdown
".rb" -> Ruby
".go" -> Go
".js" -> JavaScript
".mjs" -> JavaScript
".ts" -> TypeScript
".tsx" -> TSX
".jsx" -> JSX
".py" -> Python
".php" -> PHP
".phpt" -> PHP
_ -> Unknown
extensionsForLanguage :: Language -> [String]
extensionsForLanguage language = case language of
Go -> [".go"]
Haskell -> [".hs"]
JavaScript -> [".js", ".mjs"]
PHP -> [".php", ".phpt"]
Python -> [".py"]
Ruby -> [".rb"]
TypeScript -> [".ts"]
TSX -> [".tsx", ".d.tsx"]
JSX -> [".jsx"]
_ -> []
extensionsForLanguage language = T.unpack <$> maybe mempty Lingo.languageExtensions (Map.lookup (languageToText language) Lingo.languages)
-- | Return a language based on a FilePath's extension, or Nothing if extension is not found or not supported.
-- | Return a language based on a FilePath's extension.
languageForFilePath :: FilePath -> Language
languageForFilePath = languageForType . takeExtension
languageForFilePath path = maybe Unknown (textToLanguage . Lingo.languageName) (Lingo.languageForPath path)
supportedExts :: [String]
supportedExts = [".go", ".py", ".rb", ".js", ".mjs", ".ts", ".php", ".phpt"]
supportedExts = foldr append mempty supportedLanguages
where
append (Just l) b = fmap T.unpack (Lingo.languageExtensions l) <> b
append Nothing b = b
supportedLanguages = fmap lookup (languageToText <$> codeNavLanguages)
lookup k = Map.lookup k Lingo.languages
codeNavLanguages :: [Language]
codeNavLanguages = [Go, Ruby, Python, JavaScript, TypeScript, PHP]
pathIsMinified :: FilePath -> Bool
pathIsMinified = isExtensionOf ".min.js"
languageToText :: Language -> T.Text
languageToText = \case
Unknown -> "Unknown"
Go -> "Go"
Haskell -> "Haskell"
Java -> "Java"
JavaScript -> "JavaScript"
JSON -> "JSON"
JSX -> "JSX"
Markdown -> "Markdown"
Python -> "Python"
Ruby -> "Ruby"
TypeScript -> "TypeScript"
TSX -> "TSX"
PHP -> "PHP"
textToLanguage :: T.Text -> Language
textToLanguage = \case
"Go" -> Go
"Haskell" -> Haskell
"Java" -> Java
"JavaScript" -> JavaScript
"JSON" -> JSON
"JSX" -> JSX
"Markdown" -> Markdown
"Python" -> Python
"Ruby" -> Ruby
"TypeScript" -> TypeScript
"TSX" -> TSX
"PHP" -> PHP
_ -> Unknown

View File

@ -64,38 +64,7 @@ instance APIConvert Legacy.Span Data.Span where
fromAPI Legacy.Span {..} = Data.Span <$> (start >>= preview bridging) <*> (end >>= preview bridging)
instance APIBridge T.Text Data.Language where
bridging = iso apiLanguageToLanguage languageToApiLanguage where
languageToApiLanguage :: Data.Language -> T.Text
languageToApiLanguage = \case
Data.Unknown -> "Unknown"
Data.Go -> "Go"
Data.Haskell -> "Haskell"
Data.Java -> "Java"
Data.JavaScript -> "JavaScript"
Data.JSON -> "JSON"
Data.JSX -> "JSX"
Data.Markdown -> "Markdown"
Data.Python -> "Python"
Data.Ruby -> "Ruby"
Data.TypeScript -> "TypeScript"
Data.TSX -> "TSX"
Data.PHP -> "PHP"
apiLanguageToLanguage :: T.Text -> Data.Language
apiLanguageToLanguage = \case
"Go" -> Data.Go
"Haskell" -> Data.Haskell
"Java" -> Data.Java
"JavaScript" -> Data.JavaScript
"JSON" -> Data.JSON
"JSX" -> Data.JSX
"Markdown" -> Data.Markdown
"Python" -> Data.Python
"Ruby" -> Data.Ruby
"TypeScript" -> Data.TypeScript
"TSX" -> Data.TSX
"PHP" -> Data.PHP
_ -> Data.Unknown
bridging = iso Data.textToLanguage Data.languageToText
instance APIBridge API.Blob Data.Blob where
bridging = iso apiBlobToBlob blobToApiBlob where

View File

@ -5,7 +5,7 @@ import Control.Exception as Exc (displayException)
import Data.Blob
import Data.Blob.IO
import Data.Handle
import Data.Language (languageForFilePath, parseLanguage)
import qualified Data.Language as Language
import Data.List (intercalate, uncons)
import Data.List.Split (splitWhen)
import Data.Project
@ -180,8 +180,22 @@ filePathReader = eitherReader parseFilePath
parseFilePath arg = case splitWhen (== ':') arg of
[a, b] | Just lang <- parseLanguage (T.pack b) -> Right (File a lang)
| Just lang <- parseLanguage (T.pack a) -> Right (File b lang)
[path] -> Right (File path (languageForFilePath path))
[path] -> Right (File path (Language.languageForFilePath path))
_ -> Left ("cannot parse `" <> arg <> "`\nexpecting FILE:LANGUAGE or just FILE")
parseLanguage :: Text -> Maybe Language.Language
parseLanguage l = case T.toLower l of
"go" -> Just Language.Go
"haskell" -> Just Language.Haskell
"java" -> Just Language.Java
"javascript" -> Just Language.JavaScript
"json" -> Just Language.JSON
"jsx" -> Just Language.JSX
"markdown" -> Just Language.Markdown
"python" -> Just Language.Python
"ruby" -> Just Language.Ruby
"typescript" -> Just Language.TypeScript
"php" -> Just Language.PHP
_ -> Nothing
options :: Eq a => [(String, a)] -> Mod OptionFields a -> Parser a
options options fields = option (optionsReader options) (fields <> showDefaultWith (findOption options) <> metavar (intercalate "|" (fmap fst options)))

View File

@ -0,0 +1,13 @@
module Data.Language.Spec (testTree) where
import Data.Language
import Test.Tasty
import Test.Tasty.HUnit
testTree :: TestTree
testTree = testGroup "Data.Language"
[ testCase "supportedExts returns expected list" $
supportedExts @=? [".go",".rb",".builder",".eye",".fcgi",".gemspec",".god",".jbuilder",".mspec",".pluginspec",".podspec",".rabl",".rake",".rbuild",".rbw",".rbx",".ru",".ruby",".spec",".thor",".watchr",".py",".bzl",".cgi",".fcgi",".gyp",".gypi",".lmi",".py3",".pyde",".pyi",".pyp",".pyt",".pyw",".rpy",".spec",".tac",".wsgi",".xpy",".js","._js",".bones",".es",".es6",".frag",".gs",".jake",".jsb",".jscad",".jsfl",".jsm",".jss",".mjs",".njs",".pac",".sjs",".ssjs",".xsjs",".xsjslib",".ts",".php",".aw",".ctp",".fcgi",".inc",".php3",".php4",".php5",".phps",".phpt"]
, testCase "codeNavLanguages returns expected list" $
codeNavLanguages @=? [Go, Ruby, Python, JavaScript, TypeScript, PHP]
]

View File

@ -14,6 +14,7 @@ import qualified Data.Abstract.Name.Spec
import qualified Data.Abstract.Path.Spec
import qualified Data.Functor.Classes.Generic.Spec
import qualified Data.Graph.Spec
import qualified Data.Language.Spec
import qualified Data.Range.Spec
import qualified Data.Scientific.Spec
import qualified Data.Semigroup.App.Spec
@ -46,6 +47,7 @@ tests :: (?session :: TaskSession) => [TestTree]
tests =
[ Integration.Spec.testTree
, Semantic.CLI.Spec.testTree
, Data.Language.Spec.testTree
, Data.Source.Spec.testTree
, Semantic.Stat.Spec.testTree
]