mirror of
https://github.com/github/semantic.git
synced 2025-01-05 14:11:33 +03:00
Merge pull request #230 from github/lingo
Switch over to using lingo for language detection
This commit is contained in:
commit
c1486db42d
@ -73,6 +73,7 @@ common dependencies
|
||||
, unix ^>= 2.7.2.2
|
||||
, proto3-suite
|
||||
, proto3-wire
|
||||
, lingo >= 0.1.0.1
|
||||
|
||||
common executable-flags
|
||||
ghc-options: -threaded -rtsopts "-with-rtsopts=-N -A4m -n2m"
|
||||
@ -360,6 +361,7 @@ test-suite test
|
||||
, Data.Functor.Listable
|
||||
, Data.Graph.Spec
|
||||
, Data.Mergeable
|
||||
, Data.Language.Spec
|
||||
, Data.Range.Spec
|
||||
, Data.Scientific.Spec
|
||||
, Data.Semigroup.App.Spec
|
||||
|
@ -38,7 +38,7 @@ readBlobsFromDir :: MonadIO m => FilePath -> m [Blob]
|
||||
readBlobsFromDir path = liftIO . fmap catMaybes $
|
||||
findFilesInDir path supportedExts mempty >>= Async.mapConcurrently (readBlobFromFile . fileForPath)
|
||||
|
||||
-- | Read all blobs from the Git repo with Language.supportedExts
|
||||
-- | Read all blobs from a git repo
|
||||
readBlobsFromGitRepo :: MonadIO m => FilePath -> Git.OID -> [FilePath] -> [FilePath] -> m [Blob]
|
||||
readBlobsFromGitRepo path oid excludePaths includePaths = liftIO . fmap catMaybes $
|
||||
Git.lsTree path oid >>= Async.mapConcurrently (blobFromTreeEntry path)
|
||||
|
@ -1,19 +1,21 @@
|
||||
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures #-}
|
||||
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, KindSignatures, LambdaCase #-}
|
||||
module Data.Language
|
||||
( Language (..)
|
||||
, SLanguage (..)
|
||||
, extensionsForLanguage
|
||||
, parseLanguage
|
||||
, knownLanguage
|
||||
, languageForFilePath
|
||||
, pathIsMinified
|
||||
, languageForType
|
||||
, supportedExts
|
||||
, codeNavLanguages
|
||||
, textToLanguage
|
||||
, languageToText
|
||||
) where
|
||||
|
||||
import Data.Aeson
|
||||
import qualified Data.Languages as Lingo
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Prologue
|
||||
import System.FilePath.Posix
|
||||
|
||||
@ -77,68 +79,61 @@ instance SLanguage 'PHP where
|
||||
|
||||
instance FromJSON Language where
|
||||
parseJSON = withText "Language" $ \l ->
|
||||
pure $ fromMaybe Unknown (parseLanguage l)
|
||||
|
||||
parseLanguage :: Text -> Maybe Language
|
||||
parseLanguage l = case T.toLower l of
|
||||
"go" -> Just Go
|
||||
"haskell" -> Just Haskell
|
||||
"java" -> Just Java
|
||||
"javascript" -> Just JavaScript
|
||||
"json" -> Just JSON
|
||||
"jsx" -> Just JSX
|
||||
"markdown" -> Just Markdown
|
||||
"python" -> Just Python
|
||||
"ruby" -> Just Ruby
|
||||
"typescript" -> Just TypeScript
|
||||
"php" -> Just PHP
|
||||
_ -> Nothing
|
||||
pure $ textToLanguage l
|
||||
|
||||
-- | Predicate failing on 'Unknown' and passing in all other cases.
|
||||
knownLanguage :: Language -> Bool
|
||||
knownLanguage = (/= Unknown)
|
||||
|
||||
-- | Returns a Language based on the file extension (including the ".").
|
||||
languageForType :: String -> Language
|
||||
languageForType mediaType = case mediaType of
|
||||
".java" -> Java
|
||||
".json" -> JSON
|
||||
".hs" -> Haskell
|
||||
".md" -> Markdown
|
||||
".rb" -> Ruby
|
||||
".go" -> Go
|
||||
".js" -> JavaScript
|
||||
".mjs" -> JavaScript
|
||||
".ts" -> TypeScript
|
||||
".tsx" -> TSX
|
||||
".jsx" -> JSX
|
||||
".py" -> Python
|
||||
".php" -> PHP
|
||||
".phpt" -> PHP
|
||||
_ -> Unknown
|
||||
|
||||
extensionsForLanguage :: Language -> [String]
|
||||
extensionsForLanguage language = case language of
|
||||
Go -> [".go"]
|
||||
Haskell -> [".hs"]
|
||||
JavaScript -> [".js", ".mjs"]
|
||||
PHP -> [".php", ".phpt"]
|
||||
Python -> [".py"]
|
||||
Ruby -> [".rb"]
|
||||
TypeScript -> [".ts"]
|
||||
TSX -> [".tsx", ".d.tsx"]
|
||||
JSX -> [".jsx"]
|
||||
_ -> []
|
||||
extensionsForLanguage language = T.unpack <$> maybe mempty Lingo.languageExtensions (Map.lookup (languageToText language) Lingo.languages)
|
||||
|
||||
-- | Return a language based on a FilePath's extension, or Nothing if extension is not found or not supported.
|
||||
-- | Return a language based on a FilePath's extension.
|
||||
languageForFilePath :: FilePath -> Language
|
||||
languageForFilePath = languageForType . takeExtension
|
||||
languageForFilePath path = maybe Unknown (textToLanguage . Lingo.languageName) (Lingo.languageForPath path)
|
||||
|
||||
supportedExts :: [String]
|
||||
supportedExts = [".go", ".py", ".rb", ".js", ".mjs", ".ts", ".php", ".phpt"]
|
||||
supportedExts = foldr append mempty supportedLanguages
|
||||
where
|
||||
append (Just l) b = fmap T.unpack (Lingo.languageExtensions l) <> b
|
||||
append Nothing b = b
|
||||
supportedLanguages = fmap lookup (languageToText <$> codeNavLanguages)
|
||||
lookup k = Map.lookup k Lingo.languages
|
||||
|
||||
codeNavLanguages :: [Language]
|
||||
codeNavLanguages = [Go, Ruby, Python, JavaScript, TypeScript, PHP]
|
||||
|
||||
pathIsMinified :: FilePath -> Bool
|
||||
pathIsMinified = isExtensionOf ".min.js"
|
||||
|
||||
languageToText :: Language -> T.Text
|
||||
languageToText = \case
|
||||
Unknown -> "Unknown"
|
||||
Go -> "Go"
|
||||
Haskell -> "Haskell"
|
||||
Java -> "Java"
|
||||
JavaScript -> "JavaScript"
|
||||
JSON -> "JSON"
|
||||
JSX -> "JSX"
|
||||
Markdown -> "Markdown"
|
||||
Python -> "Python"
|
||||
Ruby -> "Ruby"
|
||||
TypeScript -> "TypeScript"
|
||||
TSX -> "TSX"
|
||||
PHP -> "PHP"
|
||||
|
||||
textToLanguage :: T.Text -> Language
|
||||
textToLanguage = \case
|
||||
"Go" -> Go
|
||||
"Haskell" -> Haskell
|
||||
"Java" -> Java
|
||||
"JavaScript" -> JavaScript
|
||||
"JSON" -> JSON
|
||||
"JSX" -> JSX
|
||||
"Markdown" -> Markdown
|
||||
"Python" -> Python
|
||||
"Ruby" -> Ruby
|
||||
"TypeScript" -> TypeScript
|
||||
"TSX" -> TSX
|
||||
"PHP" -> PHP
|
||||
_ -> Unknown
|
||||
|
@ -64,38 +64,7 @@ instance APIConvert Legacy.Span Data.Span where
|
||||
fromAPI Legacy.Span {..} = Data.Span <$> (start >>= preview bridging) <*> (end >>= preview bridging)
|
||||
|
||||
instance APIBridge T.Text Data.Language where
|
||||
bridging = iso apiLanguageToLanguage languageToApiLanguage where
|
||||
languageToApiLanguage :: Data.Language -> T.Text
|
||||
languageToApiLanguage = \case
|
||||
Data.Unknown -> "Unknown"
|
||||
Data.Go -> "Go"
|
||||
Data.Haskell -> "Haskell"
|
||||
Data.Java -> "Java"
|
||||
Data.JavaScript -> "JavaScript"
|
||||
Data.JSON -> "JSON"
|
||||
Data.JSX -> "JSX"
|
||||
Data.Markdown -> "Markdown"
|
||||
Data.Python -> "Python"
|
||||
Data.Ruby -> "Ruby"
|
||||
Data.TypeScript -> "TypeScript"
|
||||
Data.TSX -> "TSX"
|
||||
Data.PHP -> "PHP"
|
||||
|
||||
apiLanguageToLanguage :: T.Text -> Data.Language
|
||||
apiLanguageToLanguage = \case
|
||||
"Go" -> Data.Go
|
||||
"Haskell" -> Data.Haskell
|
||||
"Java" -> Data.Java
|
||||
"JavaScript" -> Data.JavaScript
|
||||
"JSON" -> Data.JSON
|
||||
"JSX" -> Data.JSX
|
||||
"Markdown" -> Data.Markdown
|
||||
"Python" -> Data.Python
|
||||
"Ruby" -> Data.Ruby
|
||||
"TypeScript" -> Data.TypeScript
|
||||
"TSX" -> Data.TSX
|
||||
"PHP" -> Data.PHP
|
||||
_ -> Data.Unknown
|
||||
bridging = iso Data.textToLanguage Data.languageToText
|
||||
|
||||
instance APIBridge API.Blob Data.Blob where
|
||||
bridging = iso apiBlobToBlob blobToApiBlob where
|
||||
|
@ -5,7 +5,7 @@ import Control.Exception as Exc (displayException)
|
||||
import Data.Blob
|
||||
import Data.Blob.IO
|
||||
import Data.Handle
|
||||
import Data.Language (languageForFilePath, parseLanguage)
|
||||
import qualified Data.Language as Language
|
||||
import Data.List (intercalate, uncons)
|
||||
import Data.List.Split (splitWhen)
|
||||
import Data.Project
|
||||
@ -180,8 +180,22 @@ filePathReader = eitherReader parseFilePath
|
||||
parseFilePath arg = case splitWhen (== ':') arg of
|
||||
[a, b] | Just lang <- parseLanguage (T.pack b) -> Right (File a lang)
|
||||
| Just lang <- parseLanguage (T.pack a) -> Right (File b lang)
|
||||
[path] -> Right (File path (languageForFilePath path))
|
||||
[path] -> Right (File path (Language.languageForFilePath path))
|
||||
_ -> Left ("cannot parse `" <> arg <> "`\nexpecting FILE:LANGUAGE or just FILE")
|
||||
parseLanguage :: Text -> Maybe Language.Language
|
||||
parseLanguage l = case T.toLower l of
|
||||
"go" -> Just Language.Go
|
||||
"haskell" -> Just Language.Haskell
|
||||
"java" -> Just Language.Java
|
||||
"javascript" -> Just Language.JavaScript
|
||||
"json" -> Just Language.JSON
|
||||
"jsx" -> Just Language.JSX
|
||||
"markdown" -> Just Language.Markdown
|
||||
"python" -> Just Language.Python
|
||||
"ruby" -> Just Language.Ruby
|
||||
"typescript" -> Just Language.TypeScript
|
||||
"php" -> Just Language.PHP
|
||||
_ -> Nothing
|
||||
|
||||
options :: Eq a => [(String, a)] -> Mod OptionFields a -> Parser a
|
||||
options options fields = option (optionsReader options) (fields <> showDefaultWith (findOption options) <> metavar (intercalate "|" (fmap fst options)))
|
||||
|
13
test/Data/Language/Spec.hs
Normal file
13
test/Data/Language/Spec.hs
Normal file
@ -0,0 +1,13 @@
|
||||
module Data.Language.Spec (testTree) where
|
||||
|
||||
import Data.Language
|
||||
import Test.Tasty
|
||||
import Test.Tasty.HUnit
|
||||
|
||||
testTree :: TestTree
|
||||
testTree = testGroup "Data.Language"
|
||||
[ testCase "supportedExts returns expected list" $
|
||||
supportedExts @=? [".go",".rb",".builder",".eye",".fcgi",".gemspec",".god",".jbuilder",".mspec",".pluginspec",".podspec",".rabl",".rake",".rbuild",".rbw",".rbx",".ru",".ruby",".spec",".thor",".watchr",".py",".bzl",".cgi",".fcgi",".gyp",".gypi",".lmi",".py3",".pyde",".pyi",".pyp",".pyt",".pyw",".rpy",".spec",".tac",".wsgi",".xpy",".js","._js",".bones",".es",".es6",".frag",".gs",".jake",".jsb",".jscad",".jsfl",".jsm",".jss",".mjs",".njs",".pac",".sjs",".ssjs",".xsjs",".xsjslib",".ts",".php",".aw",".ctp",".fcgi",".inc",".php3",".php4",".php5",".phps",".phpt"]
|
||||
, testCase "codeNavLanguages returns expected list" $
|
||||
codeNavLanguages @=? [Go, Ruby, Python, JavaScript, TypeScript, PHP]
|
||||
]
|
@ -14,6 +14,7 @@ import qualified Data.Abstract.Name.Spec
|
||||
import qualified Data.Abstract.Path.Spec
|
||||
import qualified Data.Functor.Classes.Generic.Spec
|
||||
import qualified Data.Graph.Spec
|
||||
import qualified Data.Language.Spec
|
||||
import qualified Data.Range.Spec
|
||||
import qualified Data.Scientific.Spec
|
||||
import qualified Data.Semigroup.App.Spec
|
||||
@ -46,6 +47,7 @@ tests :: (?session :: TaskSession) => [TestTree]
|
||||
tests =
|
||||
[ Integration.Spec.testTree
|
||||
, Semantic.CLI.Spec.testTree
|
||||
, Data.Language.Spec.testTree
|
||||
, Data.Source.Spec.testTree
|
||||
, Semantic.Stat.Spec.testTree
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user