Extract module names from sources

This commit is contained in:
Vladimir Kalnitsky 2022-08-31 21:41:09 +04:00
parent 7593b0110f
commit d5db431281
10 changed files with 1796 additions and 446 deletions

2071
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -7,31 +7,31 @@
"test": "test"
},
"bin": {
"purescript-docs-search": "dist/purescript-docs-search"
"purescript-docs-search": "dist/purescript-docs-search.cjs"
},
"files": [
"dist/purescript-docs-search",
"dist/purescript-docs-search.cjs",
"dist/docs-search-app.js",
"README.md",
"CHANGELOG.md"
],
"scripts": {
"test": "spago docs --no-search && ./dist/purescript-docs-search build-index && spago test && npm run check-version",
"test": "spago docs --no-search && ./dist/purescript-docs-search.cjs build-index && spago test && npm run check-version",
"bundle-app": "spago bundle-app --no-build --no-install -m Docs.Search.App --to dist/docs-search-app.js",
"esbuild-app": "esbuild dist/docs-search-app.js --target=es2016 --bundle --minify --outfile=dist/docs-search-app.min.js && mv dist/docs-search-app.min.js dist/docs-search-app.js",
"build-app": "npm run bundle-app && npm run esbuild-app",
"bundle-main": "spago bundle-app --no-build --no-install --platform node -m Docs.Search.Main --to dist/main.js",
"esbuild-main": "esbuild dist/main.js --platform=node --bundle --minify --outfile=dist/main.min.js && mv dist/main.min.js dist/main.js",
"add-shebang": "echo \"#!/usr/bin/env node\" > dist/purescript-docs-search && cat dist/main.js >> dist/purescript-docs-search",
"chmod-main": "chmod +x dist/purescript-docs-search",
"add-shebang": "echo \"#!/usr/bin/env node\" > dist/purescript-docs-search.cjs && cat dist/main.js >> dist/purescript-docs-search.cjs",
"chmod-main": "chmod +x dist/purescript-docs-search.cjs",
"build-main": "npm run bundle-main && npm run esbuild-main && npm run add-shebang && rm dist/main.js && npm run chmod-main",
"build": "spago build && npm run build-app && npm run build-main",
"clean": "rm -rf dist",
"check-version": "[ \"$(./dist/purescript-docs-search version)\" = \"$npm_package_version\" ]"
"check-version": "[ \"$(./dist/purescript-docs-search.cjs version)\" = \"$npm_package_version\" ]"
},
"repository": {
"type": "git",
"url": "git+https://github.com/spacchetti/purescript-docs-search.git"
"url": "git+https://github.com/purescript/purescript-docs-search.git"
},
"keywords": [
"purescript"
@ -39,15 +39,17 @@
"author": "Kalnitsky Vladimir <klntsky@gmail.com>",
"license": "BSD-3-Clause",
"bugs": {
"url": "https://github.com/spacchetti/purescript-docs-search/issues"
"url": "https://github.com/purescript/purescript-docs-search/issues"
},
"homepage": "https://github.com/purescript/purescript-docs-search#readme",
"dependencies": {
"punycode": "^2.1.1"
},
"homepage": "https://github.com/spacchetti/purescript-docs-search#readme",
"dependencies": {},
"devDependencies": {
"esbuild": "^0.11.10",
"glob": "^7.1.6",
"markdown-it": "^12.0.4",
"puppeteer": "^8.0.0",
"spago": "^0.20.0"
"spago": "^0.20.9"
}
}

View File

@ -33,6 +33,7 @@
, "node-readline"
, "optparse"
, "ordered-collections"
, "parallel"
, "partial"
, "prelude"
, "profunctor"

View File

@ -5,13 +5,14 @@ import Docs.Search.Config as Config
import Docs.Search.Declarations (Declarations(..), mkDeclarations)
import Docs.Search.DocsJson (DocsJson)
import Docs.Search.Extra ((>#>))
import Docs.Search.Meta (Meta)
import Docs.Search.ModuleIndex (PackedModuleIndex, mkPackedModuleIndex)
import Docs.Search.ModuleParser (parseModuleName)
import Docs.Search.PackageIndex (PackageInfo, mkPackageInfo)
import Docs.Search.Score (mkScores)
import Docs.Search.SearchResult (SearchResult)
import Docs.Search.TypeIndex (TypeIndex, mkTypeIndex)
import Docs.Search.Types (PackageName, PartId)
import Docs.Search.Meta (Meta)
import Docs.Search.Types (ModuleName, PackageName, PartId)
import Prelude
@ -20,6 +21,7 @@ import Data.Argonaut.Decode (decodeJson)
import Data.Argonaut.Decode.Error (printJsonDecodeError)
import Data.Argonaut.Encode (encodeJson)
import Data.Argonaut.Parser (jsonParser)
import Data.Array (concat)
import Data.Array as Array
import Data.Either (Either(..), either)
import Data.Foldable (sum)
@ -38,14 +40,15 @@ import Data.String.Common (replace) as String
import Data.String.Pattern (Pattern(..), Replacement(..))
import Data.Traversable (for, for_)
import Data.Tuple (Tuple(..), fst)
import Data.Tuple.Nested ((/\))
import Effect (Effect)
import Effect.Aff (Aff, launchAff_, parallel, sequential)
import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(UTF8))
import Node.FS.Aff (mkdir, readFile, readTextFile, readdir, stat, writeFile, writeTextFile)
import Node.FS.Sync (exists)
import Node.FS.Stats (isDirectory, isFile)
import Node.FS.Sync (exists)
import Node.Process as Process
import Web.Bower.PackageMeta (PackageMeta(..))
@ -56,6 +59,7 @@ type Config =
, generatedDocs :: String
, noPatch :: Boolean
, packageName :: PackageName
, sourceFiles :: Array String
}
@ -71,8 +75,11 @@ run' cfg = do
liftEffect do
log "Building the search index..."
docsJsons <- decodeDocsJsons cfg
packageMetas <- decodeBowerJsons cfg
docsJsons /\ moduleNames /\ packageMetas <- sequential $
(\d h m -> d /\ h /\ m)
<$> parallel (decodeDocsJsons cfg)
<*> parallel (parseModuleHeaders cfg.sourceFiles)
<*> parallel (decodeBowerJsons cfg)
let countOfPackages = Array.length packageMetas
countOfModules = Array.length docsJsons
@ -89,14 +96,14 @@ run' cfg = do
index = mkDeclarations scores docsJsons
typeIndex = mkTypeIndex scores docsJsons
packageInfo = mkPackageInfo scores packageMetas
moduleIndex = mkPackedModuleIndex index
moduleIndex = mkPackedModuleIndex index moduleNames
meta = { localPackageName: cfg.packageName }
createDirectories cfg
void $ sequential do
ignore <$> parallel (writeIndex cfg index)
<*> parallel (writeTypeIndex cfg typeIndex)
<*> parallel (writeTypeIndex typeIndex)
<*> parallel (writePackageInfo packageInfo)
<*> parallel (writeModuleIndex moduleIndex)
<*> parallel (writeMeta meta)
@ -176,6 +183,24 @@ decodeDocsJsons cfg@{ docsFiles } = do
pure docsJsons
-- | Collects module names by parsing the module header of every source file
-- | matched by the given globs.
-- |
-- | `docs.json` files are not enough to discover all module names:
-- | reexport-only modules do not contain a single declaration, so
-- | `mkPackedModuleIndex.extract`, which only looks at exported declarations,
-- | never sees them.
-- |
-- | Files whose header cannot be parsed are reported on the console and
-- | contribute no module name to the result.
parseModuleHeaders :: Array String -> Aff (Array ModuleName)
parseModuleHeaders globs = do
  files <- getPathsByGlobs globs
  namesPerFile <- for files moduleNamesIn
  pure (concat namesPerFile)
  where
  -- Yields a singleton array with the module name of the file, or an empty
  -- array (after logging) when the header could not be parsed.
  moduleNamesIn filePath = do
    fileContents <- readTextFile UTF8 filePath
    case parseModuleName fileContents of
      Just name -> pure [ name ]
      Nothing -> do
        liftEffect $ log $
          "Module header decoding failed for " <> filePath <>
            ", unable to extract module name"
        pure []
decodeBowerJsons
:: forall rest
@ -210,8 +235,8 @@ decodeBowerJsons { bowerFiles } = do
-- | Write type index parts to files.
writeTypeIndex :: Config -> TypeIndex -> Aff Unit
writeTypeIndex { generatedDocs } typeIndex =
writeTypeIndex :: TypeIndex -> Aff Unit
writeTypeIndex typeIndex =
for_ entries \(Tuple typeShape results) -> do
writeTextFile UTF8 (unwrap Config.typeIndexDirectory <> "/" <> typeShape <> ".js")
(mkHeader typeShape <> stringify (encodeJson results))
@ -265,7 +290,7 @@ getIndex (Declarations trie) =
Trie.query prefix trie
else
-- Entries with path lengths > 1 have been added already.
List.filter (\(Tuple path value) -> List.length path == 1) (
List.filter (\(Tuple path _value) -> List.length path == 1) (
Trie.query prefix trie
)
in

View File

@ -6,6 +6,7 @@ import Docs.Search.DocsJson (DataDeclType(..))
import Docs.Search.Engine (mkEngineState, packageInfoToString, Result(..))
import Docs.Search.Engine as Engine
import Docs.Search.Extra (stringToList, (>#>))
import Docs.Search.IndexBuilder (parseModuleHeaders)
import Docs.Search.IndexBuilder as IndexBuilder
import Docs.Search.ModuleIndex (ModuleResult, mkPackedModuleIndex, unpackModuleIndex)
import Docs.Search.NodeEngine (nodeEngine)
@ -19,7 +20,8 @@ import Docs.Search.TypePrinter (keyword, showConstraint, showFunDeps, showType,
import Docs.Search.Types (ModuleName, PackageName, PackageInfo, Identifier)
import Prelude
import Prim hiding (Type, Constraint)
import Control.Parallel (parallel, sequential)
import Data.Array as Array
import Data.Identity (Identity(..))
import Data.List as List
@ -28,17 +30,20 @@ import Data.Newtype (un, unwrap, wrap)
import Data.Search.Trie as Trie
import Data.String (length) as String
import Data.String.Common (split, toLower, trim) as String
import Data.Tuple.Nested ((/\))
import Effect (Effect)
import Effect.Aff (launchAff_)
import Effect.Class (liftEffect)
import Effect.Console (log, clear) as Console
import Node.ReadLine (createConsoleInterface, question)
import Prim hiding (Type, Constraint)
type Config =
{ docsFiles :: Array String
, bowerFiles :: Array String
, packageName :: PackageName
, sourceFiles :: Array String
}
@ -48,14 +53,17 @@ run cfg = launchAff_ $ do
liftEffect do
Console.log "Loading search index..."
docsJsons <- IndexBuilder.decodeDocsJsons cfg
packageMetas <- IndexBuilder.decodeBowerJsons cfg
docsJsons /\ moduleNames /\ packageMetas <- sequential $
(\d h m -> d /\ h /\ m)
<$> parallel (IndexBuilder.decodeDocsJsons cfg)
<*> parallel (parseModuleHeaders cfg.sourceFiles)
<*> parallel (IndexBuilder.decodeBowerJsons cfg)
let scores = mkScores packageMetas
index = mkDeclarations scores docsJsons
typeIndex = docsJsons >>= resultsWithTypes scores
packageIndex = mkPackageIndex $ mkPackageInfo scores packageMetas
moduleIndex = unpackModuleIndex $ mkPackedModuleIndex index
moduleIndex = unpackModuleIndex $ mkPackedModuleIndex index moduleNames
engineState = mkEngineState (unwrap index) typeIndex packageIndex moduleIndex scores
let countOfDefinitions = Trie.size $ unwrap index

View File

@ -28,6 +28,7 @@ main = do
let defaultCommands = Search { docsFiles: defaultDocsFiles
, bowerFiles: defaultBowerFiles
, packageName: Config.defaultPackageName
, sourceFiles: defaultSourceFiles
}
case fromMaybe defaultCommands args of
@ -35,7 +36,6 @@ main = do
Search cfg -> Interactive.run cfg
Version -> log Config.version
getArgs :: Effect (Maybe Commands)
getArgs = execParser opts
where
@ -45,19 +45,16 @@ getArgs = execParser opts
<> progDesc "Search frontend for the documentation generated by the PureScript compiler."
)
-- | Commands understood by the command-line interface.
-- |
-- | * `BuildIndex` carries the configuration record of the index builder.
-- | * `Search` carries the configuration of the interactive search session;
-- |   `main` passes it to `Interactive.run`.
-- | * `Version` makes `main` log `Config.version`.
data Commands
= BuildIndex IndexBuilder.Config
| Search Interactive.Config
| Version
-- `Generic` instance; it is what allows `genericShow` to be used below.
derive instance genericCommands :: Generic Commands _
-- `Show` is implemented through the generic representation.
instance showCommands :: Show Commands where
show = genericShow
commands :: Parser (Maybe Commands)
commands = optional $ subparser
( command "build-index"
@ -80,39 +77,29 @@ commands = optional $ subparser
-- | Option parser that produces a `BuildIndex` command.
-- |
-- | Combines the shared options (`docsFilesOption`, `bowerFilesOption`,
-- | `packageNameOption`, `sourceFilesOption`) with two options of its own:
-- |
-- | * `--generated-docs DIR` - a string option with the default value
-- |   `"./generated-docs/"`;
-- | * `--no-patch` - a switch that is `false` by default and `true` when
-- |   the flag is passed.
buildIndex :: Parser Commands
buildIndex = ado
docsFiles <- docsFilesOption
bowerFiles <- bowerFilesOption
packageName <- packageNameOption
sourceFiles <- sourceFilesOption
generatedDocs <- strOption
( long "generated-docs"
<> metavar "DIR"
<> value "./generated-docs/"
)
-- `flag false true`: the first argument is the default, the second is the
-- value used when the flag is present on the command line.
noPatch <- flag false true
( long "no-patch"
<> help "Do not patch the HTML docs, only build indices"
)
-- NOTE(review): two `in` lines follow; they look like the before/after sides
-- of a change, and only the second one passes `sourceFiles` on - confirm
-- which of them is meant to remain.
in BuildIndex { docsFiles, bowerFiles, generatedDocs, noPatch, packageName }
in BuildIndex { docsFiles, bowerFiles, generatedDocs, noPatch, packageName, sourceFiles }
-- | Option parser that produces a `Search` command, i.e. the configuration
-- | for the interactive search session.
-- |
-- | It only combines the shared option parsers; it defines no options of
-- | its own.
startInteractive :: Parser Commands
startInteractive = ado
docsFiles <- docsFilesOption
bowerFiles <- bowerFilesOption
packageName <- packageNameOption
-- NOTE(review): the `in` line directly below and the `sourceFiles` binding
-- plus `in` line after it look like the before/after sides of a change;
-- only the latter passes `sourceFiles` on - confirm which is meant to remain.
in Search { docsFiles, bowerFiles, packageName }
sourceFiles <- sourceFilesOption
in Search { docsFiles, bowerFiles, packageName, sourceFiles }
docsFilesOption :: Parser (Array String)
docsFilesOption = fromMaybe defaultDocsFiles <$>
@ -125,7 +112,6 @@ docsFilesOption = fromMaybe defaultDocsFiles <$>
)
)
bowerFilesOption :: Parser (Array String)
bowerFilesOption = fromMaybe defaultBowerFiles <$>
optional
@ -137,7 +123,6 @@ bowerFilesOption = fromMaybe defaultBowerFiles <$>
)
)
packageNameOption :: Parser PackageName
packageNameOption =
PackageName <$> strOption
@ -146,18 +131,28 @@ packageNameOption =
<> value (unwrap Config.defaultPackageName)
)
-- | Parser for the repeatable `--source-files GLOB` option.
-- |
-- | Every occurrence of the option contributes one glob. When the option is
-- | not given at all, `defaultSourceFiles` is used instead.
sourceFilesOption :: Parser (Array String)
sourceFilesOption = optional globs <#> fromMaybe defaultSourceFiles
  where
  -- One or more occurrences of `--source-files`.
  globs :: Parser (Array String)
  globs = some $ strOption $ long "source-files" <> metavar "GLOB"
-- | Globs used to locate `docs.json` files when `docsFilesOption` receives
-- | no value from the command line.
defaultDocsFiles :: Array String
defaultDocsFiles = [ "output/**/docs.json" ]
-- | Globs used to locate `bower.json` files when `bowerFilesOption` receives
-- | no value from the command line.
defaultBowerFiles :: Array String
defaultBowerFiles = [ ".spago/*/*/bower.json", "bower_components/purescript-*/bower.json" ]
-- | Globs used to locate PureScript source files when `sourceFilesOption`
-- | receives no value from the command line.
defaultSourceFiles :: Array String
defaultSourceFiles = [ "src/**/*.purs" ]
-- | Variant of `CA.many` whose collected results are converted with
-- | `List.toUnfoldable`, so the caller can pick any `Unfoldable` container.
many :: forall a f. Unfoldable f => Parser a -> Parser (f a)
many parser = List.toUnfoldable <$> CA.many parser
-- | Variant of `CA.some` whose collected results are converted with
-- | `NonEmpty.toUnfoldable`, so the caller can pick any `Unfoldable`
-- | container.
some :: forall a f. Unfoldable f => Parser a -> Parser (f a)
some parser = NonEmpty.toUnfoldable <$> CA.some parser

View File

@ -2,10 +2,10 @@ module Docs.Search.ModuleIndex where
import Docs.Search.Config as Config
import Docs.Search.Declarations (Declarations(..))
import Docs.Search.SearchResult (SearchResult(..))
import Docs.Search.Types (ModuleName, PackageInfo, PackageScore)
import Docs.Search.Extra (stringToList)
import Docs.Search.Score (Scores, getPackageScore)
import Docs.Search.SearchResult (SearchResult(..))
import Docs.Search.Types (ModuleName, PackageInfo(..), PackageScore)
import Prelude
@ -21,7 +21,7 @@ import Data.Lens.Record (prop)
import Data.List (List, (:))
import Data.Map (Map)
import Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.Maybe (Maybe(..), fromMaybe)
import Data.Newtype (unwrap)
import Data.Search.Trie (Trie)
import Data.Search.Trie as Trie
@ -91,10 +91,15 @@ queryModuleIndex scores { index, modulePackages } query =
-- | Constructs a mapping from packages to modules
mkPackedModuleIndex :: Declarations -> PackedModuleIndex
mkPackedModuleIndex (Declarations trie) =
mkPackedModuleIndex :: Declarations -> Array ModuleName -> PackedModuleIndex
mkPackedModuleIndex (Declarations trie) moduleNames =
addLocalPackageModuleNames $
foldr (Map.unionWith Set.union) Map.empty $ extract <$> Trie.values trie
where
-- Add modules from src/ that may not contain any definitions, only
-- re-exports
addLocalPackageModuleNames = flip Map.alter LocalPackage $
Just <<< append (Set.fromFoldable moduleNames) <<< fromMaybe Set.empty
extract
:: List SearchResult
-> Map PackageInfo (Set ModuleName)

View File

@ -8,6 +8,7 @@ import Data.Foldable (intercalate)
import Data.Maybe (Maybe)
import Data.Newtype (wrap)
import Docs.Search.Types (ModuleName)
import Effect (Effect)
import StringParser (Parser, char, choice, fix, many, manyTill, noneOf, regex, runParser, sepBy, sepBy1, string, try, whiteSpace)
parseModuleName :: String -> Maybe ModuleName
@ -33,7 +34,6 @@ singleLineComment = do
void $ string "--"
void $ manyTill (void $ noneOf ['\n']) (char '\n')
moduleHeader :: Parser String
moduleHeader = do
void $ string "module"
@ -46,5 +46,5 @@ moduleName = sepBy1 moduleNameWord (string ".") <#> intercalate "."
-- | Parses one segment of a dotted module name (`moduleName` separates the
-- | segments with `"."`): a single character matching `[A-Z]` followed by the
-- | text matched for `rest`; the two parts are returned concatenated.
moduleNameWord :: Parser String
moduleNameWord = do
first <- regex "[A-Z]"
-- NOTE(review): `rest` is bound twice; the two lines look like the
-- before/after sides of a change - confirm which one is meant to remain.
-- The first pattern stops at the next uppercase letter, the second one
-- also accepts uppercase letters after the first character.
rest <- regex "[a-z0-9]*"
rest <- regex "[A-Za-z0-9]*"
pure $ first <> rest

View File

@ -56,7 +56,7 @@ mkPackageInfo packageScores pms =
, score: getPackageScoreForPackageName packageScores packageName
, dependencies:
unwrap dependencies <#>
(_.packageName >>> RawPackageName >>> normalizePackageName)
_.packageName >>> RawPackageName >>> normalizePackageName
, repository: repository <#> (_.url)
}

View File

@ -1,16 +1,24 @@
module Test.ModuleParser where
import Docs.Search.IndexBuilder (getPathsByGlobs)
import Docs.Search.ModuleParser (multiLineComment, parseModuleName, singleLineComment)
import Docs.Search.Types (ModuleName(..))
import Prelude
import Data.Array as Array
import Data.Either (Either(..))
import Data.Foldable (for_)
import Data.Maybe (Maybe(..))
import Docs.Search.ModuleParser (multiLineComment, parseModuleName, singleLineComment)
import Docs.Search.Types (ModuleName(..))
import Effect.Class (liftEffect)
import Effect.Console as Console
import Effect.Exception (throw)
import Node.Encoding (Encoding(..))
import Node.FS.Aff (readTextFile)
import StringParser (runParser)
import Test.Spec (Spec, describe, it)
import Test.Spec.Assertions (shouldEqual)
tests :: Spec Unit
tests = do
describe "ModuleParser" do
@ -40,3 +48,14 @@ tests = do
runParser singleLineComment "-- asd\n" `shouldEqual` Right unit
it "single line comment #1" do
runParser singleLineComment "--\n" `shouldEqual` Right unit
it "Parses every module in .spago/" do
files <- getPathsByGlobs ["./.spago/**/*.purs"]
liftEffect $ Console.log $ "Modules in .spago: " <> show (Array.length files)
for_ files \filePath -> do
fileContents <- readTextFile UTF8 filePath
case parseModuleName fileContents of
Nothing -> do
liftEffect $ throw $
"Module header decoding failed for " <> filePath <>
", unable to extract module name"
Just _ -> pure unit