duckling/Duckling/Debug.hs

92 lines
2.6 KiB
Haskell
Raw Normal View History

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Debug
( allParses
, debug
, debugCustom
, fullParses
, ptree
) where
import Data.Maybe
import Data.Text (Text)
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
import Prelude
import qualified Data.HashSet as HashSet
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import Duckling.Api
import Duckling.Dimensions.Types
import Duckling.Engine
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
import Duckling.Locale
import Duckling.Resolve
import Duckling.Rules
import Duckling.Testing.Types
import Duckling.Types
-- -----------------------------------------------------------------
-- API
debug :: Locale -> Text -> [Seal Dimension] -> IO [Entity]
debug locale = debugCustom testContext {locale = locale} testOptions
allParses :: Locale -> Text -> [Seal Dimension] -> IO [Entity]
allParses l input targets = debugTokens input $ parses l input targets
fullParses :: Locale -> Text -> [Seal Dimension] -> IO [Entity]
fullParses l input targets =
debugTokens
input
$ filter
(\Resolved{range = Range start end} -> start == 0 && end == n)
$ parses l input targets
where
n = Text.length input
debugCustom :: Context -> Options -> Text -> [Seal Dimension] -> IO [Entity]
debugCustom context options input targets =
debugTokens
input
$ analyze input context options $ HashSet.fromList targets
ptree :: Text -> Entity -> IO ()
ptree input Entity {enode} = pnode input 0 enode
-- -----------------------------------------------------------------
-- Internals
parses :: Locale -> Text -> [Seal Dimension] -> [ResolvedToken]
parses l input targets =
filter isRelevantDimension tokens
where
tokens = parseAndResolve rules input testContext {locale = l} testOptions
rules = rulesFor l $ HashSet.fromList targets
isRelevantDimension Resolved{node = Node{token = (Token d _)}} =
case targets of
[] -> True
_ -> elem (Seal d) targets
debugTokens :: Text -> [ResolvedToken] -> IO [Entity]
debugTokens input tokens = do
mapM_ (ptree input) entities
return entities
where entities = map (formatToken input) tokens
pnode :: Text -> Int -> Node -> IO ()
pnode input depth Node {children, rule, nodeRange = Range start end} = do
Text.putStrLn out
mapM_ (pnode input (depth + 1)) children
where
out = Text.concat [ Text.replicate depth "-- ", name, " (", body, ")" ]
name = fromMaybe "regex" rule
body = Text.drop start $ Text.take end input