duckling/exe/ExampleMain.hs

182 lines
6.1 KiB
Haskell
Raw Permalink Normal View History

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
import Control.Applicative hiding (empty)
import Control.Arrow ((***))
import Control.Exception (SomeException, catch)
import Control.Monad (unless, when)
import Control.Monad.IO.Class
import Data.Aeson
import Data.ByteString (ByteString, empty)
import Data.HashMap.Strict (HashMap)
import Data.Maybe
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
import Data.String
import Data.Text (Text)
import Data.Time.LocalTime.TimeZone.Series
import Prelude
import System.Directory
import System.Environment (lookupEnv)
import TextShow
import Text.Read (readMaybe)
import qualified Data.ByteString.Lazy as LBS
2021-06-03 20:22:54 +03:00
import qualified Data.ByteString.Lazy.Char8 as LBS8
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Snap.Core
import Snap.Http.Server
import Duckling.Core
import Duckling.Data.TimeZone
import Duckling.Dimensions (allDimensions)
import Duckling.Resolve (DucklingTime)
createIfMissing :: FilePath -> IO ()
createIfMissing f = do
exists <- doesFileExist f
unless exists $ writeFile f ""
shouldLog :: Maybe ConfigLog -> Bool
shouldLog Nothing = False
shouldLog (Just ConfigNoLog) = False
shouldLog _ = True
setupLogs :: Config a b -> IO ()
setupLogs conf = do
let shouldLogErrors = shouldLog $ getErrorLog conf
let shouldLogAccesses = shouldLog $ getAccessLog conf
when (shouldLogErrors || shouldLogAccesses) $ createDirectoryIfMissing False "log"
when shouldLogErrors $ createIfMissing "log/error.log"
when shouldLogAccesses $ createIfMissing "log/access.log"
loadTZs :: IO (HashMap Text TimeZoneSeries)
loadTZs = do
let defaultPath = "/usr/share/zoneinfo/"
let fallbackPath = "/etc/zoneinfo/"
loadTimeZoneSeries defaultPath `catch` (\(_ :: SomeException) -> loadTimeZoneSeries fallbackPath)
main :: IO ()
main = do
tzs <- loadTZs
p <- lookupEnv "PORT"
conf <- commandLineConfig $
maybe defaultConfig (`setPort` defaultConfig) (readMaybe =<< p)
setupLogs conf
httpServe conf $
ifTop (writeBS "quack!") <|>
route
[ ("targets", method GET targetsHandler)
, ("parse", method POST $ parseHandler tzs)
]
-- | Return which languages have which dimensions
targetsHandler :: Snap ()
targetsHandler = do
modifyResponse $ setHeader "Content-Type" "application/json"
writeLBS $ encode $
HashMap.fromList $ map dimText $ HashMap.toList supportedDimensions
where
dimText :: (Lang, [Seal Dimension]) -> (Text, [Text])
dimText = (Text.toLower . showt) *** map (\(Seal d) -> toName d)
-- | Parse some text into the given dimensions
parseHandler :: HashMap Text TimeZoneSeries -> Snap ()
parseHandler tzs = do
modifyResponse $ setHeader "Content-Type" "application/json"
t <- getPostParam "text"
l <- getPostParam "lang"
ds <- getPostParam "dims"
tz <- getPostParam "tz"
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
loc <- getPostParam "locale"
ref <- getPostParam "reftime"
latent <- getPostParam "latent"
case t of
Nothing -> do
modifyResponse $ setResponseStatus 422 "Bad Input"
writeBS "Need a 'text' parameter to parse"
Just tx -> do
let timezone = parseTimeZone tz
now <- liftIO $ currentReftime tzs timezone
2021-06-03 20:22:54 +03:00
let
lang = parseLang l
context = Context
{ referenceTime = maybe now (parseRefTime timezone) ref
, locale = maybe (makeLocale lang Nothing) parseLocale loc
}
options = Options {withLatent = parseLatent latent}
2021-06-03 20:22:54 +03:00
cleanupDims =
LBS8.filter (/= '\\') -- strip out escape chars people throw in
. stripSuffix "\"" -- remove trailing double quote
. stripPrefix "\"" -- remote leading double quote
where
stripSuffix suffix str = fromMaybe str $ LBS.stripSuffix suffix str
stripPrefix prefix str = fromMaybe str $ LBS.stripPrefix prefix str
dims = fromMaybe (allDimensions lang) $ do
2021-06-03 20:22:54 +03:00
queryDims <- fmap (cleanupDims . LBS.fromStrict) ds
txtDims <- decode @[Text] queryDims
pure $ mapMaybe parseDimension txtDims
parsedResult = parse (Text.decodeUtf8 tx) context options dims
writeLBS $ encode parsedResult
where
defaultLang = EN
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
defaultLocale = makeLocale defaultLang Nothing
defaultTimeZone = "America/Los_Angeles"
defaultLatent = False
parseDimension :: Text -> Maybe (Seal Dimension)
parseDimension x = fromName x <|> fromCustomName x
where
fromCustomName :: Text -> Maybe (Seal Dimension)
fromCustomName name = HashMap.lookup name m
m = HashMap.fromList
2021-06-03 20:22:54 +03:00
[ -- ("my-dimension", Seal (CustomDimension MyDimension))
]
parseTimeZone :: Maybe ByteString -> Text
parseTimeZone = maybe defaultTimeZone Text.decodeUtf8
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
parseLocale :: ByteString -> Locale
parseLocale x = maybe defaultLocale (`makeLocale` mregion) mlang
where
(mlang, mregion) = case chunks of
[a, b] -> (readMaybe a :: Maybe Lang, readMaybe b :: Maybe Region)
_ -> (Nothing, Nothing)
chunks = map Text.unpack
$ Text.split (== '_') $ Text.toUpper $ Text.decodeUtf8 x
Locales support Summary: * Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33) * `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations * API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility. * `Rules/<Lang>.hs` exposes - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs` - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs` - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility * Corpus, tests & classifiers - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible) - Default classifiers are built on existing corpus - Locale classifiers are built on - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>` - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`). - Locale classifiers use the language corpus extended with the locale examples as training set. - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus. - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`. - Tests run against each classifier to make sure runtime works as expected. * MM/DD (en_US) vs DD/MM (en_GB) example to illustrate Reviewed By: JonCoens, blandinw Differential Revision: D6038096 fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
parseLang :: Maybe ByteString -> Lang
parseLang l = fromMaybe defaultLang $ l >>=
readMaybe . Text.unpack . Text.toUpper . Text.decodeUtf8
parseRefTime :: Text -> ByteString -> DucklingTime
parseRefTime timezone refTime = makeReftime tzs timezone utcTime
where
milliseconds = readMaybe $ Text.unpack $ Text.decodeUtf8 refTime
utcTime = case milliseconds of
Just msec -> posixSecondsToUTCTime $ fromInteger msec / 1000
Nothing -> error "Please use milliseconds since epoch for reftime"
parseLatent :: Maybe ByteString -> Bool
parseLatent x = fromMaybe defaultLatent
(readMaybe (Text.unpack $ Text.toTitle $ Text.decodeUtf8 $ fromMaybe empty x)::Maybe Bool)