duckling/exe/ExampleMain.hs
Julien Odent ab0ad0256e Locales support
Summary:
* Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33)
* `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations
* API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility.
*  `Rules/<Lang>.hs` exposes
  - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs`
  - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs`
  - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility
* Corpus, tests & classifiers
  - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible)
  - Default classifiers are built on existing corpus
  - Locale classifiers are built on
  - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>`
  - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`).
  - Locale classifiers use the language corpus extended with the locale examples as training set.
  - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus.
  - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`.
  - Tests run against each classifier to make sure runtime works as expected.
* MM/DD (en_US) vs DD/MM (en_GB) example to illustrate

Reviewed By: JonCoens, blandinw

Differential Revision: D6038096

fbshipit-source-id: f29c28d
2017-10-13 08:34:21 -07:00

116 lines
3.5 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
import Control.Applicative
import Control.Arrow ((***))
import Control.Monad (unless)
import Control.Monad.IO.Class
import Data.Aeson
import Data.ByteString (ByteString)
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Data.Time.LocalTime.TimeZone.Series
import Prelude
import System.Directory
import TextShow
import Text.Read (readMaybe)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Snap.Core
import Snap.Http.Server
import Duckling.Core
import Duckling.Data.TimeZone
createIfMissing :: FilePath -> IO ()
createIfMissing f = do
exists <- doesFileExist f
unless exists $ writeFile f ""
setupLogs :: IO ()
setupLogs = do
createDirectoryIfMissing False "log"
createIfMissing "log/error.log"
createIfMissing "log/access.log"
main :: IO ()
main = do
setupLogs
tzs <- loadTimeZoneSeries "/usr/share/zoneinfo/"
quickHttpServe $
ifTop (writeBS "quack!") <|>
route
[ ("targets", method GET targetsHandler)
, ("parse", method POST $ parseHandler tzs)
]
-- | Return which languages have which dimensions
targetsHandler :: Snap ()
targetsHandler = do
modifyResponse $ setHeader "Content-Type" "application/json"
writeLBS $ encode $
HashMap.fromList . map dimText $ HashMap.toList supportedDimensions
where
dimText :: (Lang, [Some Dimension]) -> (Text, [Text])
dimText = (Text.toLower . showt) *** map (\(This d) -> toName d)
-- | Parse some text into the given dimensions
parseHandler :: HashMap Text TimeZoneSeries -> Snap ()
parseHandler tzs = do
modifyResponse $ setHeader "Content-Type" "application/json"
t <- getPostParam "text"
l <- getPostParam "lang"
ds <- getPostParam "dims"
tz <- getPostParam "tz"
loc <- getPostParam "locale"
case t of
Nothing -> do
modifyResponse $ setResponseStatus 422 "Bad Input"
writeBS "Need a 'text' parameter to parse"
Just tx -> do
refTime <- liftIO $ currentReftime tzs $
fromMaybe defaultTimeZone $ Text.decodeUtf8 <$> tz
let
context = Context
{ referenceTime = refTime
, locale = maybe (makeLocale (parseLang l) Nothing) parseLocale loc
}
dimParse = fromMaybe [] $ decode $ LBS.fromStrict $ fromMaybe "" ds
dims = mapMaybe fromName dimParse
parsedResult = parse (Text.decodeUtf8 tx) context dims
writeLBS $ encode parsedResult
where
defaultLang = EN
defaultLocale = makeLocale defaultLang Nothing
defaultTimeZone = "America/Los_Angeles"
parseLocale :: ByteString -> Locale
parseLocale x = maybe defaultLocale (`makeLocale` mregion) mlang
where
(mlang, mregion) = case chunks of
[a, b] -> (readMaybe a :: Maybe Lang, readMaybe b :: Maybe Region)
_ -> (Nothing, Nothing)
chunks = map Text.unpack . Text.split (== '_') . Text.toUpper
$ Text.decodeUtf8 x
parseLang :: Maybe ByteString -> Lang
parseLang l = fromMaybe defaultLang $ l >>=
readMaybe . Text.unpack . Text.toUpper . Text.decodeUtf8