2017-03-08 21:33:55 +03:00
|
|
|
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
|
|
|
-- All rights reserved.
|
|
|
|
|
--
|
|
|
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
|
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
|
2017-03-16 23:42:15 +03:00
|
|
|
|
module Duckling.Numeral.TR.Corpus
|
2017-03-08 21:33:55 +03:00
|
|
|
|
( corpus ) where
|
|
|
|
|
|
|
|
|
|
import Prelude
|
|
|
|
|
import Data.String
|
|
|
|
|
|
Locales support
Summary:
* Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33)
* `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations
* API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility.
* `Rules/<Lang>.hs` exposes
- `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs`
- `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs`
- `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility
* Corpus, tests & classifiers
- 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible)
- Default classifiers are built on existing corpus
- Locale classifiers are built on
- `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>`
- `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`).
- Locale classifiers use the language corpus extended with the locale examples as training set.
- Locale examples need to use the same `Context` (i.e. reference time) as the language corpus.
- For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`.
- Tests run against each classifier to make sure runtime works as expected.
* MM/DD (en_US) vs DD/MM (en_GB) example to illustrate
Reviewed By: JonCoens, blandinw
Differential Revision: D6038096
fbshipit-source-id: f29c28d
2017-10-13 18:15:32 +03:00
|
|
|
|
import Duckling.Locale
|
2017-03-16 23:42:15 +03:00
|
|
|
|
import Duckling.Numeral.Types
|
2017-03-08 21:33:55 +03:00
|
|
|
|
import Duckling.Resolve
|
|
|
|
|
import Duckling.Testing.Types
|
|
|
|
|
|
|
|
|
|
-- | Numeral corpus for the TR language: the shared test context with its
-- locale overridden to TR (no region), the shared test options, and every
-- example from 'allExamples'.
corpus :: Corpus
corpus = (trContext, testOptions, allExamples)
  where
    -- Only the locale field is replaced; every other field of
    -- 'testContext' is kept as-is.
    trContext = testContext {locale = makeLocale TR Nothing}
|
2017-03-08 21:33:55 +03:00
|
|
|
|
|
|
|
|
|
-- | All numeral examples for this corpus, flattened into a single list.
--
-- The data is kept as a table of @(value, phrases)@ pairs. Each pair is
-- expanded with 'examples' applied to the value wrapped in 'NumeralValue',
-- and the per-entry results are concatenated in table order.
-- NOTE(review): presumably every phrase in an entry is expected to resolve
-- to the paired value; confirm against 'examples' in Duckling.Testing.Types.
allExamples :: [Example]
allExamples = concatMap fromEntry
  [ (0, ["0", "yok", "hiç", "sıfır"])
  , (1, ["1", "bir", "tek", "yek"])
  , (2, ["2", "iki", "çift"])
  , (33, ["33", "otuzüç", "otuz üç", "0033"])
  , (14, ["14", "ondört", "on dört"])
  , (16, ["16", "onaltı", "on altı"])
  , (17, ["17", "onyedi", "on yedi"])
  , (18, ["18", "onsekiz", "on sekiz"])
    -- Decimal entries use a comma as the decimal mark in digit form.
  , (1.1, ["1,1", "1,10", "01,10", "bir virgül bir", "bir nokta bir"])
  , ( 0.77
    , [ "0,77"
      , ",77"
      , "sıfır virgül yetmişyedi"
      , "sıfır virgül yetmiş yedi"
      ]
    )
    -- Grouped digit forms use a dot as the thousands separator.
  , (100000, ["100.000", "100000", "100K", "100k", "100b"])
  , (3000000, ["3M", "3000K", "3000000", "3.000.000"])
  , ( 1200000
    , [ "1.200.000"
      , "1200000"
      , "1,2M"
      , "1200K"
      , ",0012G"
      , "1200B"
      ]
    )
  , ( -1200000
    , [ "- 1.200.000"
      , "-1200000"
      , "eksi 1.200.000"
      , "negatif 1200000"
      , "-1,2M"
      , "-1200K"
      , "-,0012G"
      , "-1200B"
      ]
    )
  , (5000, ["5 bin", "beş bin"])
  , (50, ["5 deste", "beş deste"])
  , (200000, ["iki yüz bin", "ikiyüzbin"])
  , (21011, ["yirmi bir bin on bir", "yirmibir bin onbir"])
  , ( 721012
    , [ "yedi yüz yirmibir bin on iki"
      , "yedi yüz yirmi bir bin on iki"
      , "yediyüz yirmibir bin oniki"
      ]
    )
  , ( 300341
    , [ "üçyüzbin üçyüz kırkbir"
      , "üç yüz bin üç yüz kırk bir"
      ]
    )
  , ( 40348
    , [ "kırkbin üçyüz kırksekiz"
      , "kırk bin üç yüz kırk sekiz"
      ]
    )
  , ( 31256721
    , [ "otuz bir milyon iki yüz elli altı bin yedi yüz yirmi bir"
      ]
    )
  , (107, ["107", "yüz yedi"])
  , (5.5, ["beş buçuk", "beşbuçuk", "5 buçuk", "5,5"])
  , ( 3500000
    , [ "3,5 milyon"
      , "3500000"
      , "üç buçuk milyon"
      , "üçbuçuk milyon"
      , "3,5M"
      ]
    )
  , (0.5, ["yarım", "0,5"])
  , (2500, ["2,5 bin", "2500", "iki bin beş yüz", "ikibin beşyüz"])
  , ( 2200000
    , [ "2,2 milyon"
      , "iki nokta iki milyon"
      , "iki virgül iki milyon"
      ]
    )
  , ( 72.5
    , [ "yetmişikibuçuk"
      , "yetmişiki buçuk"
      , "yetmiş iki buçuk"
      , "72,5"
      ]
    )
  ]
  where
    -- Expand one table row into its examples.
    fromEntry (value, phrases) = examples (NumeralValue value) phrases
|