mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
Support abbreviation of Crore and Lakh
Summary: Crore (1e7) and Lakh (1e5) are both commonly used to describe an amount of Indian currency. Common abbreviations are "Cr" (Crore) and "lkh", "L", "lac" (lakh). Additionally, common spellings of "crore" include "karor" and "koti".
Reviewed By: patapizza
Differential Revision: D25550546
fbshipit-source-id: 0c1479d9027431cb0d1182b5117eabca6f939cb2
This commit is contained in:
parent
c33249b4dd
commit
181037e469
@ -115,6 +115,8 @@ allExamples = concat
|
||||
, "3,000,000"
|
||||
, "3 million"
|
||||
, "30 lakh"
|
||||
, "30 lkh"
|
||||
, "30 l"
|
||||
]
|
||||
, examples (NumeralValue 1.2e6)
|
||||
[ "1,200,000"
|
||||
@ -123,6 +125,7 @@ allExamples = concat
|
||||
, "1200k"
|
||||
, ".0012G"
|
||||
, "12 lakhs"
|
||||
, "12 lkhs"
|
||||
]
|
||||
, examples (NumeralValue 5000)
|
||||
[ "5 thousand"
|
||||
@ -163,6 +166,7 @@ allExamples = concat
|
||||
, examples (NumeralValue 31256721)
|
||||
[ "thirty-one million two hundred fifty-six thousand seven hundred twenty-one"
|
||||
, "three crore twelve lakh fifty-six thousand seven hundred twenty-one"
|
||||
, "three cr twelve lac fifty-six thousand seven hundred twenty-one"
|
||||
]
|
||||
, examples (NumeralValue 2400)
|
||||
[ "two hundred dozens"
|
||||
@ -175,5 +179,9 @@ allExamples = concat
|
||||
[ "three billions"
|
||||
, "three thousand millions"
|
||||
, "three hundred crores"
|
||||
, "three hundred Cr"
|
||||
, "three hundred koti"
|
||||
, "three hundred krores"
|
||||
, "three hundred Kr"
|
||||
]
|
||||
]
|
||||
|
@ -16,7 +16,6 @@ module Duckling.Numeral.EN.Rules
|
||||
import Control.Applicative ((<|>))
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.Maybe
|
||||
import Data.String
|
||||
import Data.Text (Text)
|
||||
import Prelude
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
@ -126,20 +125,32 @@ ruleTens = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- | Lookup table from a power-of-ten word (or abbreviation) to its base-10
-- exponent, e.g. @"lakh"@ maps to 5 and @"crore"@ maps to 7.
--
-- Keys are written in lowercase; 'rulePowersOfTen' lowercases the regex match
-- with @Text.toLower@ before looking it up here.
--
-- NOTE(review): @"trillion"@ has an entry here but no corresponding
-- alternative in the 'rulePowersOfTen' regex, so that entry looks unreachable
-- from that rule -- confirm whether the regex should be extended.
powersOfTensMap :: HashMap Text Int
|
||||
powersOfTensMap = HashMap.fromList
|
||||
[ ("hundred", 2)
|
||||
, ("thousand", 3)
|
||||
-- lakh (exponent 5) and its alternative spellings/abbreviations
, ("lakh", 5)
|
||||
, ("lkh", 5)
|
||||
, ("l", 5)
|
||||
, ("lac", 5)
|
||||
, ("million", 6)
|
||||
-- crore (exponent 7) and its alternative spellings/abbreviations
, ("cr", 7)
|
||||
, ("crore", 7)
|
||||
, ("krore", 7)
|
||||
, ("kr", 7)
|
||||
, ("koti", 7)
|
||||
, ("billion", 9)
|
||||
, ("trillion", 12)
|
||||
]
|
||||
|
||||
-- | Rule for a single power-of-ten word ("hundred", "thousand", "lakh",
-- "crore", ...), optionally followed by a plural "s".
--
-- The first regex capture group (the word without the trailing "s") is
-- lowercased and turned into a token via @double@, @withGrain@ and
-- @withMultipliable@. The rule yields 'Nothing' when the token list does not
-- start with a regex match or the word is not recognised.
--
-- NOTE(review): this span is rendered from a diff hunk (@-126,20 +125,32@)
-- without +/- markers, so it shows BOTH the removed and the added lines. The
-- first @pattern@ field and the nested @case Text.toLower match of@ branch
-- are the removed (pre-change) version; the second @pattern@ field and the
-- @do@ branch using 'powersOfTensMap' are the added (post-change) version.
rulePowersOfTen :: Rule
|
||||
rulePowersOfTen = Rule
|
||||
{ name = "powers of tens"
|
||||
-- Removed version: full words only.
, pattern = [regex "(hundred|thousand|lakh|million|crore|billion)s?"]
|
||||
-- Added version: also accepts l / lac / lkh / lakh, cr / kr / crore / krore
-- and koti.
, pattern = [regex "(hundred|thousand|l(ac|(a?kh)?)|million|((k|c)r(ore)?|koti)|billion)s?"]
|
||||
, prod = \tokens -> case tokens of
|
||||
-- Removed version: one hard-coded branch per supported word.
(Token RegexMatch (GroupMatch (match : _)) : _) ->
|
||||
case Text.toLower match of
|
||||
"hundred" -> double 1e2 >>= withGrain 2 >>= withMultipliable
|
||||
"thousand" -> double 1e3 >>= withGrain 3 >>= withMultipliable
|
||||
"lakh" -> double 1e5 >>= withGrain 5 >>= withMultipliable
|
||||
"million" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
||||
"crore" -> double 1e7 >>= withGrain 7 >>= withMultipliable
|
||||
"billion" -> double 1e9 >>= withGrain 9 >>= withMultipliable
|
||||
_ -> Nothing
|
||||
-- Added version: look the exponent up in powersOfTensMap; a failed lookup
-- short-circuits the Maybe do-block to Nothing.
(Token RegexMatch (GroupMatch (match : _)) : _) -> do
|
||||
grain <- HashMap.lookup (Text.toLower match) powersOfTensMap
|
||||
-- The value is 10 raised to the exponent, and the same exponent is passed
-- to withGrain.
double (10 ^ grain) >>= withGrain grain >>= withMultipliable
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user