Support abbreviation of Crore and Lakh

Summary:
Crore (1e7) and lakh (1e5) are both commonly used to express amounts of Indian currency. Common abbreviations are "Cr" for crore and "lkh", "L", and "lac" for lakh.

Additionally, common spellings of "crore" include "karor" and "koti". The rule in this change accepts "krore", "kr", and "koti" (plus an optional trailing "s").

Reviewed By: patapizza

Differential Revision: D25550546

fbshipit-source-id: 0c1479d9027431cb0d1182b5117eabca6f939cb2
This commit is contained in:
Daniel Cartwright 2020-12-15 11:12:50 -08:00 committed by Facebook GitHub Bot
parent c33249b4dd
commit 181037e469
2 changed files with 30 additions and 11 deletions

View File

@ -115,6 +115,8 @@ allExamples = concat
, "3,000,000"
, "3 million"
, "30 lakh"
, "30 lkh"
, "30 l"
]
, examples (NumeralValue 1.2e6)
[ "1,200,000"
@ -123,6 +125,7 @@ allExamples = concat
, "1200k"
, ".0012G"
, "12 lakhs"
, "12 lkhs"
]
, examples (NumeralValue 5000)
[ "5 thousand"
@ -163,6 +166,7 @@ allExamples = concat
, examples (NumeralValue 31256721)
[ "thirty-one million two hundred fifty-six thousand seven hundred twenty-one"
, "three crore twelve lakh fifty-six thousand seven hundred twenty-one"
, "three cr twelve lac fifty-six thousand seven hundred twenty-one"
]
, examples (NumeralValue 2400)
[ "two hundred dozens"
@ -175,5 +179,9 @@ allExamples = concat
[ "three billions"
, "three thousand millions"
, "three hundred crores"
, "three hundred Cr"
, "three hundred koti"
, "three hundred krores"
, "three hundred Kr"
]
]

View File

@ -16,7 +16,6 @@ module Duckling.Numeral.EN.Rules
import Control.Applicative ((<|>))
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
@ -126,20 +125,32 @@ ruleTens = Rule
_ -> Nothing
}
-- | Base-10 exponent for each magnitude word or abbreviation, keyed by its
-- lower-case spelling (for example @"lakh"@ maps to 5 and @"cr"@ to 7).
-- 'rulePowersOfTen' lower-cases its regex match before looking it up here.
powersOfTensMap :: HashMap Text Int
powersOfTensMap = HashMap.fromList $ concatMap spellings
  [ (2, ["hundred"])
  , (3, ["thousand"])
  , (5, ["lakh", "lkh", "l", "lac"])
  , (6, ["million"])
  , (7, ["cr", "crore", "krore", "kr", "koti"])
  , (9, ["billion"])
  , (12, ["trillion"])
  ]
  where
    -- Pair every spelling in a group with that group's exponent.
    spellings :: (Int, [Text]) -> [(Text, Int)]
    spellings (grain, ws) = [(w, grain) | w <- ws]
-- | Rule for magnitude words such as "hundred", "lakh" or "crore", each
-- optionally followed by a plural "s". The first regex capture group (the
-- word without the trailing "s") is lower-cased and turned into a numeral.
--
-- NOTE(review): this block shows two `pattern` fields and two identical
-- `Token RegexMatch` alternatives. It reads like the pre-change and
-- post-change lines of a diff displayed together; confirm which are live.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
{ name = "powers of tens"
-- Narrower pattern: full words only.
, pattern = [regex "(hundred|thousand|lakh|million|crore|billion)s?"]
-- Wider pattern: also accepts "l", "lac", "lkh", "cr", "kr", "krore", "koti".
-- NOTE(review): 'powersOfTensMap' has a "trillion" entry, but neither pattern
-- has a "trillion" alternative, so that entry cannot be reached from here.
, pattern = [regex "(hundred|thousand|l(ac|(a?kh)?)|million|((k|c)r(ore)?|koti)|billion)s?"]
, prod = \tokens -> case tokens of
-- Hard-coded dispatch: one literal value and grain per recognised word.
(Token RegexMatch (GroupMatch (match : _)) : _) ->
case Text.toLower match of
"hundred" -> double 1e2 >>= withGrain 2 >>= withMultipliable
"thousand" -> double 1e3 >>= withGrain 3 >>= withMultipliable
"lakh" -> double 1e5 >>= withGrain 5 >>= withMultipliable
"million" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"crore" -> double 1e7 >>= withGrain 7 >>= withMultipliable
"billion" -> double 1e9 >>= withGrain 9 >>= withMultipliable
_ -> Nothing
-- Table-driven variant: look the lower-cased word up in 'powersOfTensMap'.
-- A word missing from the map makes the lookup, and so the rule, yield Nothing.
(Token RegexMatch (GroupMatch (match : _)) : _) -> do
grain <- HashMap.lookup (Text.toLower match) powersOfTensMap
-- The value is 10 ^ grain; the same exponent is passed on to withGrain.
double (10 ^ grain) >>= withGrain grain >>= withMultipliable
-- Any other token shape produces no result.
_ -> Nothing
}