diff --git a/Duckling/AmountOfMoney/ES/Rules.hs b/Duckling/AmountOfMoney/ES/Rules.hs index 58b5ef8f..8955d279 100644 --- a/Duckling/AmountOfMoney/ES/Rules.hs +++ b/Duckling/AmountOfMoney/ES/Rules.hs @@ -28,7 +28,7 @@ ruleDollar :: Rule ruleDollar = Rule { name = "dollar" , pattern = - [ regex "d(\x00f3|o)lar(es)?" + [ regex "d(ó|o)lar(es)?" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Dollar } diff --git a/Duckling/AmountOfMoney/FR/Rules.hs b/Duckling/AmountOfMoney/FR/Rules.hs index f5b74f32..ec55877c 100644 --- a/Duckling/AmountOfMoney/FR/Rules.hs +++ b/Duckling/AmountOfMoney/FR/Rules.hs @@ -44,7 +44,7 @@ rulePrecision :: Rule rulePrecision = Rule { name = "precision" , pattern = - [ regex "exactement|quasi|plus ou moins|environ|autour de|(a|\x00e0) peu pr(e|\x00e8)s" + [ regex "exactement|quasi|plus ou moins|environ|autour de|(a|à) peu pr(e|è)s" , financeWith TAmountOfMoney.value isJust ] , prod = \tokens -> case tokens of diff --git a/Duckling/AmountOfMoney/GA/Rules.hs b/Duckling/AmountOfMoney/GA/Rules.hs index 0e62066f..dc42a3e3 100644 --- a/Duckling/AmountOfMoney/GA/Rules.hs +++ b/Duckling/AmountOfMoney/GA/Rules.hs @@ -111,7 +111,7 @@ ruleInr :: Rule ruleInr = Rule { name = "INR" , pattern = - [ regex "r(\x00fa|u)pa(\x00ed|i)" + [ regex "r(ú|u)pa(í|i)" ] , prod = \_ -> Just . 
Token AmountOfMoney $ currencyOnly INR } @@ -137,7 +137,7 @@ ruleAmountofmoneyGlan = Rule { name = " glan" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "glan|baileach|(go )?d(\x00ed|i)reach" + , regex "glan|baileach|(go )?d(í|i)reach" ] , prod = \tokens -> case tokens of (token:_) -> Just token diff --git a/Duckling/AmountOfMoney/HR/Rules.hs b/Duckling/AmountOfMoney/HR/Rules.hs index c1320e47..ba30555a 100644 --- a/Duckling/AmountOfMoney/HR/Rules.hs +++ b/Duckling/AmountOfMoney/HR/Rules.hs @@ -64,7 +64,7 @@ ruleCent :: Rule ruleCent = Rule { name = "cent" , pattern = - [ regex "cent(i|a)?|penij(i|a)?|c|\x00a2|lp|lip(a|e)" + [ regex "cent(i|a)?|penij(i|a)?|c|¢|lp|lip(a|e)" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Cent } diff --git a/Duckling/AmountOfMoney/ID/Rules.hs b/Duckling/AmountOfMoney/ID/Rules.hs index 8bc62943..7a2bd461 100644 --- a/Duckling/AmountOfMoney/ID/Rules.hs +++ b/Duckling/AmountOfMoney/ID/Rules.hs @@ -69,7 +69,7 @@ ruleJpy :: Rule ruleJpy = Rule { name = "JPY" , pattern = - [ regex "\x00a5\\." + [ regex "¥\\." ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly JPY } diff --git a/Duckling/AmountOfMoney/KO/Rules.hs b/Duckling/AmountOfMoney/KO/Rules.hs index 70ed99a4..ab511dcc 100644 --- a/Duckling/AmountOfMoney/KO/Rules.hs +++ b/Duckling/AmountOfMoney/KO/Rules.hs @@ -27,7 +27,7 @@ ruleAmountofmoneyAbout = Rule { name = " about" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "\xc815\xb3c4|\xcbe4" + , regex "정도|쯤" ] , prod = \tokens -> case tokens of (token:_) -> Just token @@ -38,7 +38,7 @@ ruleAud :: Rule ruleAud = Rule { name = "AUD" , pattern = - [ regex "\xd638\xc8fc\xb2ec\xb7ec" + [ regex "호주달러" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly AUD } @@ -47,7 +47,7 @@ ruleKrw :: Rule ruleKrw = Rule { name = "₩" , pattern = - [ regex "\x20a9|\xc6d0" + [ regex "₩|원" ] , prod = \_ -> Just . 
Token AmountOfMoney $ currencyOnly KRW } @@ -56,7 +56,7 @@ ruleAboutAmountofmoney :: Rule ruleAboutAmountofmoney = Rule { name = "about " , pattern = - [ regex "\xc57d|\xb300\xcda9|\xc5bc\xcd94" + [ regex "약|대충|얼추" , financeWith TAmountOfMoney.value isJust ] , prod = \tokens -> case tokens of @@ -68,7 +68,7 @@ ruleCent :: Rule ruleCent = Rule { name = "cent" , pattern = - [ regex "cents?|\xc13c(\xd2b8|\xce20)" + [ regex "cents?|센(트|츠)" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Cent } @@ -77,7 +77,7 @@ ruleExactlyAmountofmoney :: Rule ruleExactlyAmountofmoney = Rule { name = "exactly " , pattern = - [ regex "\xb531|\xc815\xd655\xd788" + [ regex "딱|정확히" , financeWith TAmountOfMoney.value isJust ] , prod = \tokens -> case tokens of @@ -103,7 +103,7 @@ ruleEuro :: Rule ruleEuro = Rule { name = "€" , pattern = - [ regex "\x20ac|\xc720\xb85c" + [ regex "€|유로" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly EUR } @@ -112,7 +112,7 @@ ruleDollar :: Rule ruleDollar = Rule { name = "$" , pattern = - [ regex "\xb2ec\xb7ec|\xbd88" + [ regex "달러|불" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Dollar } @@ -121,7 +121,7 @@ ruleInr :: Rule ruleInr = Rule { name = "INR" , pattern = - [ regex "\xb8e8\xd53c|\xc778\xb3c4\xb8e8\xd53c" + [ regex "루피|인도루피" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly INR } @@ -130,7 +130,7 @@ rulePounds :: Rule rulePounds = Rule { name = "£" , pattern = - [ regex "\xd30c\xc6b4\xb4dc|\xc601\xad6d\xd30c\xc6b4\xb4dc" + [ regex "파운드|영국파운드" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Pound } diff --git a/Duckling/AmountOfMoney/NB/Rules.hs b/Duckling/AmountOfMoney/NB/Rules.hs index 6d1a3fe7..a8d8a93c 100644 --- a/Duckling/AmountOfMoney/NB/Rules.hs +++ b/Duckling/AmountOfMoney/NB/Rules.hs @@ -56,7 +56,7 @@ ruleCent :: Rule ruleCent = Rule { name = "cent" , pattern = - [ regex "cents?|penn(y|ies)|(\x00f8)re" + [ regex "cents?|penn(y|ies)|(ø)re" ] , prod = \_ -> Just . 
Token AmountOfMoney $ currencyOnly Cent } diff --git a/Duckling/AmountOfMoney/RO/Rules.hs b/Duckling/AmountOfMoney/RO/Rules.hs index e030223e..16bbd77d 100644 --- a/Duckling/AmountOfMoney/RO/Rules.hs +++ b/Duckling/AmountOfMoney/RO/Rules.hs @@ -31,7 +31,7 @@ ruleIntersectAndNumeral = Rule { name = "intersect (and number)" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "(s|\x0219)i" + , regex "(s|ș)i" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -71,7 +71,7 @@ rulePrecisionAmountofmoney :: Rule rulePrecisionAmountofmoney = Rule { name = "about/exactly " , pattern = - [ regex "exact|cam|aprox(\\.|imativ)?|aproape|(i|\x00ee)n jur (de)?" + [ regex "exact|cam|aprox(\\.|imativ)?|aproape|(i|î)n jur (de)?" , financeWith TAmountOfMoney.value isJust ] , prod = \tokens -> case tokens of @@ -83,7 +83,7 @@ ruleCent :: Rule ruleCent = Rule { name = "cent|bani" , pattern = - [ regex "bani?|cen(t|\x021b)i?|c|\x00a2" + [ regex "bani?|cen(t|ț)i?|c|¢" ] , prod = \_ -> Just . 
Token AmountOfMoney $ currencyOnly Cent } @@ -102,7 +102,7 @@ ruleIntersectAndXCents = Rule { name = "intersect (and X cents)" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "(s|\x0219)i" + , regex "(s|ș)i" , financeWith TAmountOfMoney.currency (== Cent) ] , prod = \tokens -> case tokens of @@ -140,7 +140,7 @@ ruleOtherPounds :: Rule ruleOtherPounds = Rule { name = "other pounds" , pattern = - [ regex "lir(a|\x0103) (egiptian|libanez)(a|\x0103)" + [ regex "lir(a|ă) (egiptian|libanez)(a|ă)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (_:match:_)):_) -> case Text.toLower match of diff --git a/Duckling/AmountOfMoney/Rules.hs b/Duckling/AmountOfMoney/Rules.hs index e5535254..cc3d9734 100644 --- a/Duckling/AmountOfMoney/Rules.hs +++ b/Duckling/AmountOfMoney/Rules.hs @@ -36,21 +36,21 @@ currencies = HashMap.fromList , ("aud", AUD) , ("bgn", BGN) , ("brl", BRL) - , ("\x00a2", Cent) + , ("¢", Cent) , ("c", Cent) , ("$", Dollar) , ("dollar", Dollar) , ("dollars", Dollar) , ("egp", EGP) - , ("\x20ac", EUR) + , ("€", EUR) , ("eur", EUR) , ("euro", EUR) , ("euros", EUR) , ("eurs", EUR) - , ("\x20acur", EUR) - , ("\x20acuro", EUR) - , ("\x20acuros", EUR) - , ("\x20acurs", EUR) + , ("€ur", EUR) + , ("€uro", EUR) + , ("€uros", EUR) + , ("€urs", EUR) , ("gbp", GBP) , ("hrk", HRK) , ("idr", IDR) @@ -59,7 +59,7 @@ currencies = HashMap.fromList , ("rs.", INR) , ("rupee", INR) , ("rupees", INR) - , ("\x00a5", JPY) + , ("¥", JPY) , ("jpy", JPY) , ("yen", JPY) , ("krw", KRW) @@ -68,7 +68,7 @@ currencies = HashMap.fromList , ("myr", MYR) , ("rm", MYR) , ("nok", NOK) - , ("\x00a3", Pound) + , ("£", Pound) , ("pt", PTS) , ("pta", PTS) , ("ptas", PTS) @@ -87,7 +87,7 @@ ruleCurrencies :: Rule ruleCurrencies = Rule { name = "currencies" , pattern = - [ regex "(aed|aud|bgn|brl|\x00a2|c|\\$|dollars?|egp|(e|\x20ac)uro?s?|\x20ac|gbp|hrk|idr|inr|\x00a5|jpy|krw|kwd|lbp|myr|rm|nok|\x00a3|pta?s?|qar|rs\\.?|ron|rupees?|sar|sek|sgb|us(d|\\$)|vnd|yen)" + [ regex 
"(aed|aud|bgn|brl|¢|c|\\$|dollars?|egp|(e|€)uro?s?|€|gbp|hrk|idr|inr|¥|jpy|krw|kwd|lbp|myr|rm|nok|£|pta?s?|qar|rs\\.?|ron|rupees?|sar|sek|sgb|us(d|\\$)|vnd|yen)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> do diff --git a/Duckling/AmountOfMoney/SV/Rules.hs b/Duckling/AmountOfMoney/SV/Rules.hs index dc5ecccc..b8857372 100644 --- a/Duckling/AmountOfMoney/SV/Rules.hs +++ b/Duckling/AmountOfMoney/SV/Rules.hs @@ -56,7 +56,7 @@ ruleCent :: Rule ruleCent = Rule { name = "cent" , pattern = - [ regex "cents?|penn(y|ies)|\x00f6re" + [ regex "cents?|penn(y|ies)|öre" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Cent } diff --git a/Duckling/AmountOfMoney/VI/Rules.hs b/Duckling/AmountOfMoney/VI/Rules.hs index 2f37555d..9eeab7cb 100644 --- a/Duckling/AmountOfMoney/VI/Rules.hs +++ b/Duckling/AmountOfMoney/VI/Rules.hs @@ -28,7 +28,7 @@ ruleNg :: Rule ruleNg = Rule { name = "đồng" , pattern = - [ regex "\x0111\x1ed3ng?" + [ regex "đồng?" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly VND } @@ -37,7 +37,7 @@ ruleDollar :: Rule ruleDollar = Rule { name = "$" , pattern = - [ regex "\x0111\x00f4 la|\x0111\x00f4 m\x1ef9|\x0111(\x00f4)?" + [ regex "đô la|đô mỹ|đ(ô)?" ] , prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Dollar } @@ -46,7 +46,7 @@ ruleVnd :: Rule ruleVnd = Rule { name = "VNĐ" , pattern = - [ regex "vn(\x0110|\\$)" + [ regex "vn(Đ|\\$)" ] , prod = \_ -> Just . 
Token AmountOfMoney $ currencyOnly VND } @@ -88,7 +88,7 @@ ruleIntersectAndNumeral = Rule { name = "intersect and number" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "v\x00e0" + , regex "và" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -118,7 +118,7 @@ ruleIntersectVXXuxen = Rule { name = "intersect (và X xu|xen)" , pattern = [ financeWith TAmountOfMoney.value isJust - , regex "v\x00e0" + , regex "và" , financeWith TAmountOfMoney.currency (== Cent) ] , prod = \tokens -> case tokens of diff --git a/Duckling/Distance/ES/Rules.hs b/Duckling/Distance/ES/Rules.hs index db594e8b..d7c00c2f 100644 --- a/Duckling/Distance/ES/Rules.hs +++ b/Duckling/Distance/ES/Rules.hs @@ -25,7 +25,7 @@ ruleLatentDistKm = Rule { name = " km" , pattern = [ dimension Distance - , regex "k(il(\x00f3|o))?m?(etro)?s?" + , regex "k(il(ó|o))?m?(etro)?s?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -51,7 +51,7 @@ ruleDistCentimeters = Rule { name = " centimeters" , pattern = [ dimension Distance - , regex "(cm|cent(\x00ed|i)m(etros?))" + , regex "(cm|cent(í|i)m(etros?))" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Distance/FR/Rules.hs b/Duckling/Distance/FR/Rules.hs index 2f600648..11ee4749 100644 --- a/Duckling/Distance/FR/Rules.hs +++ b/Duckling/Distance/FR/Rules.hs @@ -25,7 +25,7 @@ ruleLatentDistKm = Rule { name = " km" , pattern = [ dimension Distance - , regex "k(ilo)?m?((e|\x00e9|\x00e8)tre)?s?" + , regex "k(ilo)?m?((e|é|è)tre)?s?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -38,7 +38,7 @@ ruleDistMeters = Rule { name = " meters" , pattern = [ dimension Distance - , regex "m((e|\x00e9|\x00e8)tres?)?" + , regex "m((e|é|è)tres?)?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -51,7 +51,7 @@ ruleDistCentimeters = Rule { name = " centimeters" , pattern = [ dimension Distance - , regex "cm|centim(e|\x00e9|\x00e8)tres?" + , regex "cm|centim(e|é|è)tres?" 
] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Distance/GA/Rules.hs b/Duckling/Distance/GA/Rules.hs index ea9a3364..028d9c07 100644 --- a/Duckling/Distance/GA/Rules.hs +++ b/Duckling/Distance/GA/Rules.hs @@ -25,7 +25,7 @@ ruleDistMeters = Rule { name = " meters" , pattern = [ dimension Distance - , regex "mh?(e|\x00e9)adai?r" + , regex "mh?(e|é)adai?r" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -38,7 +38,7 @@ ruleDistCentimeters = Rule { name = " centimeters" , pattern = [ dimension Distance - , regex "(c\\.?m\\.?|g?ch?eintimh?(e|\x00e9)adai?r)" + , regex "(c\\.?m\\.?|g?ch?eintimh?(e|é)adai?r)" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -51,7 +51,7 @@ ruleDistMiles = Rule { name = " miles" , pattern = [ dimension Distance - , regex "mh?(\x00ed|i)lt?e" + , regex "mh?(í|i)lt?e" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -64,7 +64,7 @@ ruleLatentDistKm = Rule { name = " km" , pattern = [ dimension Distance - , regex "(k\\.?(m\\.?)?|g?ch?ilim(e|\x00e9)adai?r)" + , regex "(k\\.?(m\\.?)?|g?ch?ilim(e|é)adai?r)" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -90,7 +90,7 @@ ruleLatentDistOrlach = Rule { name = " orlach" , pattern = [ dimension Distance - , regex "(''|([nth]-?)?orl(ach|aigh|a(\x00ed|i)|\\.))" + , regex "(''|([nth]-?)?orl(ach|aigh|a(í|i)|\\.))" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Distance/KO/Rules.hs b/Duckling/Distance/KO/Rules.hs index 3987d94c..3e61aad4 100644 --- a/Duckling/Distance/KO/Rules.hs +++ b/Duckling/Distance/KO/Rules.hs @@ -27,7 +27,7 @@ ruleLatentDistYard = Rule { name = " yard" , pattern = [ dimension Distance - , regex "y(ar)?ds?|\xc57c\xb4dc" + , regex "y(ar)?ds?|야드" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -40,7 +40,7 @@ ruleDistCentimeters = Rule { name = " centimeters" , pattern = [ dimension Distance - , regex 
"cm|\xc13c(\xd2f0|\xce58)((\xbbf8|\xba54)\xd130)?" + , regex "cm|센(티|치)((미|메)터)?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -53,9 +53,9 @@ ruleLatentDistFeetAndLatentDistInch = Rule { name = " feet and inch " , pattern = [ dimension Distance - , regex "('|f(oo|ee)?ts?)|\xd53c\xd2b8" + , regex "('|f(oo|ee)?ts?)|피트" , dimension Distance - , regex "(''|inch(es)?)|\xc778\xce58" + , regex "(''|inch(es)?)|인치" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -68,7 +68,7 @@ ruleDistMeters = Rule { name = " meters" , pattern = [ dimension Distance - , regex "m|(\xbbf8|\xba54|\xb9e4)\xd130" + , regex "m|(미|메|매)터" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -81,7 +81,7 @@ ruleLatentDistFeet = Rule { name = " feet" , pattern = [ dimension Distance - , regex "('|f(oo|ee)?ts?)|\xd53c\xd2b8" + , regex "('|f(oo|ee)?ts?)|피트" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -94,7 +94,7 @@ ruleLatentDistKm = Rule { name = " km" , pattern = [ dimension Distance - , regex "km|(\xd0ac|\xd0a4)\xb85c((\xbbf8|\xba54)\xd130)?" + , regex "km|(킬|키)로((미|메)터)?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -106,7 +106,7 @@ ruleHalf :: Rule ruleHalf = Rule { name = "half" , pattern = - [ regex "\xbc18" + [ regex "반" ] , prod = \tokens -> case tokens of (Token Numeral NumeralData {TNumeral.value = v}:_) -> @@ -119,7 +119,7 @@ ruleDistMiles = Rule { name = " miles" , pattern = [ dimension Distance - , regex "miles?|\xb9c8\xc77c(\xc988)?" + , regex "miles?|마일(즈)?" 
] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -132,7 +132,7 @@ ruleLatentDistInch = Rule { name = " inch" , pattern = [ dimension Distance - , regex "(''|inch(es)?)|\xc778\xce58" + , regex "(''|inch(es)?)|인치" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Distance/PT/Rules.hs b/Duckling/Distance/PT/Rules.hs index e5c3bd85..2c0f2e89 100644 --- a/Duckling/Distance/PT/Rules.hs +++ b/Duckling/Distance/PT/Rules.hs @@ -25,7 +25,7 @@ ruleLatentDistKm = Rule { name = " km" , pattern = [ dimension Distance - , regex "k(il(\x00f3|o))?m?(etro)?s?" + , regex "k(il(ó|o))?m?(etro)?s?" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> @@ -51,7 +51,7 @@ ruleDistCentimeters = Rule { name = " centimeters" , pattern = [ dimension Distance - , regex "(cm|cent(\x00ed|i)m(etros?))" + , regex "(cm|cent(í|i)m(etros?))" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Distance/RO/Rules.hs b/Duckling/Distance/RO/Rules.hs index 9e349656..1102e49b 100644 --- a/Duckling/Distance/RO/Rules.hs +++ b/Duckling/Distance/RO/Rules.hs @@ -103,7 +103,7 @@ ruleDistMiles = Rule { name = " miles" , pattern = [ dimension Distance - , regex "mil(e|a|\x0103)" + , regex "mil(e|a|ă)" ] , prod = \tokens -> case tokens of (Token Distance dd:_) -> diff --git a/Duckling/Duration/DA/Rules.hs b/Duckling/Duration/DA/Rules.hs index 1d25c0f0..8590a6b1 100644 --- a/Duckling/Duration/DA/Rules.hs +++ b/Duckling/Duration/DA/Rules.hs @@ -30,7 +30,7 @@ ruleExactlyDuration :: Rule ruleExactlyDuration = Rule { name = "exactly " , pattern = - [ regex "pr(\x00e6)cis" + [ regex "pr(æ)cis" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/DE/Rules.hs b/Duckling/Duration/DE/Rules.hs index e077d459..64c15147 100644 --- a/Duckling/Duration/DE/Rules.hs +++ b/Duckling/Duration/DE/Rules.hs @@ -104,7 +104,7 @@ ruleAboutDuration :: Rule ruleAboutDuration = Rule { name = "about " , pattern = - [ regex 
"ungef\x00e4hr|zirka" + [ regex "ungefähr|zirka" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/GA/Rules.hs b/Duckling/Duration/GA/Rules.hs index b4baba18..9fd2c4ad 100644 --- a/Duckling/Duration/GA/Rules.hs +++ b/Duckling/Duration/GA/Rules.hs @@ -26,7 +26,7 @@ ruleCoics :: Rule ruleCoics = Rule { name = "coicís" , pattern = - [ regex "coic(\x00ed|i)s(\x00ed|i|e)?" + [ regex "coic(í|i)s(í|i|e)?" ] , prod = \_ -> Just . Token Duration $ duration TG.Day 14 } diff --git a/Duckling/Duration/HE/Rules.hs b/Duckling/Duration/HE/Rules.hs index 1ece3fc7..4be3e9e5 100644 --- a/Duckling/Duration/HE/Rules.hs +++ b/Duckling/Duration/HE/Rules.hs @@ -33,7 +33,7 @@ ruleQuarterOfAnHour :: Rule ruleQuarterOfAnHour = Rule { name = "quarter of an hour" , pattern = - [ regex "(1/4/s \x05e9\x05e2\x05d4|\x05e8\x05d1\x05e2 \x05e9\x05e2\x05d4)" + [ regex "(1/4/s שעה|רבע שעה)" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 15 } @@ -42,7 +42,7 @@ ruleHalfAnHour :: Rule ruleHalfAnHour = Rule { name = "half an hour" , pattern = - [ regex "(1/2/s \x05e9\x05e2\x05d4|\x05d7\x05e6\x05d9 \x05e9\x05e2\x05d4)" + [ regex "(1/2/s שעה|חצי שעה)" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 30 } @@ -51,7 +51,7 @@ ruleThreequartersOfAnHour :: Rule ruleThreequartersOfAnHour = Rule { name = "three-quarters of an hour" , pattern = - [ regex "(3/4/s \x05e9\x05e2\x05d4|\x05e9\x05dc\x05d5\x05e9\x05ea \x05e8\x05d1\x05e2\x05d9 \x05e9\x05e2\x05d4)" + [ regex "(3/4/s שעה|שלושת רבעי שעה)" ] , prod = \_ -> Just . 
Token Duration $ duration TG.Minute 45 } @@ -61,7 +61,7 @@ ruleNumbernumberHours = Rule { name = "number.number hours" , pattern = [ regex "(\\d+)\\.(\\d+)" - , regex "\x05e9\x05e2\x05d4|\x05e9\x05e2\x05d5\x05ea" + , regex "שעה|שעות" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (h:m:_)):_) -> do @@ -77,7 +77,7 @@ ruleIntegerAndAnHalfHours = Rule { name = " and an half hours" , pattern = [ Predicate isNatural - , regex "\x05d5\x05d7\x05e6\x05d9 (\x05e9\x05e2\x05d5\x05ea|\x05e9\x05e2\x05d4)" + , regex "וחצי (שעות|שעה)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> @@ -89,7 +89,7 @@ ruleAboutDuration :: Rule ruleAboutDuration = Rule { name = "about " , pattern = - [ regex "(\x05d1\x05e2\x05e8\x05da|\x05e1\x05d1\x05d9\x05d1\x05d5\x05ea|\x05d1\x05e7\x05d9\x05e8\x05d5\x05d1)" + [ regex "(בערך|סביבות|בקירוב)" , dimension Duration ] , prod = \tokens -> case tokens of @@ -101,7 +101,7 @@ ruleExactlyDuration :: Rule ruleExactlyDuration = Rule { name = "exactly " , pattern = - [ regex "\x05d1\x05d3\x05d9\x05d5\x05e7" + [ regex "בדיוק" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/HR/Rules.hs b/Duckling/Duration/HR/Rules.hs index d45f9c17..35b1d1e3 100644 --- a/Duckling/Duration/HR/Rules.hs +++ b/Duckling/Duration/HR/Rules.hs @@ -33,7 +33,7 @@ ruleExactlyDuration :: Rule ruleExactlyDuration = Rule { name = "exactly " , pattern = - [ regex "to(c|\x010d)no" + [ regex "to(c|č)no" , dimension Duration ] , prod = \tokens -> case tokens of @@ -59,7 +59,7 @@ ruleIntegerMoreUnitofduration = Rule { name = " more " , pattern = [ Predicate isNatural - , regex "vi(s|\x0161)e|manje" + , regex "vi(s|š)e|manje" , dimension TimeGrain ] , prod = \tokens -> case tokens of @@ -74,7 +74,7 @@ ruleQuarterOfAnHour :: Rule ruleQuarterOfAnHour = Rule { name = "quarter of an hour" , pattern = - [ regex "((1/4|frtalj|kvarat|(c|\x010d)etvrt)\\s?(h|sata)?)" + [ regex 
"((1/4|frtalj|kvarat|(c|č)etvrt)\\s?(h|sata)?)" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 15 } @@ -111,7 +111,7 @@ ruleThreequartersOfAnHour :: Rule ruleThreequartersOfAnHour = Rule { name = "three-quarters of an hour" , pattern = - [ regex "((3/4|tri-?frtalja|tri-?kvarat|tri-?(c|\x010d)etvrt(ine)?)\\s?(h|sata)?)" + [ regex "((3/4|tri-?frtalja|tri-?kvarat|tri-?(c|č)etvrt(ine)?)\\s?(h|sata)?)" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 45 } diff --git a/Duckling/Duration/KO/Rules.hs b/Duckling/Duration/KO/Rules.hs index 9350dcb0..8d4851fa 100644 --- a/Duckling/Duration/KO/Rules.hs +++ b/Duckling/Duration/KO/Rules.hs @@ -31,7 +31,7 @@ ruleHalfAnHour = Rule { name = "half an hour" , pattern = [ Predicate $ isGrain TG.Hour - , regex "\xbc18" + , regex "반" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 30 } @@ -40,7 +40,7 @@ ruleADay :: Rule ruleADay = Rule { name = "a day - 하루" , pattern = - [ regex "\xd558\xb8e8" + [ regex "하루" ] , prod = \_ -> Just . 
Token Duration $ duration TG.Day 1 } @@ -50,7 +50,7 @@ ruleNumeralnumberHours = Rule { name = "number.number hours" , pattern = [ regex "(\\d+)\\.(\\d+)" - , regex "\xc2dc\xac04" + , regex "시간" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (h:m:_)):_) -> do @@ -66,7 +66,7 @@ ruleIntegerAndAnHalfHours = Rule { name = " and an half hours" , pattern = [ Predicate isNatural - , regex "\xc2dc\xac04\xbc18" + , regex "시간반" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> @@ -78,7 +78,7 @@ ruleAboutDuration :: Rule ruleAboutDuration = Rule { name = "about " , pattern = - [ regex "\xb300\xcda9|\xc57d" + [ regex "대충|약" , dimension Duration ] , prod = \tokens -> case tokens of @@ -90,7 +90,7 @@ ruleExactlyDuration :: Rule ruleExactlyDuration = Rule { name = "exactly " , pattern = - [ regex "\xc815\xd655\xd788" + [ regex "정확히" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/PL/Rules.hs b/Duckling/Duration/PL/Rules.hs index 1c39ffbb..a85b9e2f 100644 --- a/Duckling/Duration/PL/Rules.hs +++ b/Duckling/Duration/PL/Rules.hs @@ -30,7 +30,7 @@ ruleHalfAnHour :: Rule ruleHalfAnHour = Rule { name = "half an hour" , pattern = - [ regex "p(o|\x00f3)(l|\x0142) godziny" + [ regex "p(o|ó)(l|ł) godziny" ] , prod = \_ -> Just . 
Token Duration $ duration TG.Minute 30 } @@ -72,7 +72,7 @@ ruleIntegerAndAnHalfHours = Rule { name = " and an half hours" , pattern = [ Predicate isNatural - , regex "i (p(o|\x00f3)(l|\x0142)) godziny" + , regex "i (p(o|ó)(l|ł)) godziny" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> @@ -96,7 +96,7 @@ ruleAboutDuration :: Rule ruleAboutDuration = Rule { name = "about " , pattern = - [ regex "(oko(l|\x0142)o|miej wi(\x0119|e)cej|jakie(s|\x015b))" + [ regex "(oko(l|ł)o|miej wi(ę|e)cej|jakie(s|ś))" , dimension Duration ] , prod = \tokens -> case tokens of @@ -108,7 +108,7 @@ ruleExactlyDuration :: Rule ruleExactlyDuration = Rule { name = "exactly " , pattern = - [ regex "r(o|\x00f3)wno|dok(l|\x0142)adnie" + [ regex "r(o|ó)wno|dok(l|ł)adnie" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/RO/Rules.hs b/Duckling/Duration/RO/Rules.hs index 27f3089c..634fe163 100644 --- a/Duckling/Duration/RO/Rules.hs +++ b/Duckling/Duration/RO/Rules.hs @@ -24,7 +24,7 @@ ruleQuarterOfAnHour :: Rule ruleQuarterOfAnHour = Rule { name = "quarter of an hour" , pattern = - [ regex "(1/4\\s?(h|or(a|\x0103))|sfert de or(a|\x0103))" + [ regex "(1/4\\s?(h|or(a|ă))|sfert de or(a|ă))" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 15 } @@ -33,7 +33,7 @@ ruleJumatateDeOra :: Rule ruleJumatateDeOra = Rule { name = "jumatate de ora" , pattern = - [ regex "(1/2\\s?(h|or(a|\x0103))|jum(a|\x0103)tate (de )?or(a|\x0103))" + [ regex "(1/2\\s?(h|or(a|ă))|jum(a|ă)tate (de )?or(a|ă))" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 30 } @@ -42,7 +42,7 @@ ruleTreiSferturiDeOra :: Rule ruleTreiSferturiDeOra = Rule { name = "trei sferturi de ora" , pattern = - [ regex "(3/4\\s?(h|or(a|\x0103))|trei sferturi de or(a|\x0103))" + [ regex "(3/4\\s?(h|or(a|ă))|trei sferturi de or(a|ă))" ] , prod = \_ -> Just . 
Token Duration $ duration TG.Minute 45 } @@ -63,7 +63,7 @@ ruleExactInJurDeDuration :: Rule ruleExactInJurDeDuration = Rule { name = "exact|in jur de " , pattern = - [ regex "(exact|aproximativ|(i|\x00ee)n jur de)" + [ regex "(exact|aproximativ|(i|î)n jur de)" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Duration/TR/Rules.hs b/Duckling/Duration/TR/Rules.hs index ed9bda2b..d201028f 100644 --- a/Duckling/Duration/TR/Rules.hs +++ b/Duckling/Duration/TR/Rules.hs @@ -30,7 +30,7 @@ import qualified Duckling.TimeGrain.Types as TG ruleDurationQuarterOfAnHour :: Rule ruleDurationQuarterOfAnHour = Rule { name = "quarter of an hour" - , pattern = [ regex "(1/4\\s?sa(at)?|\x00e7eyrek saat)" ] + , pattern = [ regex "(1/4\\s?sa(at)?|çeyrek saat)" ] , prod = \_ -> Just . Token Duration $ duration TG.Minute 15 } @@ -44,7 +44,7 @@ ruleDurationHalfAnHour = Rule ruleDurationThreeQuartersOfAnHour :: Rule ruleDurationThreeQuartersOfAnHour = Rule { name = "three-quarters of an hour" - , pattern = [regex "(3/4\\s?sa(at)?|\x00fc\x00e7e \231eyrek sa(at)?)"] + , pattern = [regex "(3/4\\s?sa(at)?|üç çeyrek sa(at)?)"] , prod = \_ -> Just . Token Duration $ duration TG.Minute 45 } @@ -90,7 +90,7 @@ ruleDurationAndHalfHour = Rule { name = " and an half hour" , pattern = [ Predicate isNatural - , regex "bu\x00e7euk sa(at)?" + , regex "buçuk sa(at)?" 
] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> @@ -103,7 +103,7 @@ ruleDurationPrecision :: Rule ruleDurationPrecision = Rule { name = " about|exactly" , pattern = - [ regex "(gibi|civar\305nda|yakla\x015f\305k|tam( olarak)?)" + [ regex "(gibi|civar\305nda|yaklaş\305k|tam( olarak)?)" , dimension Duration ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/AR/Rules.hs b/Duckling/Numeral/AR/Rules.hs index ca83571d..034cd6b4 100644 --- a/Duckling/Numeral/AR/Rules.hs +++ b/Duckling/Numeral/AR/Rules.hs @@ -28,7 +28,7 @@ ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer 4" , pattern = - [ regex "(\x0623\x0631\x0628\x0639(\x0629)?)" + [ regex "(أربع(ة)?)" ] , prod = \_ -> integer 4 } @@ -38,7 +38,7 @@ ruleInteger23 = Rule { name = "integer 101..999" , pattern = [ oneOf [100, 200 .. 900] - , regex "\x0648" + , regex "و" , numberBetween 1 100 ] , prod = \tokens -> case tokens of @@ -53,7 +53,7 @@ ruleInteger18 :: Rule ruleInteger18 = Rule { name = "integer 12" , pattern = - [ regex "(\x0625\x062b\x0646(\x062a)?\x0649 \x0639\x0634\x0631)" + [ regex "(إثن(ت)?ى عشر)" ] , prod = \_ -> integer 12 } @@ -75,18 +75,18 @@ ruleInteger19 :: Rule ruleInteger19 = Rule { name = "integer (20..90)" , pattern = - [ regex "(\x0639\x0634\x0631\x0648\x0646|\x062b\x0644\x0627\x062b\x0648\x0646|\x0623\x0631\x0628\x0639\x0648\x0646|\x062e\x0645\x0633\x0648\x0646|\x0633\x062a\x0648\x0646|\x0633\x0628\x0639\x0648\x0646|\x062b\x0645\x0627\x0646\x0648\x0646|\x062a\x0633\x0639\x0648\x0646)" + [ regex "(عشرون|ثلاثون|أربعون|خمسون|ستون|سبعون|ثمانون|تسعون)" ] , prod = \tokens -> case tokens of Token RegexMatch (GroupMatch (match:_)):_ -> case match of - "\x0639\x0634\x0631\x0648\x0646" -> integer 20 - "\x062b\x0644\x0627\x062b\x0648\x0646" -> integer 30 - "\x0623\x0631\x0628\x0639\x0648\x0646" -> integer 40 - "\x062e\x0645\x0633\x0648\x0646" -> integer 50 - "\x0633\x062a\x0648\x0646" -> integer 60 - "\x0633\x0628\x0639\x0648\x0646" -> 
integer 70 - "\x062b\x0645\x0627\x0646\x0648\x0646" -> integer 80 - "\x062a\x0633\x0639\x0648\x0646" -> integer 90 + "عشرون" -> integer 20 + "ثلاثون" -> integer 30 + "أربعون" -> integer 40 + "خمسون" -> integer 50 + "ستون" -> integer 60 + "سبعون" -> integer 70 + "ثمانون" -> integer 80 + "تسعون" -> integer 90 _ -> Nothing _ -> Nothing } @@ -96,7 +96,7 @@ ruleInteger22 = Rule { name = "integer 21..99" , pattern = [ numberBetween 1 10 - , regex "\x0648" + , regex "و" , oneOf [20, 30 .. 90] ] , prod = \tokens -> case tokens of @@ -147,7 +147,7 @@ ruleInteger15 :: Rule ruleInteger15 = Rule { name = "integer 11" , pattern = - [ regex "(\x0625\x062d\x062f\x0649 \x0639\x0634\x0631(\x0629)?)" + [ regex "(إحدى عشر(ة)?)" ] , prod = \_ -> integer 11 } @@ -168,21 +168,21 @@ rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(\x0645\x0627\x0626\x0629|\x0645\x0626\x0627\x062a|\x0623\x0644\x0641|\x0627\x0644\x0641|\x0622\x0644\x0627\x0641|\x0645\x0644\x0627\x064a\x064a(\x0646)?)" + [ regex "(مائة|مئات|ألف|الف|آلاف|ملايي(ن)?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "\x0645\x0627\x0626\x0629" -> + "مائة" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "\x0645\x0626\x0627\x062a" -> + "مئات" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "\x0623\x0644\x0641" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\x0627\x0644\x0641" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\x0622\x0644\x0627\x0641" -> + "ألف" -> double 1e3 >>= withGrain 3 >>= withMultipliable + "الف" -> double 1e3 >>= withGrain 3 >>= withMultipliable + "آلاف" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\x0645\x0644\x0627\x064a\x064a" -> + "ملايي" -> double 1e6 >>= withGrain 6 >>= withMultipliable - "\x0645\x0644\x0627\x064a\x064a\x0646" -> + "ملايين" -> double 1e6 >>= withGrain 6 >>= withMultipliable _ -> Nothing _ -> Nothing @@ -192,7 +192,7 @@ ruleInteger3 :: Rule 
ruleInteger3 = Rule { name = "integer 2" , pattern = - [ regex "(\x0627\x062b\x0646\x0627\x0646|\x0627\x062b\x0646\x064a\x0646)" + [ regex "(اثنان|اثنين)" ] , prod = \_ -> integer 2 } @@ -201,7 +201,7 @@ ruleInteger13 :: Rule ruleInteger13 = Rule { name = "integer 9" , pattern = - [ regex "(\x062a\x0633\x0639\x0629|\x062a\x0633\x0639)" + [ regex "(تسعة|تسع)" ] , prod = \_ -> integer 9 } @@ -210,7 +210,7 @@ ruleInteger12 :: Rule ruleInteger12 = Rule { name = "integer 8" , pattern = - [ regex "(\x062b\x0645\x0627\x0646\x064a\x0629|\x062b\x0645\x0627\x0646)" + [ regex "(ثمانية|ثمان)" ] , prod = \_ -> integer 8 } @@ -232,7 +232,7 @@ ruleInteger7 :: Rule ruleInteger7 = Rule { name = "integer 5" , pattern = - [ regex "(\x062e\x0645\x0633)(\x0629)?" + [ regex "(خمس)(ة)?" ] , prod = \_ -> integer 5 } @@ -241,7 +241,7 @@ ruleInteger14 :: Rule ruleInteger14 = Rule { name = "integer 10" , pattern = - [ regex "(\x0639\x0634\x0631\x0629|\x0639\x0634\x0631)" + [ regex "(عشرة|عشر)" ] , prod = \_ -> integer 10 } @@ -250,7 +250,7 @@ ruleInteger9 :: Rule ruleInteger9 = Rule { name = "integer 6" , pattern = - [ regex "(\x0633\x062a(\x0629)?)" + [ regex "(ست(ة)?)" ] , prod = \_ -> integer 6 } @@ -259,7 +259,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "(\x0635\x0641\x0631)" + [ regex "(صفر)" ] , prod = \_ -> integer 0 } @@ -268,7 +268,7 @@ ruleInteger4 :: Rule ruleInteger4 = Rule { name = "integer 3" , pattern = - [ regex "(\x062b\x0644\x0627\x062b|\x062b\x0644\x0627\x062b\x0629)" + [ regex "(ثلاث|ثلاثة)" ] , prod = \_ -> integer 3 } @@ -277,7 +277,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer 1" , pattern = - [ regex "(\x0648\x0627\x062d\x062f\x0629|\x0648\x0627\x062d\x062f\x0647|\x0648\x0627\x062d\x062f)" + [ regex "(واحدة|واحده|واحد)" ] , prod = \_ -> integer 1 } @@ -286,7 +286,7 @@ ruleInteger11 :: Rule ruleInteger11 = Rule { name = "integer 7" , pattern = - [ regex "(\x0633\x0628\x0639\x0629|\x0633\x0628\x0639)" + [ 
regex "(سبعة|سبع)" ] , prod = \_ -> integer 7 } @@ -295,19 +295,19 @@ ruleInteger20 :: Rule ruleInteger20 = Rule { name = "integer (100..900)" , pattern = - [ regex "(\x0645\x0627\x0626\x0629|\x0645\x0627\x0626\x062a\x0627\x0646|\x062b\x0644\x0627\x062b\x0645\x0627\x0626\x0629|\x0623\x0631\x0628\x0639\x0645\x0627\x0626\x0629|\x062e\x0645\x0633\x0645\x0627\x0626\x0629|\x0633\x062a\x0645\x0627\x0626\x0629|\x0633\x0628\x0639\x0645\x0627\x0626\x0629|\x062b\x0645\x0627\x0646\x0645\x0627\x0626\x0629|\x062a\x0633\x0639\x0645\x0627\x0626\x0629)" + [ regex "(مائة|مائتان|ثلاثمائة|أربعمائة|خمسمائة|ستمائة|سبعمائة|ثمانمائة|تسعمائة)" ] , prod = \tokens -> case tokens of Token RegexMatch (GroupMatch (match:_)):_ -> case match of - "\x0645\x0627\x0626\x0629" -> integer 100 - "\x0633\x0628\x0639\x0645\x0627\x0626\x0629" -> integer 700 - "\x062e\x0645\x0633\x0645\x0627\x0626\x0629" -> integer 500 - "\x0623\x0631\x0628\x0639\x0645\x0627\x0626\x0629" -> integer 400 - "\x0633\x062a\x0645\x0627\x0626\x0629" -> integer 600 - "\x0645\x0627\x0626\x062a\x0627\x0646" -> integer 200 - "\x062b\x0644\x0627\x062b\x0645\x0627\x0626\x0629" -> integer 300 - "\x062b\x0645\x0627\x0646\x0645\x0627\x0626\x0629" -> integer 800 - "\x062a\x0633\x0639\x0645\x0627\x0626\x0629" -> integer 900 + "مائة" -> integer 100 + "سبعمائة" -> integer 700 + "خمسمائة" -> integer 500 + "أربعمائة" -> integer 400 + "ستمائة" -> integer 600 + "مائتان" -> integer 200 + "ثلاثمائة" -> integer 300 + "ثمانمائة" -> integer 800 + "تسعمائة" -> integer 900 _ -> Nothing _ -> Nothing } @@ -317,7 +317,7 @@ ruleNumeralDotNumeral = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "\x0641\x0627\x0635\x0644\x0629" + , regex "فاصلة" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/BG/Rules.hs b/Duckling/Numeral/BG/Rules.hs index 5ac30b5d..fd2b22e3 100644 --- a/Duckling/Numeral/BG/Rules.hs +++ b/Duckling/Numeral/BG/Rules.hs @@ -226,7 +226,7 @@ ruleSuffixes = Rule 
{ name = "suffixes (K,M,G))" , pattern = [ dimension Numeral - , regex "((к|м|г)|(К|М|Г))(?=[\\W$\x20ac\x00a2\x00a3]|$)" + , regex "((к|м|г)|(К|М|Г))(?=[\\W$€¢£]|$)" ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/CS/Rules.hs b/Duckling/Numeral/CS/Rules.hs index 935b19a8..bcf8782c 100644 --- a/Duckling/Numeral/CS/Rules.hs +++ b/Duckling/Numeral/CS/Rules.hs @@ -50,9 +50,9 @@ ruleNumeralMap = HashMap.fromList , ( "dva", 2 ) , ( "dv\x0115", 2 ) , ( "t\x0159i", 3 ) - , ( "\x010dty\x0159i", 4 ) + , ( "čty\x0159i", 4 ) , ( "p\x0115t", 5) - , ( "\x0161est", 6) + , ( "šest", 6) , ( "sedm", 7) , ( "osm", 8) , ( "dev\x0115t", 9) @@ -63,7 +63,7 @@ ruleNumeral :: Rule ruleNumeral = Rule { name = "number (0..10)" , pattern = - [ regex "(nula|jed(en|n[ao])|dv(a|\x0115)|t(\x0159)i|(\x010d)ty(\x0159)i|p(\x0115)t|(\x0161)est|sedm|osm|dev(\x0115)t|deset)" + [ regex "(nula|jed(en|n[ao])|dv(a|\x0115)|t(\x0159)i|(č)ty(\x0159)i|p(\x0115)t|(š)est|sedm|osm|dev(\x0115)t|deset)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/DA/Rules.hs b/Duckling/Numeral/DA/Rules.hs index f941eb5f..3c5e53a8 100644 --- a/Duckling/Numeral/DA/Rules.hs +++ b/Duckling/Numeral/DA/Rules.hs @@ -57,7 +57,7 @@ ruleFew :: Rule ruleFew = Rule { name = "few" , pattern = - [ regex "(nogle )?f\x00e5" + [ regex "(nogle )?få" ] , prod = \_ -> integer 3 } @@ -137,7 +137,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -221,8 +221,8 @@ zeroNineteenMap = HashMap.fromList , ("intet", 0) , ("en", 1) , ("et", 1) - , ("\x00e9n", 1) - , ("\x00e9t", 1) + , ("én", 1) + , ("ét", 1) , ("to", 2) , ("tre", 3) , ("fire", 4) @@ -247,7 +247,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex 
"(intet|ingen|nul|en|et|\x00e9n|\x00e9t|to|tretten|tre|fire|femten|fem|seksten|seks|syv|otte|nitten|ni|ti|elleve|tolv|fjorten|sytten|atten)" + [ regex "(intet|ingen|nul|en|et|én|ét|to|tretten|tre|fire|femten|fem|seksten|seks|syv|otte|nitten|ni|ti|elleve|tolv|fjorten|sytten|atten)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/DE/Rules.hs b/Duckling/Numeral/DE/Rules.hs index 53e9ca5f..0bae6cca 100644 --- a/Duckling/Numeral/DE/Rules.hs +++ b/Duckling/Numeral/DE/Rules.hs @@ -103,7 +103,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer ([2-9][1-9])" , pattern = - [ regex "(ein|zwei|drei|vier|f\x00fcnf|sechs|sieben|acht|neun)und(zwanzig|dreissig|vierzig|f\x00fcnfzig|sechzig|siebzig|achtzig|neunzig)" + [ regex "(ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun)und(zwanzig|dreissig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do @@ -112,7 +112,7 @@ ruleInteger3 = Rule "zwei" -> Just 2 "drei" -> Just 3 "vier" -> Just 4 - "f\x00fcnf" -> Just 5 + "fünf" -> Just 5 "sechs" -> Just 6 "sieben" -> Just 7 "acht" -> Just 8 @@ -122,7 +122,7 @@ ruleInteger3 = Rule "zwanzig" -> Just 20 "dreissig" -> Just 30 "vierzig" -> Just 40 - "f\x00fcnfzig" -> Just 50 + "fünfzig" -> Just 50 "sechzig" -> Just 60 "siebzig" -> Just 70 "achtzig" -> Just 80 @@ -179,7 +179,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -246,17 +246,17 @@ zeroNineteenMap = HashMap.fromList , ("zwei", 2) , ("drei", 3) , ("vier", 4) - , ("f\x00fcnf", 5) + , ("fünf", 5) , ("sechs", 6) , ("sieben", 7) , ("acht", 8) , ("neun", 9) , ("zehn", 10) , ("elf", 11) - , ("zw\x00f6lf", 12) + , ("zwölf", 12) , ("dreizehn", 13) , ("vierzehn", 14) - , 
("f\x00fcnfzehn", 15) + , ("fünfzehn", 15) , ("sechzehn", 16) , ("siebzehn", 17) , ("achtzehn", 18) @@ -268,7 +268,7 @@ ruleToNineteen = Rule { name = "integer (0..19)" -- e.g. fourteen must be before four, -- otherwise four will always shadow fourteen - , pattern = [regex "(keine?|keine?s|keiner|keinen|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|f\x00fcnf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zw\x00f6lf|f\x00fcfzehn)"] + , pattern = [regex "(keine?|keine?s|keiner|keinen|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf|füfzehn)"] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer @@ -279,7 +279,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(keine?|keine?s|keiner|keinen|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|f\x00fcnf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zw\x00f6lf|f\x00fcfzehn)" + [ regex "(keine?|keine?s|keiner|keinen|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf|füfzehn)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -298,17 +298,17 @@ ruleInteger = Rule "zwei" -> integer 2 "drei" -> integer 3 "vier" -> integer 4 - "f\x00fcnf" -> integer 5 + "fünf" -> integer 5 "sechs" -> integer 6 "sieben" -> integer 7 "acht" -> integer 8 "neun" -> integer 9 "zehn" -> integer 10 "elf" -> integer 11 - "zw\x00f6lf" -> integer 12 + "zwölf" -> integer 12 "dreizehn" -> integer 13 "vierzehn" -> integer 14 - "f\x00fcnfzehn" -> integer 15 + "fünfzehn" -> integer 15 "sechzehn" -> integer 16 "siebzehn" -> integer 17 "achtzehn" -> integer 18 @@ -321,14 +321,14 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = 
- [ regex "(zwanzig|dreissig|vierzig|f\x00fcnfzig|sechzig|siebzig|achtzig|neunzig)" + [ regex "(zwanzig|dreissig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "zwanzig" -> integer 20 "dreissig" -> integer 30 "vierzig" -> integer 40 - "f\x00fcnfzig" -> integer 50 + "fünfzig" -> integer 50 "sechzig" -> integer 60 "siebzig" -> integer 70 "achtzig" -> integer 80 diff --git a/Duckling/Numeral/EN/Rules.hs b/Duckling/Numeral/EN/Rules.hs index 475d4b2d..11383b17 100644 --- a/Duckling/Numeral/EN/Rules.hs +++ b/Duckling/Numeral/EN/Rules.hs @@ -234,7 +234,7 @@ ruleSuffixes = Rule { name = "suffixes (K,M,G))" , pattern = [ dimension Numeral - , regex "(k|m|g)(?=[\\W$\x20ac\x00a2\x00a3]|$)" + , regex "(k|m|g)(?=[\\W$€¢£]|$)" ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/ES/Rules.hs b/Duckling/Numeral/ES/Rules.hs index 937f2b3d..e0b0b5ba 100644 --- a/Duckling/Numeral/ES/Rules.hs +++ b/Duckling/Numeral/ES/Rules.hs @@ -110,12 +110,12 @@ zeroToFifteenMap = HashMap.fromList , ( "una" , 1 ) , ( "uno" , 1 ) , ( "dos" , 2 ) - , ( "tr\x00e9s" , 3 ) + , ( "trés" , 3 ) , ( "tres" , 3 ) , ( "cuatro" , 4 ) , ( "cinco" , 5 ) , ( "seis" , 6 ) - , ( "s\x00e9is" , 6 ) + , ( "séis" , 6 ) , ( "siete" , 7 ) , ( "ocho" , 8 ) , ( "nueve" , 9 ) @@ -132,7 +132,7 @@ ruleNumeral :: Rule ruleNumeral = Rule { name = "number (0..15)" , pattern = - [ regex "((c|z)ero|un(o|a)?|dos|tr(\x00e9|e)s|cuatro|cinco|s(e|\x00e9)is|siete|ocho|nueve|die(z|s)|once|doce|trece|catorce|quince)" + [ regex "((c|z)ero|un(o|a)?|dos|tr(é|e)s|cuatro|cinco|s(e|é)is|siete|ocho|nueve|die(z|s)|once|doce|trece|catorce|quince)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -143,20 +143,20 @@ ruleNumeral = Rule sixteenToTwentyNineMap :: HashMap.HashMap Text.Text Integer sixteenToTwentyNineMap = HashMap.fromList [ ( "dieciseis" , 16 ) - , ( "diesis\x00e9is" , 16 ) 
+ , ( "diesiséis" , 16 ) , ( "diesiseis" , 16 ) - , ( "diecis\x00e9is" , 16 ) + , ( "dieciséis" , 16 ) , ( "diecisiete" , 17 ) , ( "dieciocho" , 18 ) , ( "diecinueve" , 19 ) , ( "veintiuno" , 21 ) , ( "veintiuna" , 21 ) , ( "veintidos" , 22 ) - , ( "veintitr\x00e9s" , 23 ) + , ( "veintitrés" , 23 ) , ( "veintitres" , 23 ) , ( "veinticuatro" , 24 ) , ( "veinticinco" , 25 ) - , ( "veintis\x00e9is" , 26 ) + , ( "veintiséis" , 26 ) , ( "veintiseis" , 26 ) , ( "veintisiete" , 27 ) , ( "veintiocho" , 28 ) @@ -167,7 +167,7 @@ ruleNumeral5 :: Rule ruleNumeral5 = Rule { name = "number (16..19 21..29)" , pattern = - [ regex "(die(c|s)is(\x00e9|e)is|diecisiete|dieciocho|diecinueve|veintiun(o|a)|veintidos|veintitr(\x00e9|e)s|veinticuatro|veinticinco|veintis(\x00e9|e)is|veintisiete|veintiocho|veintinueve)" + [ regex "(die(c|s)is(é|e)is|diecisiete|dieciocho|diecinueve|veintiun(o|a)|veintidos|veintitr(é|e)s|veinticuatro|veinticinco|veintis(é|e)is|veintisiete|veintiocho|veintinueve)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -194,7 +194,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): diff --git a/Duckling/Numeral/ET/Rules.hs b/Duckling/Numeral/ET/Rules.hs index 16c50fb5..46b27194 100644 --- a/Duckling/Numeral/ET/Rules.hs +++ b/Duckling/Numeral/ET/Rules.hs @@ -77,7 +77,7 @@ ruleTen :: Rule ruleTen = Rule { name = "ten" , pattern = - [ regex "k\x00fcmme" + [ regex "kümme" ] , prod = \_ -> integer 10 >>= withGrain 1 } @@ -122,7 +122,7 @@ ruleAFew :: Rule ruleAFew = Rule { name = "(a )?few" , pattern = - [ regex "m\x00f5ni" + [ regex "mõni" ] , prod = \_ -> integer 3 } @@ -158,7 +158,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex 
"([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -187,12 +187,12 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(null|\x00fcksteist|\x00fcks|kaksteist|kaks|kolmteist|kolm|neliteist|neli|viisteist|viis|kuusteist|kuus|seitseteist|seitse|kaheksateist|kaheksa|\x00fcheksateist|\x00fcheksa|k\x00fcmme)" + [ regex "(null|üksteist|üks|kaksteist|kaks|kolmteist|kolm|neliteist|neli|viisteist|viis|kuusteist|kuus|seitseteist|seitse|kaheksateist|kaheksa|üheksateist|üheksa|kümme)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "null" -> integer 0 - "\x00fcks" -> integer 1 + "üks" -> integer 1 "kaks" -> integer 2 "kolm" -> integer 3 "neli" -> integer 4 @@ -200,9 +200,9 @@ ruleInteger = Rule "kuus" -> integer 6 "seitse" -> integer 7 "kaheksa" -> integer 8 - "\x00fcheksa" -> integer 9 - "k\x00fcmme" -> integer 10 - "\x00fcksteist" -> integer 11 + "üheksa" -> integer 9 + "kümme" -> integer 10 + "üksteist" -> integer 11 "kaksteist" -> integer 12 "kolmteist" -> integer 13 "neliteist" -> integer 14 @@ -210,7 +210,7 @@ ruleInteger = Rule "kuusteist" -> integer 16 "seitseteist" -> integer 17 "kaheksateist" -> integer 18 - "\x00fcheksateist" -> integer 19 + "üheksateist" -> integer 19 _ -> Nothing _ -> Nothing } @@ -219,7 +219,7 @@ ruleInteger4 :: Rule ruleInteger4 = Rule { name = "integer (200..900)" , pattern = - [ regex "(kakssada|kolmsada|nelisada|viissada|kuussada|seitsesada|kaheksasada|\x00fcheksasada)" + [ regex "(kakssada|kolmsada|nelisada|viissada|kuussada|seitsesada|kaheksasada|üheksasada)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -230,7 +230,7 @@ ruleInteger4 = Rule "kuussada" -> integer 600 >>= withGrain 2 >>= withMultipliable "seitsesada" -> integer 700 >>= withGrain 2 >>= withMultipliable 
"kaheksasada" -> integer 800 >>= withGrain 2 >>= withMultipliable - "\x00fcheksasada" -> integer 900 >>= withGrain 2 >>= withMultipliable + "üheksasada" -> integer 900 >>= withGrain 2 >>= withMultipliable _ -> Nothing _ -> Nothing } @@ -239,18 +239,18 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "((kaks|kolm|neli|viis|kuus|seitse|kaheksa|(\x00fc)heksa)k(\x00fc)mmend)" + [ regex "((kaks|kolm|neli|viis|kuus|seitse|kaheksa|(ü)heksa)k(ü)mmend)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "kaksk\x00fcmmend" -> integer 20 - "kolmk\x00fcmmend" -> integer 30 - "nelik\x00fcmmend" -> integer 40 - "viisk\x00fcmmend" -> integer 50 - "kuusk\x00fcmmend" -> integer 60 - "seitsek\x00fcmmend" -> integer 70 - "kaheksak\x00fcmmend" -> integer 80 - "\x00fcheksak\x00fcmmend" -> integer 90 + "kakskümmend" -> integer 20 + "kolmkümmend" -> integer 30 + "nelikümmend" -> integer 40 + "viiskümmend" -> integer 50 + "kuuskümmend" -> integer 60 + "seitsekümmend" -> integer 70 + "kaheksakümmend" -> integer 80 + "üheksakümmend" -> integer 90 _ -> Nothing _ -> Nothing } diff --git a/Duckling/Numeral/FR/Rules.hs b/Duckling/Numeral/FR/Rules.hs index 8148883f..81821ab8 100644 --- a/Duckling/Numeral/FR/Rules.hs +++ b/Duckling/Numeral/FR/Rules.hs @@ -144,7 +144,7 @@ ruleNumerals5 = Rule ruleNumeralMap :: HashMap Text Integer ruleNumeralMap = HashMap.fromList [ ( "zero" , 0 ) - , ( "z\x00e9ro", 0 ) + , ( "zéro", 0 ) , ( "un" , 1 ) , ( "une" , 1 ) , ( "deux" , 2 ) @@ -168,7 +168,7 @@ ruleNumeral :: Rule ruleNumeral = Rule { name = "number (0..16)" , pattern = - [ regex "(z(e|\x00e9)ro|une?|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize)" + [ regex "(z(e|é)ro|une?|deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -211,7 +211,7 
@@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W$\x20ac\x00a2\x00a3]|$)" + , regex "([kmg])(?=[\\W$€¢£]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): diff --git a/Duckling/Numeral/GA/Rules.hs b/Duckling/Numeral/GA/Rules.hs index cee095d5..f180006f 100644 --- a/Duckling/Numeral/GA/Rules.hs +++ b/Duckling/Numeral/GA/Rules.hs @@ -27,7 +27,7 @@ ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule { name = "numbers prefix with -, negative or minus" , pattern = - [ regex "-|m(\x00ed|i)neas(\\sa)?\\s?" + [ regex "-|m(í|i)neas(\\sa)?\\s?" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -54,19 +54,19 @@ ruleNumerals2 :: Rule ruleNumerals2 = Rule { name = "numbers, 1-10" , pattern = - [ regex "(aon|dh(\x00e1|a)|tr(\x00ed|i)|ceithre|c(\x00fa|u)ig|seacht|s(\x00e9|e)|ocht|naoi|deich)" + [ regex "(aon|dh(á|a)|tr(í|i)|ceithre|c(ú|u)ig|seacht|s(é|e)|ocht|naoi|deich)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "aon" -> integer 1 "dha" -> integer 2 - "dh\x00e1" -> integer 2 - "tr\x00ed" -> integer 3 + "dhá" -> integer 2 + "trí" -> integer 3 "tri" -> integer 3 "ceithre" -> integer 4 "cuig" -> integer 5 - "c\x00faig" -> integer 5 - "s\x00e9" -> integer 6 + "cúig" -> integer 5 + "sé" -> integer 6 "se" -> integer 6 "seacht" -> integer 7 "ocht" -> integer 8 @@ -103,7 +103,7 @@ ruleDag :: Rule ruleDag = Rule { name = "déag" , pattern = - [ regex "d(\x00e9|e)ag" + [ regex "d(é|e)ag" ] , prod = \_ -> integer 10 } @@ -113,7 +113,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -130,15 +130,15 @@ ruleOldVigesimalNumeralsS :: Rule 
ruleOldVigesimalNumeralsS = Rule { name = "old vigesimal numbers, 20s" , pattern = - [ regex "is (dh?(\x00e1|a) fhichead|tr(\x00ed|i) fichid|ceithre fichid)" + [ regex "is (dh?(á|a) fhichead|tr(í|i) fichid|ceithre fichid)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "d\x00e1 fhichead" -> integer 40 + "dá fhichead" -> integer 40 "da fhichead" -> integer 40 - "dh\x00e1 fhichead" -> integer 40 + "dhá fhichead" -> integer 40 "dha fhichead" -> integer 40 - "tr\x00ed fichid" -> integer 60 + "trí fichid" -> integer 60 "tri fichid" -> integer 60 "ceithre fichid" -> integer 80 _ -> Nothing @@ -149,16 +149,16 @@ ruleOldVigesimalNumeralsS2 :: Rule ruleOldVigesimalNumeralsS2 = Rule { name = "old vigesimal numbers, 20s + 10" , pattern = - [ regex "d(\x00e9|e)ag is (fiche|dh?(\x00e1|a) fhichead|tr(\x00ed|i) fichid|ceithre fichid)" + [ regex "d(é|e)ag is (fiche|dh?(á|a) fhichead|tr(í|i) fichid|ceithre fichid)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "fiche" -> integer 30 - "d\x00e1 fhichead" -> integer 50 + "dá fhichead" -> integer 50 "da fhichead" -> integer 50 - "dh\x00e1 fhichead" -> integer 50 + "dhá fhichead" -> integer 50 "dha fhichead" -> integer 50 - "tr\x00ed fichid" -> integer 70 + "trí fichid" -> integer 70 "tri fichid" -> integer 70 "ceithre fichid" -> integer 90 _ -> Nothing @@ -169,7 +169,7 @@ ruleAmhin :: Rule ruleAmhin = Rule { name = "amháin" , pattern = - [ regex "amh(\x00e1|a)in" + [ regex "amh(á|a)in" ] , prod = \_ -> integer 1 } @@ -178,21 +178,21 @@ ruleNumerals :: Rule ruleNumerals = Rule { name = "numbers, 20-90" , pattern = - [ regex "(fiche|tr(\x00ed|i)ocha|daichead|caoga|seasca|seacht(\x00f3|o)|ocht(\x00f3|o)|n(\x00f3|o)cha)" + [ regex "(fiche|tr(í|i)ocha|daichead|caoga|seasca|seacht(ó|o)|ocht(ó|o)|n(ó|o)cha)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match 
of "fiche" -> integer 20 "triocha" -> integer 30 - "tr\x00edocha" -> integer 30 + "tríocha" -> integer 30 "daichead" -> integer 40 "caoga" -> integer 50 "seasca" -> integer 60 "seachto" -> integer 70 - "seacht\x00f3" -> integer 70 + "seachtó" -> integer 70 "ochto" -> integer 80 - "ocht\x00f3" -> integer 80 - "n\x00f3cha" -> integer 90 + "ochtó" -> integer 80 + "nócha" -> integer 90 "nocha" -> integer 90 _ -> Nothing _ -> Nothing @@ -214,21 +214,21 @@ ruleCountNumerals :: Rule ruleCountNumerals = Rule { name = "count numbers" , pattern = - [ regex "a (n(\x00e1|a)id|haon|d(\x00f3|o)|tr(\x00ed|i)|ceathair|c(\x00fa|u)ig|s(\x00e9|e)|seacht|hocht|naoi|deich)" + [ regex "a (n(á|a)id|haon|d(ó|o)|tr(í|i)|ceathair|c(ú|u)ig|s(é|e)|seacht|hocht|naoi|deich)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "naid" -> integer 0 - "n\x00e1id" -> integer 0 + "náid" -> integer 0 "haon" -> integer 1 - "d\x00f3" -> integer 2 + "dó" -> integer 2 "do" -> integer 2 - "tr\x00ed" -> integer 3 + "trí" -> integer 3 "tri" -> integer 3 "ceathair" -> integer 4 "cuig" -> integer 5 - "c\x00faig" -> integer 5 - "s\x00e9" -> integer 6 + "cúig" -> integer 5 + "sé" -> integer 6 "se" -> integer 6 "seacht" -> integer 7 "hocht" -> integer 8 diff --git a/Duckling/Numeral/HE/Rules.hs b/Duckling/Numeral/HE/Rules.hs index 28cda986..9b8d7326 100644 --- a/Duckling/Numeral/HE/Rules.hs +++ b/Duckling/Numeral/HE/Rules.hs @@ -28,7 +28,7 @@ ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer 4" , pattern = - [ regex "(\x05d0\x05e8\x05d1\x05e2(\x05d4)?)" + [ regex "(ארבע(ה)?)" ] , prod = \_ -> integer 4 } @@ -52,7 +52,7 @@ ruleIntersectWithAnd = Rule { name = "intersect (with and)" , pattern = [ numberWith (fromMaybe 0 . 
TNumeral.grain) (>1) - , regex "\x05d5" + , regex "ו" , numberWith TNumeral.multipliable not ] , prod = \tokens -> case tokens of @@ -82,7 +82,7 @@ ruleCompositeTensWithAnd = Rule { name = "integer 21..99 (with and)" , pattern = [ oneOf [ 20, 30..90 ] - , regex "\x05d5" + , regex "ו" , numberBetween 1 10 ] , prod = \tokens -> case tokens of @@ -97,7 +97,7 @@ ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule { name = "numbers prefix with -, negative or minus" , pattern = - [ regex "-|\x05de\x05d9\x05e0\x05d5\x05e1" + [ regex "-|מינוס" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -122,7 +122,7 @@ ruleInteger10 :: Rule ruleInteger10 = Rule { name = "integer 9" , pattern = - [ regex "(\x05ea\x05e9\x05e2(\x05d4)?)" + [ regex "(תשע(ה)?)" ] , prod = \_ -> integer 9 } @@ -131,18 +131,18 @@ ruleInteger15 :: Rule ruleInteger15 = Rule { name = "integer (20..90)" , pattern = - [ regex "(\x05e2\x05e9\x05e8\x05d9\x05dd|\x05e9\x05dc\x05d5\x05e9\x05d9\x05dd|\x05d0\x05e8\x05d1\x05e2\x05d9\x05dd|\x05d7\x05de\x05d9\x05e9\x05d9\x05dd|\x05e9\x05d9\x05e9\x05d9\x05dd|\x05e9\x05d1\x05e2\x05d9\x05dd|\x05e9\x05de\x05d5\x05e0\x05d9\x05dd|\x05ea\x05e9\x05e2\x05d9\x05dd)" + [ regex "(עשרים|שלושים|ארבעים|חמישים|שישים|שבעים|שמונים|תשעים)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x05e2\x05e9\x05e8\x05d9\x05dd" -> integer 20 - "\x05e9\x05dc\x05d5\x05e9\x05d9\x05dd" -> integer 30 - "\x05d0\x05e8\x05d1\x05e2\x05d9\x05dd" -> integer 40 - "\x05d7\x05de\x05d9\x05e9\x05d9\x05dd" -> integer 50 - "\x05e9\x05d9\x05e9\x05d9\x05dd" -> integer 60 - "\x05e9\x05d1\x05e2\x05d9\x05dd" -> integer 70 - "\x05e9\x05de\x05d5\x05e0\x05d9\x05dd" -> integer 80 - "\x05ea\x05e9\x05e2\x05d9\x05dd" -> integer 90 + "עשרים" -> integer 20 + "שלושים" -> integer 30 + "ארבעים" -> integer 40 + "חמישים" -> integer 50 + "שישים" -> integer 60 + "שבעים" -> integer 70 + "שמונים" -> integer 80 + "תשעים" -> integer 90 _ -> 
Nothing _ -> Nothing } @@ -162,7 +162,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer 2" , pattern = - [ regex "(\x05e9\x05ea\x05d9\x05d9\x05dd|\x05e9\x05e0\x05d9\x05d9\x05dd)" + [ regex "(שתיים|שניים)" ] , prod = \_ -> integer 2 } @@ -171,7 +171,7 @@ ruleSingle :: Rule ruleSingle = Rule { name = "single" , pattern = - [ regex "\x05d9\x05d7\x05d9\x05d3" + [ regex "יחיד" ] , prod = \_ -> integer 1 } @@ -180,7 +180,7 @@ ruleInteger13 :: Rule ruleInteger13 = Rule { name = "integer 12" , pattern = - [ regex "(\x05e9\x05e0\x05d9\x05d9\x05dd \x05e2\x05e9\x05e8|\x05ea\x05e8\x05d9 \x05e2\x05e9\x05e8)" + [ regex "(שניים עשר|תרי עשר)" ] , prod = \_ -> integer 12 } @@ -201,7 +201,7 @@ ruleInteger6 :: Rule ruleInteger6 = Rule { name = "integer 5" , pattern = - [ regex "(\x05d7\x05de(\x05e9|\x05d9\x05e9\x05d4))" + [ regex "(חמ(ש|ישה))" ] , prod = \_ -> integer 5 } @@ -210,21 +210,21 @@ rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(\x05de\x05d0(\x05d4|\x05d5\x05ea)|\x05d0\x05dc(\x05e3|\x05e4\x05d9\x05dd)|\x05de\x05d9\x05dc\x05d9\x05d5(\x05df|\x05e0\x05d9\x05dd))" + [ regex "(מא(ה|ות)|אל(ף|פים)|מיליו(ן|נים))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "\x05de\x05d0\x05d4" -> + "מאה" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "\x05de\x05d0\x05d5\x05ea" -> + "מאות" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "\x05d0\x05dc\x05e3" -> + "אלף" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\x05d0\x05dc\x05e4\x05d9\x05dd" -> + "אלפים" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\x05de\x05d9\x05dc\x05d9\x05d5\x05df" -> + "מיליון" -> double 1e6 >>= withGrain 6 >>= withMultipliable - "\x05de\x05d9\x05dc\x05d9\x05d5\x05e0\x05d9\x05dd" -> + "מיליונים" -> double 1e6 >>= withGrain 6 >>= withMultipliable _ -> Nothing _ -> Nothing @@ -234,7 +234,7 @@ ruleInteger7 :: Rule ruleInteger7 = Rule { name = "integer 6" , pattern = - 
[ regex "(\x05e9\x05e9(\x05d4)?)" + [ regex "(שש(ה)?)" ] , prod = \_ -> integer 6 } @@ -257,7 +257,7 @@ ruleInteger8 :: Rule ruleInteger8 = Rule { name = "integer 7" , pattern = - [ regex "(\x05e9\x05d1\x05e2(\x05d4)?)" + [ regex "(שבע(ה)?)" ] , prod = \_ -> integer 7 } @@ -266,7 +266,7 @@ ruleCouple :: Rule ruleCouple = Rule { name = "couple" , pattern = - [ regex "\x05d6\x05d5\x05d2( \x05e9\x05dc)?" + [ regex "זוג( של)?" ] , prod = \_ -> integer 2 } @@ -289,7 +289,7 @@ ruleInteger9 :: Rule ruleInteger9 = Rule { name = "integer 8" , pattern = - [ regex "(\x05e9\x05de\x05d5\x05e0\x05d4)" + [ regex "(שמונה)" ] , prod = \_ -> integer 8 } @@ -298,7 +298,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "(\x05d0\x05e4\x05e1|\x05db\x05dc\x05d5\x05dd)" + [ regex "(אפס|כלום)" ] , prod = \_ -> integer 0 } @@ -307,7 +307,7 @@ ruleInteger4 :: Rule ruleInteger4 = Rule { name = "integer 3" , pattern = - [ regex "(\x05e9\x05dc\x05d5\x05e9(\x05d4)?)" + [ regex "(שלוש(ה)?)" ] , prod = \_ -> integer 3 } @@ -316,7 +316,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer 1" , pattern = - [ regex "(\x05d0\x05d7\x05d3|\x05d0\x05d7\x05ea)" + [ regex "(אחד|אחת)" ] , prod = \_ -> integer 1 } @@ -325,7 +325,7 @@ ruleInteger11 :: Rule ruleInteger11 = Rule { name = "integer 10" , pattern = - [ regex "(\x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(עשר(ה)?)" ] , prod = \_ -> integer 10 } @@ -335,7 +335,7 @@ ruleNumeralDotNumeral = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "\x05e0\x05e7\x05d5\x05d3\x05d4" + , regex "נקודה" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/HR/Rules.hs b/Duckling/Numeral/HR/Rules.hs index 50541b99..410ec985 100644 --- a/Duckling/Numeral/HR/Rules.hs +++ b/Duckling/Numeral/HR/Rules.hs @@ -98,7 +98,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer (100..900)" , pattern = - [ regex 
"(sto|dvjest(o|a)|tristo|(c|\x010d)etiristo|petsto|(\x0161|s)esto|sedamsto|osamsto|devetsto)" + [ regex "(sto|dvjest(o|a)|tristo|(c|č)etiristo|petsto|(š|s)esto|sedamsto|osamsto|devetsto)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -131,7 +131,7 @@ rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(stotin(u|a|e)|tisu(c|\x0107)(a|u|e)|milij(u|o)na?)" + [ regex "(stotin(u|a|e)|tisu(c|ć)(a|u|e)|milij(u|o)na?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -187,7 +187,7 @@ ruleNumbersSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -222,7 +222,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(ni(s|\x0161)ta|ni(s|\x0161)tica|nula|jedanaest|dvanaest|trinaest|jeda?n(a|u|o(ga?)?)?|dv(i?je)?(a|o)?(ma)?|tri(ma)?|(\x010d|c)etiri|(\x010d|c)etrnaest|petnaest|pet|(s|\x0161)esnaest|(\x0161|s)est|sedamnaest|sedam|osamnaest|osam|devetnaest|devet)" + [ regex "(ni(s|š)ta|ni(s|š)tica|nula|jedanaest|dvanaest|trinaest|jeda?n(a|u|o(ga?)?)?|dv(i?je)?(a|o)?(ma)?|tri(ma)?|(č|c)etiri|(č|c)etrnaest|petnaest|pet|(s|š)esnaest|(š|s)est|sedamnaest|sedam|osamnaest|osam|devetnaest|devet)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -283,7 +283,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "(dvadeset|trideset|(c|\x010d)etrdeset|pedeset|(\x0161|s)esdeset|sedamdeset|osamdeset|devedeset)" + [ regex "(dvadeset|trideset|(c|č)etrdeset|pedeset|(š|s)esdeset|sedamdeset|osamdeset|devedeset)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch 
(match:_)):_) -> case Text.toLower match of @@ -322,7 +322,7 @@ ruleNumberDotNumber = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "cijela|to(c|\x010d)ka|zarez" + , regex "cijela|to(c|č)ka|zarez" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/HU/Rules.hs b/Duckling/Numeral/HU/Rules.hs index fb13efc8..9bcdd44d 100644 --- a/Duckling/Numeral/HU/Rules.hs +++ b/Duckling/Numeral/HU/Rules.hs @@ -136,7 +136,7 @@ ruleTens :: Rule ruleTens = Rule { name = "integer (20,30..90)" , pattern = - [ regex "(h\x00FAsz|harminc|negyven|\x00f6tven|hatvan|hetven|nyolcvan|kilencven)" + [ regex "(h\x00FAsz|harminc|negyven|ötven|hatvan|hetven|nyolcvan|kilencven)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/ID/Rules.hs b/Duckling/Numeral/ID/Rules.hs index b8cb0dbb..e2f22221 100644 --- a/Duckling/Numeral/ID/Rules.hs +++ b/Duckling/Numeral/ID/Rules.hs @@ -184,7 +184,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): diff --git a/Duckling/Numeral/IT/Rules.hs b/Duckling/Numeral/IT/Rules.hs index 8fb5e2bf..19e6ba16 100644 --- a/Duckling/Numeral/IT/Rules.hs +++ b/Duckling/Numeral/IT/Rules.hs @@ -174,7 +174,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -191,14 +191,14 @@ ruleNumeral4 :: Rule ruleNumeral4 = Rule { name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)" , pattern = - [ regex 
"((venti|trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)(due|tre|tr\x00e9|quattro|cinque|sei|sette|nove))|((vent|trent|quarant|cinquant|sessant|settant|ottant|novant)(uno|otto))" + [ regex "((venti|trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)(due|tre|tré|quattro|cinque|sei|sette|nove))|((vent|trent|quarant|cinquant|sessant|settant|ottant|novant)(uno|otto))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "ventuno" -> integer 21 "ventidue" -> integer 22 "ventitre" -> integer 23 - "ventitr\x00e9" -> integer 23 + "ventitré" -> integer 23 "ventiquattro" -> integer 24 "venticinque" -> integer 25 "ventisei" -> integer 26 @@ -208,7 +208,7 @@ ruleNumeral4 = Rule "trentuno" -> integer 31 "trentadue" -> integer 32 "trentatre" -> integer 33 - "trentatr\x00e9" -> integer 33 + "trentatré" -> integer 33 "trentaquattro" -> integer 34 "trentacinque" -> integer 35 "trentasei" -> integer 36 @@ -218,7 +218,7 @@ ruleNumeral4 = Rule "quarantuno" -> integer 41 "quarantadue" -> integer 42 "quarantatre" -> integer 43 - "quarantatr\x00e9" -> integer 43 + "quarantatré" -> integer 43 "quarantaquattro" -> integer 44 "quarantacinque" -> integer 45 "quarantasei" -> integer 46 @@ -228,7 +228,7 @@ ruleNumeral4 = Rule "cinquantuno" -> integer 51 "cinquantadue" -> integer 52 "cinquantatre" -> integer 53 - "cinquantatr\x00e9" -> integer 53 + "cinquantatré" -> integer 53 "cinquantaquattro" -> integer 54 "cinquantacinque" -> integer 55 "cinquantasei" -> integer 56 @@ -237,7 +237,7 @@ ruleNumeral4 = Rule "cinquantanove" -> integer 59 "sessantuno" -> integer 61 "sessantadue" -> integer 62 - "sessantatr\x00e9" -> integer 63 + "sessantatré" -> integer 63 "sessantatre" -> integer 63 "sessantaquattro" -> integer 64 "sessantacinque" -> integer 65 @@ -247,7 +247,7 @@ ruleNumeral4 = Rule "sessantanove" -> integer 69 "settantuno" -> integer 71 "settantadue" -> integer 72 - "settantatr\x00e9" -> integer 73 + "settantatré" 
-> integer 73 "settantatre" -> integer 73 "settantaquattro" -> integer 74 "settantacinque" -> integer 75 @@ -257,7 +257,7 @@ ruleNumeral4 = Rule "settantanove" -> integer 79 "ottantuno" -> integer 81 "ottantadue" -> integer 82 - "ottantatr\x00e9" -> integer 83 + "ottantatré" -> integer 83 "ottantatre" -> integer 83 "ottantaquattro" -> integer 84 "ottantacinque" -> integer 85 @@ -268,7 +268,7 @@ ruleNumeral4 = Rule "novantuno" -> integer 91 "novantadue" -> integer 92 "novantatre" -> integer 93 - "novantatr\x00e9" -> integer 93 + "novantatré" -> integer 93 "novantaquattro" -> integer 94 "novantacinque" -> integer 95 "novantasei" -> integer 96 diff --git a/Duckling/Numeral/JA/Rules.hs b/Duckling/Numeral/JA/Rules.hs index 235f68e3..88358ed7 100644 --- a/Duckling/Numeral/JA/Rules.hs +++ b/Duckling/Numeral/JA/Rules.hs @@ -27,7 +27,7 @@ ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer (100)" , pattern = - [ regex "\x767e" + [ regex "百" ] , prod = \_ -> integer 100 } @@ -36,7 +36,7 @@ ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule { name = "numbers prefix with -, negative or minus" , pattern = - [ regex "-|\x30de\x30a4\x30ca\x30b9\\s?|\x8ca0\\s?" + [ regex "-|マイナス\\s?|負\\s?" 
, dimension Numeral ] , prod = \tokens -> case tokens of @@ -48,22 +48,22 @@ ruleInteger17 :: Rule ruleInteger17 = Rule { name = "integer (0..10)" , pattern = - [ regex "(\x30bc\x30ed|\x96f6|\x4e00|\x4e8c|\x4e09|\x56db|\x4e94|\x516d|\x4e03|\x516b|\x4e5d|\x5341)" + [ regex "(ゼロ|零|一|二|三|四|五|六|七|八|九|十)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x30bc\x30ed" -> integer 0 - "\x96f6" -> integer 0 - "\x4e00" -> integer 1 - "\x4e8c" -> integer 2 - "\x4e09" -> integer 3 - "\x56db" -> integer 4 - "\x4e94" -> integer 5 - "\x516d" -> integer 6 - "\x4e03" -> integer 7 - "\x516b" -> integer 8 - "\x4e5d" -> integer 9 - "\x5341" -> integer 10 + "ゼロ" -> integer 0 + "零" -> integer 0 + "一" -> integer 1 + "二" -> integer 2 + "三" -> integer 3 + "四" -> integer 4 + "五" -> integer 5 + "六" -> integer 6 + "七" -> integer 7 + "八" -> integer 8 + "九" -> integer 9 + "十" -> integer 10 _ -> Nothing _ -> Nothing } @@ -85,7 +85,7 @@ ruleInteger10 :: Rule ruleInteger10 = Rule { name = "integer (1000..1999)" , pattern = - [ regex "\x5343" + [ regex "千" , numberBetween 1 1000 ] , prod = \tokens -> case tokens of @@ -111,7 +111,7 @@ ruleInteger15 = Rule { name = "integer (20000..90000)" , pattern = [ numberBetween 2 10 - , regex "\x4e07" + , regex "万" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 10000 @@ -134,7 +134,7 @@ ruleNumeral = Rule { name = "个" , pattern = [ dimension Numeral - , regex "\x4e2a" + , regex "个" ] , prod = \tokens -> case tokens of (token:_) -> Just token @@ -146,7 +146,7 @@ ruleInteger3 = Rule { name = "integer (20..90)" , pattern = [ numberBetween 2 10 - , regex "\x5341" + , regex "十" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 10 @@ -157,7 +157,7 @@ ruleInteger13 :: Rule ruleInteger13 = Rule { name = "integer (10000)" , pattern = - [ regex "\x4e07" + [ regex "万" ] , prod = \_ -> integer 10000 } @@ 
-166,7 +166,7 @@ ruleInteger6 :: Rule ruleInteger6 = Rule { name = "integer (100..199)" , pattern = - [ regex "\x767e" + [ regex "百" , numberBetween 1 100 ] , prod = \tokens -> case tokens of @@ -193,15 +193,15 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G, 千, 万)" , pattern = [ dimension Numeral - , regex "(k|m|g|\x5343|\x4e07)" + , regex "(k|m|g|千|万)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): Token RegexMatch (GroupMatch (match:_)): _) -> case Text.toLower match of "k" -> double $ v * 1e3 - "\x5343" -> double $ v * 1e3 - "\x4e07" -> double $ v * 1e4 + "千" -> double $ v * 1e3 + "万" -> double $ v * 1e4 "m" -> double $ v * 1e6 "g" -> double $ v * 1e9 _ -> Nothing @@ -213,7 +213,7 @@ ruleInteger7 = Rule { name = "integer (200..900)" , pattern = [ numberBetween 2 10 - , regex "\x767e" + , regex "百" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 100 @@ -224,7 +224,7 @@ ruleInteger14 :: Rule ruleInteger14 = Rule { name = "integer (10000..19999)" , pattern = - [ regex "\x4e07" + [ regex "万" , numberBetween 1 10000 ] , prod = \tokens -> case tokens of @@ -265,7 +265,7 @@ ruleInteger9 :: Rule ruleInteger9 = Rule { name = "integer (1000)" , pattern = - [ regex "\x5343" + [ regex "千" ] , prod = \_ -> integer 1000 } @@ -274,22 +274,22 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..10)" , pattern = - [ regex "\x30bc\x30ed|\x96f6|\x4e00|\x4e8c|\x4e09|\x56db|\x4e94|\x516d|\x4e03|\x516b|\x4e5d|\x5341" + [ regex "ゼロ|零|一|二|三|四|五|六|七|八|九|十" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x96f6" -> integer 0 - "\x30bc\x30ed" -> integer 0 - "\x4e00" -> integer 1 - "\x4e8c" -> integer 2 - "\x4e09" -> integer 3 - "\x56db" -> integer 4 - "\x4e94" -> integer 5 - "\x516d" -> integer 6 - "\x4e03" -> integer 7 - "\x516b" -> integer 8 - "\x4e5d" -> integer 9 - "\x5341" -> integer 10 + "零" -> 
integer 0 + "ゼロ" -> integer 0 + "一" -> integer 1 + "二" -> integer 2 + "三" -> integer 3 + "四" -> integer 4 + "五" -> integer 5 + "六" -> integer 6 + "七" -> integer 7 + "八" -> integer 8 + "九" -> integer 9 + "十" -> integer 10 _ -> Nothing _ -> Nothing } @@ -312,7 +312,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (11..19)" , pattern = - [ regex "\x5341" + [ regex "十" , numberBetween 1 10 ] , prod = \tokens -> case tokens of @@ -325,7 +325,7 @@ ruleInteger11 = Rule { name = "integer (2000..9000)" , pattern = [ numberBetween 2 10 - , regex "\x5343" + , regex "千" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 1000 diff --git a/Duckling/Numeral/KO/Rules.hs b/Duckling/Numeral/KO/Rules.hs index 37324d2e..2a8e42d9 100644 --- a/Duckling/Numeral/KO/Rules.hs +++ b/Duckling/Numeral/KO/Rules.hs @@ -28,15 +28,15 @@ ruleIntegerForOrdinals :: Rule ruleIntegerForOrdinals = Rule { name = "integer (1..4) - for ordinals" , pattern = - [ regex "(\xd55c|\xccab|\xb450|\xc138|\xb124)" + [ regex "(한|첫|두|세|네)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\xd55c" -> integer 1 - "\xccab" -> integer 1 - "\xb450" -> integer 2 - "\xc138" -> integer 3 - "\xb124" -> integer 4 + "한" -> integer 1 + "첫" -> integer 1 + "두" -> integer 2 + "세" -> integer 3 + "네" -> integer 4 _ -> Nothing _ -> Nothing } @@ -58,7 +58,7 @@ ruleFew :: Rule ruleFew = Rule { name = "few 몇" , pattern = - [ regex "\xba87" + [ regex "몇" ] , prod = \_ -> integer 3 } @@ -106,7 +106,7 @@ ruleNumeralsPrefixWithOr :: Rule ruleNumeralsPrefixWithOr = Rule { name = "numbers prefix with -, 마이너스, or 마이나스" , pattern = - [ regex "-|\xb9c8\xc774\xb108\xc2a4\\s?|\xb9c8\xc774\xb098\xc2a4\\s?" + [ regex "-|마이너스\\s?|마이나스\\s?" 
, dimension Numeral ] , prod = \tokens -> case tokens of @@ -118,7 +118,7 @@ ruleHalf :: Rule ruleHalf = Rule { name = "half - 반" , pattern = - [ regex "\xbc18" + [ regex "반" ] , prod = \_ -> double 0.5 } @@ -127,7 +127,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "\xc601|\xacf5|\xbe75" + [ regex "영|공|빵" ] , prod = \_ -> integer 0 } @@ -136,19 +136,19 @@ ruleIntegerTypeAndOrdinals :: Rule ruleIntegerTypeAndOrdinals = Rule { name = "integer (20..90) - TYPE 2 and ordinals" , pattern = - [ regex "(\xc5f4|\xc2a4\xbb3c|\xc11c\xb978|\xb9c8\xd754|\xc270|\xc608\xc21c|\xc77c\xd754|\xc5ec\xb4e0|\xc544\xd754)" + [ regex "(열|스물|서른|마흔|쉰|예순|일흔|여든|아흔)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\xc5f4" -> integer 10 - "\xc2a4\xbb3c" -> integer 20 - "\xc11c\xb978" -> integer 30 - "\xb9c8\xd754" -> integer 40 - "\xc270" -> integer 50 - "\xc608\xc21c" -> integer 60 - "\xc77c\xd754" -> integer 70 - "\xc5ec\xb4e0" -> integer 80 - "\xc544\xd754" -> integer 90 + "열" -> integer 10 + "스물" -> integer 20 + "서른" -> integer 30 + "마흔" -> integer 40 + "쉰" -> integer 50 + "예순" -> integer 60 + "일흔" -> integer 70 + "여든" -> integer 80 + "아흔" -> integer 90 _ -> Nothing _ -> Nothing } @@ -157,20 +157,20 @@ ruleIntegerType1 :: Rule ruleIntegerType1 = Rule { name = "integer - TYPE 1" , pattern = - [ regex "(\xc601|\xc77c|\xc774|\xc0bc|\xc0ac|\xc624|\xc721|\xce60|\xd314|\xad6c)" + [ regex "(영|일|이|삼|사|오|육|칠|팔|구)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\xc601" -> integer 0 - "\xc77c" -> integer 1 - "\xc774" -> integer 2 - "\xc0bc" -> integer 3 - "\xc0ac" -> integer 4 - "\xc624" -> integer 5 - "\xc721" -> integer 6 - "\xce60" -> integer 7 - "\xd314" -> integer 8 - "\xad6c" -> integer 9 + "영" -> integer 0 + "일" -> integer 1 + "이" -> integer 2 + "삼" -> integer 3 + "사" -> integer 4 + "오" -> integer 5 + "육" -> integer 6 + "칠" -> integer 7 + "팔" -> 
integer 8 + "구" -> integer 9 _ -> Nothing _ -> Nothing } @@ -179,16 +179,16 @@ ruleIntegerType1PowersOfTen :: Rule ruleIntegerType1PowersOfTen = Rule { name = "integer - TYPE 1: powers of ten" , pattern = - [ regex "(\xc2ed|\xbc31|\xcc9c|\xb9cc|\xc5b5|\xc870)" + [ regex "(십|백|천|만|억|조)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\xc2ed" -> double 10 >>= withGrain 1 >>= withMultipliable - "\xbc31" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "\xcc9c" -> double 1e3 >>= withGrain 3 >>= withMultipliable - "\xb9cc" -> double 1e4 >>= withGrain 4 >>= withMultipliable - "\xc5b5" -> double 1e8 >>= withGrain 8 >>= withMultipliable - "\xc870" -> double 1e12 >>= withGrain 12 >>= withMultipliable + "십" -> double 10 >>= withGrain 1 >>= withMultipliable + "백" -> double 1e2 >>= withGrain 2 >>= withMultipliable + "천" -> double 1e3 >>= withGrain 3 >>= withMultipliable + "만" -> double 1e4 >>= withGrain 4 >>= withMultipliable + "억" -> double 1e8 >>= withGrain 8 >>= withMultipliable + "조" -> double 1e12 >>= withGrain 12 >>= withMultipliable _ -> Nothing _ -> Nothing } @@ -224,19 +224,19 @@ ruleIntegerType2 :: Rule ruleIntegerType2 = Rule { name = "integer (1..10) - TYPE 2" , pattern = - [ regex "(\xd558\xb098|\xb458|\xc14b|\xb137|\xb2e4\xc12f|\xc5ec\xc12f|\xc77c\xacf1|\xc5ec\xb35f|\xc544\xd649)" + [ regex "(하나|둘|셋|넷|다섯|여섯|일곱|여덟|아홉)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\xd558\xb098" -> integer 1 - "\xb458" -> integer 2 - "\xc14b" -> integer 3 - "\xb137" -> integer 4 - "\xb2e4\xc12f" -> integer 5 - "\xc5ec\xc12f" -> integer 6 - "\xc77c\xacf1" -> integer 7 - "\xc5ec\xb35f" -> integer 8 - "\xc544\xd649" -> integer 9 + "하나" -> integer 1 + "둘" -> integer 2 + "셋" -> integer 3 + "넷" -> integer 4 + "다섯" -> integer 5 + "여섯" -> integer 6 + "일곱" -> integer 7 + "여덟" -> integer 8 + "아홉" -> integer 9 _ -> Nothing _ -> Nothing } @@ -246,7 +246,7 @@ ruleFraction = Rule { 
name = "fraction" , pattern = [ dimension Numeral - , regex "\xbd84(\xc758|\xc5d0)" + , regex "분(의|에)" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -262,22 +262,22 @@ ruleNumeralDotNumeral = Rule { name = "number dot number - 삼점사" , pattern = [ dimension Numeral - , regex "(\xc810|\xca5c)((\xc601|\xc77c|\xc774|\xc0bc|\xc0ac|\xc624|\xc721|\xce60|\xd314|\xad6c)+)" + , regex "(점|쩜)((영|일|이|삼|사|오|육|칠|팔|구)+)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v1}): Token RegexMatch (GroupMatch (_:match:_)): _) -> do - let getDigit '\xc601' = Just "0" - getDigit '\xc77c' = Just "1" - getDigit '\xc774' = Just "2" - getDigit '\xc0bc' = Just "3" - getDigit '\xc0ac' = Just "4" - getDigit '\xc624' = Just "5" - getDigit '\xc721' = Just "6" - getDigit '\xce60' = Just "7" - getDigit '\xd314' = Just "8" - getDigit '\xad6c' = Just "9" + let getDigit '영' = Just "0" + getDigit '일' = Just "1" + getDigit '이' = Just "2" + getDigit '삼' = Just "3" + getDigit '사' = Just "4" + getDigit '오' = Just "5" + getDigit '육' = Just "6" + getDigit '칠' = Just "7" + getDigit '팔' = Just "8" + getDigit '구' = Just "9" getDigit _ = Nothing v2 <- parseDouble . Text.concat . 
mapMaybe getDigit $ Text.unpack match double $ v1 + decimalsToDouble v2 diff --git a/Duckling/Numeral/MY/Rules.hs b/Duckling/Numeral/MY/Rules.hs index e293b4b7..d442873b 100644 --- a/Duckling/Numeral/MY/Rules.hs +++ b/Duckling/Numeral/MY/Rules.hs @@ -27,7 +27,7 @@ ruleInteger5 = Rule { name = "integer (11..99) " , pattern = [ numberBetween 1 10 - , regex "\x1006\x101a\x103a\x1037" + , regex "ဆယ့်" , numberBetween 1 10 ] , prod = \tokens -> case tokens of @@ -42,20 +42,20 @@ ruleIntegerNumeric :: Rule ruleIntegerNumeric = Rule { name = "integer (0..9) - numeric" , pattern = - [ regex "(\x1040|\x1041|\x1042|\x1043|\x1044|\x1045|\x1046|\x1047|\x1048|\x1049)" + [ regex "(၀|၁|၂|၃|၄|၅|၆|၇|၈|၉)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x1040" -> integer 0 - "\x1041" -> integer 1 - "\x1042" -> integer 2 - "\x1043" -> integer 3 - "\x1044" -> integer 4 - "\x1045" -> integer 5 - "\x1046" -> integer 6 - "\x1047" -> integer 7 - "\x1048" -> integer 8 - "\x1049" -> integer 9 + "၀" -> integer 0 + "၁" -> integer 1 + "၂" -> integer 2 + "၃" -> integer 3 + "၄" -> integer 4 + "၅" -> integer 5 + "၆" -> integer 6 + "၇" -> integer 7 + "၈" -> integer 8 + "၉" -> integer 9 _ -> Nothing _ -> Nothing } @@ -64,7 +64,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer (11..19) " , pattern = - [ regex "\x1006\x101a\x103a\x1037" + [ regex "ဆယ့်" , numberBetween 1 10 ] , prod = \tokens -> case tokens of @@ -76,13 +76,13 @@ ruleIntegerPali :: Rule ruleIntegerPali = Rule { name = "integer (1..3) - pali" , pattern = - [ regex "(\x1015\x1011\x1019|\x1012\x102f\x1010\x102d\x101a|\x1010\x1010\x102d\x101a)" + [ regex "(ပထမ|ဒုတိယ|တတိယ)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x1015\x1011\x1019" -> integer 1 - "\x1012\x102f\x1010\x102d\x101a" -> integer 2 - "\x1010\x1010\x102d\x101a" -> integer 3 + "ပထမ" -> integer 1 + "ဒုတိယ" -> integer 2 + "တတိယ" -> integer 3 _ -> Nothing 
_ -> Nothing } @@ -92,7 +92,7 @@ ruleInteger6 = Rule { name = "integer (100..900)" , pattern = [ numberBetween 1 10 - , regex "\x101b\x102c" + , regex "ရာ" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 100 @@ -104,7 +104,7 @@ ruleInteger7 = Rule { name = "integer (1000..9000)" , pattern = [ numberBetween 1 10 - , regex "\x1011\x1031\x102c\x1004\x103a" + , regex "ထောင်" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 1000 @@ -116,7 +116,7 @@ ruleInteger8 = Rule { name = "integer (10000..90000)" , pattern = [ numberBetween 1 10 - , regex "\x101e\x1031\x102c\x1004\x103a\x1038" + , regex "သောင်း" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 10000 @@ -127,7 +127,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "\x101e\x102f\x1036\x100a|\x1019\x101b\x103e\x102d" + [ regex "သုံည|မရှိ" ] , prod = \_ -> integer 0 } @@ -137,7 +137,7 @@ ruleInteger4 = Rule { name = "integer (10..90)" , pattern = [ numberBetween 1 10 - , regex "\x1006\x101a\x103a" + , regex "ဆယ်" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 10 @@ -148,20 +148,20 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (1..10)" , pattern = - [ regex "(\x1010\x1005\x103a|\x1014\x103e\x1005\x103a|\x101e\x102f\x1036\x1038|\x101c\x1031\x1038|\x1004\x102b\x1038|\x1001\x103c\x1031\x102b\x1000\x103a|\x1001\x102f\x1014\x103e\x1005\x103a|\x101b\x103e\x1005\x103a|\x1000\x102d\x102f\x1038|\x1010\x1005\x103a\x1006\x101a\x103a)" + [ regex "(တစ်|နှစ်|သုံး|လေး|ငါး|ခြေါက်|ခုနှစ်|ရှစ်|ကိုး|တစ်ဆယ်)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x1010\x1005\x103a" -> integer 1 - "\x1014\x103e\x1005\x103a" -> integer 2 - "\x101e\x102f\x1036\x1038" -> integer 3 - "\x101c\x1031\x1038" -> integer 4 - 
"\x1004\x102b\x1038" -> integer 5 - "\x1001\x103c\x1031\x102b\x1000\x103a" -> integer 6 - "\x1001\x102f\x1014\x103e\x1005\x103a" -> integer 7 - "\x101b\x103e\x1005\x103a" -> integer 8 - "\x1000\x102d\x102f\x1038" -> integer 9 - "\x1010\x1005\x103a\x1006\x101a\x103a" -> integer 10 + "တစ်" -> integer 1 + "နှစ်" -> integer 2 + "သုံး" -> integer 3 + "လေး" -> integer 4 + "ငါး" -> integer 5 + "ခြေါက်" -> integer 6 + "ခုနှစ်" -> integer 7 + "ရှစ်" -> integer 8 + "ကိုး" -> integer 9 + "တစ်ဆယ်" -> integer 10 _ -> Nothing _ -> Nothing } diff --git a/Duckling/Numeral/NB/Rules.hs b/Duckling/Numeral/NB/Rules.hs index 3fec9422..a03f21de 100644 --- a/Duckling/Numeral/NB/Rules.hs +++ b/Duckling/Numeral/NB/Rules.hs @@ -72,7 +72,7 @@ ruleFew :: Rule ruleFew = Rule { name = "few" , pattern = - [ regex "(noen )?f\x00e5" + [ regex "(noen )?få" ] , prod = \_ -> integer 3 } @@ -158,7 +158,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -214,7 +214,7 @@ zeroToNineteenMap = HashMap.fromList , ( "intet" , 0 ) , ( "en" , 1 ) , ( "ett" , 1 ) - , ( "\x00e9n" , 1 ) + , ( "én" , 1 ) , ( "to" , 2 ) , ( "tre" , 3 ) , ( "fire" , 4 ) @@ -231,7 +231,7 @@ zeroToNineteenMap = HashMap.fromList , ( "fjorten" , 14 ) , ( "femten" , 15 ) , ( "seksten" , 16 ) - , ( "s\x00f8tten" , 17 ) + , ( "søtten" , 17 ) , ( "sytten" , 17 ) , ( "atten" , 18 ) , ( "nitten" , 19 ) @@ -241,7 +241,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(intet|ingen|null|en|ett|\x00e9n|to|tretten|tre|fire|femten|fem|seksten|seks|syv|sju|\x00e5tte|nitten|ni|ti|elleve|tolv|fjorten|sytten|s\x00f8tten|atten)" + [ regex "(intet|ingen|null|en|ett|én|to|tretten|tre|fire|femten|fem|seksten|seks|syv|sju|åtte|nitten|ni|ti|elleve|tolv|fjorten|sytten|søtten|atten)" ] , prod = 
\tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -255,12 +255,12 @@ dozensMap = HashMap.fromList , ( "tjue" , 20 ) , ( "tredve" , 30 ) , ( "tretti" , 30 ) - , ( "f\x00f8rti" , 40 ) + , ( "førti" , 40 ) , ( "femti" , 50 ) , ( "seksti" , 60 ) , ( "sytti" , 70 ) - , ( "s\x00f8tti" , 70 ) - , ( "\x00e5tti" , 80 ) + , ( "søtti" , 70 ) + , ( "åtti" , 80 ) , ( "nitti" , 90 ) ] @@ -268,7 +268,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "(tyve|tjue|tredve|tretti|f\x00f8rti|femti|seksti|sytti|s\x00f8tti|\x00e5tti|nitti)" + [ regex "(tyve|tjue|tredve|tretti|førti|femti|seksti|sytti|søtti|åtti|nitti)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/NL/Rules.hs b/Duckling/Numeral/NL/Rules.hs index 07bd8e2e..33124986 100644 --- a/Duckling/Numeral/NL/Rules.hs +++ b/Duckling/Numeral/NL/Rules.hs @@ -102,7 +102,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer ([2-9][1-9])" , pattern = - [ regex "(een|twee|drie|vier|vijf|zes|zeven|acht|negen)(?:e|\x00eb)n(twintig|dertig|veertig|vijftig|zestig|zeventig|tachtig|negentig)" + [ regex "(een|twee|drie|vier|vijf|zes|zeven|acht|negen)(?:e|ë)n(twintig|dertig|veertig|vijftig|zestig|zeventig|tachtig|negentig)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do @@ -143,7 +143,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -210,7 +210,7 @@ zeroNineteenMap = HashMap.fromList [ ("niks", 0) , ("nul", 0) , ("geen", 0) - , ("\x00e9\x00e9n", 1) + , ("één", 1) , ("een", 1) , ("twee", 2) , ("drie", 3) @@ -236,7 +236,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex 
"(geen|nul|niks|een|\x00e9\x00e9n|twee|drie|vier|vijftien|vijf|zestien|zes|zeventien|zeven|achtien|acht|negentien|negen|tien|elf|twaalf|dertien|veertien)" + [ regex "(geen|nul|niks|een|één|twee|drie|vier|vijftien|vijf|zestien|zes|zeventien|zeven|achtien|acht|negentien|negen|tien|elf|twaalf|dertien|veertien)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/PL/Rules.hs b/Duckling/Numeral/PL/Rules.hs index c35eead2..2b8bbaf6 100644 --- a/Duckling/Numeral/PL/Rules.hs +++ b/Duckling/Numeral/PL/Rules.hs @@ -28,7 +28,7 @@ ruleSixteen :: Rule ruleSixteen = Rule { name = "sixteen" , pattern = - [ regex "szesna(s|\x015b)(tu|cie|toma)" + [ regex "szesna(s|ś)(tu|cie|toma)" ] , prod = \_ -> integer 16 } @@ -37,7 +37,7 @@ ruleFourteen :: Rule ruleFourteen = Rule { name = "fourteen" , pattern = - [ regex "czterna(s|\x015b)(tu|cie|toma)" + [ regex "czterna(s|ś)(tu|cie|toma)" ] , prod = \_ -> integer 14 } @@ -46,7 +46,7 @@ ruleTwo :: Rule ruleTwo = Rule { name = "two" , pattern = - [ regex "dw(a|(o|\x00f3)(ch|m)|oma|iema|ie)" + [ regex "dw(a|(o|ó)(ch|m)|oma|iema|ie)" ] , prod = \_ -> integer 2 } @@ -55,7 +55,7 @@ ruleSixty :: Rule ruleSixty = Rule { name = "sixty" , pattern = - [ regex "sze(\x015b\x0107)dziesi(\x0105)t|sze(\x015b\x0107)dziesi(\x0119)ci(u|oma)" + [ regex "sze(ść)dziesi(ą)t|sze(ść)dziesi(ę)ci(u|oma)" ] , prod = \_ -> integer 60 } @@ -92,7 +92,7 @@ ruleOne :: Rule ruleOne = Rule { name = "one" , pattern = - [ regex "jed(en|nego|nemu|nym|nej|n(a|\x0105))" + [ regex "jed(en|nego|nemu|nym|nej|n(a|ą))" ] , prod = \_ -> integer 1 } @@ -114,7 +114,7 @@ ruleTen :: Rule ruleTen = Rule { name = "ten" , pattern = - [ regex "dzisi(e|\x0119)(\x0107|c)(iu|ioma)?" + [ regex "dzisi(e|ę)(ć|c)(iu|ioma)?" ] , prod = \_ -> integer 10 } @@ -149,7 +149,7 @@ ruleNine :: Rule ruleNine = Rule { name = "nine" , pattern = - [ regex "dziewi(e|\x0119)(\x0107|c)(iu|ioma)?" + [ regex "dziewi(e|ę)(ć|c)(iu|ioma)?" 
] , prod = \_ -> integer 9 } @@ -167,7 +167,7 @@ ruleTwelve :: Rule ruleTwelve = Rule { name = "twelve" , pattern = - [ regex "dwunast(u|oma)|dwana(\x015b|s)cie" + [ regex "dwunast(u|oma)|dwana(ś|s)cie" ] , prod = \_ -> integer 12 } @@ -187,7 +187,7 @@ ruleFifteen :: Rule ruleFifteen = Rule { name = "fifteen" , pattern = - [ regex "pi(\x0119)tna(s|\x015b)(ta|tu|cie|toma)" + [ regex "pi(ę)tna(s|ś)(ta|tu|cie|toma)" ] , prod = \_ -> integer 15 } @@ -196,7 +196,7 @@ ruleEleven :: Rule ruleEleven = Rule { name = "eleven" , pattern = - [ regex "jedena(stu|(s|\x015b)cie|stoma)" + [ regex "jedena(stu|(s|ś)cie|stoma)" ] , prod = \_ -> integer 11 } @@ -205,7 +205,7 @@ ruleThirteen :: Rule ruleThirteen = Rule { name = "thirteen" , pattern = - [ regex "trzyna(\x015b|s)(tu|cie|toma)" + [ regex "trzyna(ś|s)(tu|cie|toma)" ] , prod = \_ -> integer 13 } @@ -214,7 +214,7 @@ ruleThirty :: Rule ruleThirty = Rule { name = "thirty" , pattern = - [ regex "trzydzie(\x015b)ci|trzydziest(u|oma)" + [ regex "trzydzie(ś)ci|trzydziest(u|oma)" ] , prod = \_ -> integer 30 } @@ -223,7 +223,7 @@ ruleNumeral2 :: Rule ruleNumeral2 = Rule { name = "number 200" , pattern = - [ regex "dwie((\x015b)cie| setki)" + [ regex "dwie((ś)cie| setki)" ] , prod = \_ -> integer 200 >>= withGrain 2 } @@ -232,7 +232,7 @@ ruleSeventeen :: Rule ruleSeventeen = Rule { name = "seventeen" , pattern = - [ regex "siedemna(s|\x015b)(tu|cie|toma)" + [ regex "siedemna(s|ś)(tu|cie|toma)" ] , prod = \_ -> integer 17 } @@ -250,7 +250,7 @@ ruleNumeral9 :: Rule ruleNumeral9 = Rule { name = "number 900" , pattern = - [ regex "dziewi(\x0119\x0107)(set| setek)" + [ regex "dziewi(ęć)(set| setek)" ] , prod = \_ -> integer 900 >>= withGrain 2 } @@ -268,7 +268,7 @@ ruleTwenty :: Rule ruleTwenty = Rule { name = "twenty" , pattern = - [ regex "dwadzie(\x015b|s)cia|dwudziest(u|oma)" + [ regex "dwadzie(ś|s)cia|dwudziest(u|oma)" ] , prod = \_ -> integer 20 } @@ -286,7 +286,7 @@ ruleEight :: Rule ruleEight = Rule { name = "eight" , pattern = - 
[ regex "o(s|\x015b)(iem|miu|mioma)" + [ regex "o(s|ś)(iem|miu|mioma)" ] , prod = \_ -> integer 8 } @@ -295,7 +295,7 @@ ruleNumeral5 :: Rule ruleNumeral5 = Rule { name = "number 500" , pattern = - [ regex "pi(\x0119\x0107)(set| setek)" + [ regex "pi(ęć)(set| setek)" ] , prod = \_ -> integer 500 >>= withGrain 2 } @@ -322,7 +322,7 @@ ruleThousand :: Rule ruleThousand = Rule { name = "thousand" , pattern = - [ regex "ty(s|\x015b)i(a|\x0105|\x0119)c(e|y)?" + [ regex "ty(s|ś)i(a|ą|ę)c(e|y)?" ] , prod = \_ -> integer 1000 >>= withGrain 3 >>= withMultipliable } @@ -331,7 +331,7 @@ ruleMillion :: Rule ruleMillion = Rule { name = "million" , pattern = - [ regex "milion(y|(\x00f3)w)?" + [ regex "milion(y|(ó)w)?" ] , prod = \_ -> integer 1000000 >>= withGrain 6 >>= withMultipliable } @@ -385,7 +385,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -429,7 +429,7 @@ ruleSix :: Rule ruleSix = Rule { name = "six" , pattern = - [ regex "sze(s|\x015b)(c|\x0107)(iu|oma|u)?" + [ regex "sze(s|ś)(c|ć)(iu|oma|u)?" ] , prod = \_ -> integer 6 } @@ -438,7 +438,7 @@ ruleNumeral6 :: Rule ruleNumeral6 = Rule { name = "number 600" , pattern = - [ regex "(sze\x015b\x0107(set| setek))" + [ regex "(sześć(set| setek))" ] , prod = \_ -> integer 600 >>= withGrain 2 } @@ -456,7 +456,7 @@ ruleFive :: Rule ruleFive = Rule { name = "five" , pattern = - [ regex "pi(e|\x0119)(c|\x0107)(iu|oma|u)?" + [ regex "pi(e|ę)(c|ć)(iu|oma|u)?" 
] , prod = \_ -> integer 5 } @@ -465,7 +465,7 @@ ruleFourty :: Rule ruleFourty = Rule { name = "fou?rty" , pattern = - [ regex "czterdzie(\x015b)ci|czterdziest(u|oma)" + [ regex "czterdzie(ś)ci|czterdziest(u|oma)" ] , prod = \_ -> integer 40 } @@ -492,7 +492,7 @@ ruleNineteen :: Rule ruleNineteen = Rule { name = "nineteen" , pattern = - [ regex "dziewietna(s|\x015b)(tu|cie|toma)" + [ regex "dziewietna(s|ś)(tu|cie|toma)" ] , prod = \_ -> integer 19 } @@ -515,7 +515,7 @@ ruleEighteen :: Rule ruleEighteen = Rule { name = "eighteen" , pattern = - [ regex "osiemna(s|\x015b)(tu|cie|toma)" + [ regex "osiemna(s|ś)(tu|cie|toma)" ] , prod = \_ -> integer 18 } @@ -550,7 +550,7 @@ ruleFifty :: Rule ruleFifty = Rule { name = "fifty" , pattern = - [ regex "pi(\x0119\x0107)dziesi(\x0105)t|pi(\x0119\x0107)dziesi(\x0119)ci(u|oma)" + [ regex "pi(ęć)dziesi(ą)t|pi(ęć)dziesi(ę)ci(u|oma)" ] , prod = \_ -> integer 50 } diff --git a/Duckling/Numeral/PT/Rules.hs b/Duckling/Numeral/PT/Rules.hs index 34faadbe..31d07a8c 100644 --- a/Duckling/Numeral/PT/Rules.hs +++ b/Duckling/Numeral/PT/Rules.hs @@ -80,7 +80,7 @@ ruleNumeral2 :: Rule ruleNumeral2 = Rule { name = "number (20..90)" , pattern = - [ regex "(vinte|trinta|quarenta|cincoenta|cinq(\x00fc)enta|cinquenta|sessenta|setenta|oitenta|noventa)" + [ regex "(vinte|trinta|quarenta|cincoenta|cinq(ü)enta|cinquenta|sessenta|setenta|oitenta|noventa)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -102,7 +102,7 @@ ruleNumeral :: Rule ruleNumeral = Rule { name = "number (0..15)" , pattern = - [ regex "(zero|uma?|d(oi|ua)s|tr(\x00ea|e)s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|(ca|qua)torze|quinze)" + [ regex "(zero|uma?|d(oi|ua)s|tr(ê|e)s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|(ca|qua)torze|quinze)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -111,7 +111,7 @@ ruleNumeral = Rule "um" -> 
integer 1 "dois" -> integer 2 "duas" -> integer 2 - "tr\x00eas" -> integer 3 + "três" -> integer 3 "tres" -> integer 3 "quatro" -> integer 4 "cinco" -> integer 5 @@ -167,7 +167,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -224,7 +224,7 @@ ruleDozen :: Rule ruleDozen = Rule { name = "dozen" , pattern = - [ regex "d(\x00fa|u)zias?" + [ regex "d(ú|u)zias?" ] , prod = \_ -> integer 12 >>= withGrain 1 >>= withMultipliable } diff --git a/Duckling/Numeral/RO/Rules.hs b/Duckling/Numeral/RO/Rules.hs index 6db4ef49..e71bd2f9 100644 --- a/Duckling/Numeral/RO/Rules.hs +++ b/Duckling/Numeral/RO/Rules.hs @@ -138,7 +138,7 @@ ruleIntersectCuI = Rule { name = "intersect (cu și)" , pattern = [ numberWith (fromMaybe 0 . TNumeral.grain) (>1) - , regex "(s|\x0219)i" + , regex "(s|ș)i" , numberWith TNumeral.multipliable not ] , prod = \tokens -> case tokens of @@ -166,13 +166,13 @@ rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(sut(a|e|\x0103)?|milio(n|ane)?|miliar(de?)?|mi[ei]?)" + [ regex "(sut(a|e|ă)?|milio(n|ane)?|miliar(de?)?|mi[ei]?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "suta" -> double 1e2 >>= withGrain 2 >>= withMultipliable "sute" -> double 1e2 >>= withGrain 2 >>= withMultipliable - "sut\x0103" -> double 1e2 >>= withGrain 2 >>= withMultipliable + "sută" -> double 1e2 >>= withGrain 2 >>= withMultipliable "mi" -> double 1e3 >>= withGrain 3 >>= withMultipliable "mie" -> double 1e3 >>= withGrain 3 >>= withMultipliable "mii" -> double 1e3 >>= withGrain 3 >>= withMultipliable @@ -200,13 +200,13 @@ zeroTenMap = HashMap.fromList , ("unu", 1) , ("unul", 1) , ("intai", 1) - , ("\x00eentai", 1) - , ("int\x00e2i", 1) - , ("\x00eent\x00e2i", 1) 
+ , ("întai", 1) + , ("intâi", 1) + , ("întâi", 1) , ("o", 1) , ("doi", 2) , ("doua", 2) - , ("dou\x0103", 2) + , ("două", 2) , ("trei", 3) , ("patru", 4) , ("cinci", 5) @@ -216,7 +216,7 @@ zeroTenMap = HashMap.fromList , ("\537apte", 7) , ("opt", 8) , ("noua", 9) - , ("nou\x0103", 9) + , ("nouă", 9) , ("zece", 10) , ("zeci", 10) ] @@ -225,7 +225,7 @@ ruleIntegerZeroTen :: Rule ruleIntegerZeroTen = Rule { name = "integer (0..10)" , pattern = - [ regex "(zero|nimic|nici(\\s?o|\\sun(a|ul?))|una|unul?|doi|dou(a|\x0103)|trei|patru|cinci|(s|\x0219)ase|(s|\x0219)apte|opt|nou(a|\x0103)|zec[ei]|(i|\x00ee)nt(a|\x00e2)i|un|o)" + [ regex "(zero|nimic|nici(\\s?o|\\sun(a|ul?))|una|unul?|doi|dou(a|ă)|trei|patru|cinci|(s|ș)ase|(s|ș)apte|opt|nou(a|ă)|zec[ei]|(i|î)nt(a|â)i|un|o)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -250,14 +250,14 @@ elevenNineteenMap = HashMap.fromList , ("opti", 18) , ("opt", 18) , ("noua", 19) - , ("nou\x0103", 19) + , ("nouă", 19) ] ruleInteger :: Rule ruleInteger = Rule { name = "integer (11..19)" , pattern = - [ regex "(cin|sapti|opti)(s|\x0219)pe|(cinci|(s|\x0219)apte|opt)sprezece|(un|doi|trei|pai|(s|\x0219)ai|nou(a|\x0103))((s|\x0219)pe|sprezece)" + [ regex "(cin|sapti|opti)(s|ș)pe|(cinci|(s|ș)apte|opt)sprezece|(un|doi|trei|pai|(s|ș)ai|nou(a|ă))((s|ș)pe|sprezece)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (e1:_:e2:_:r:_)):_) -> do @@ -274,7 +274,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "(dou(a|\x0103)|trei|patru|cinci|(s|\x0219)ai|(s|\x0219)apte|opt|nou(a|\x0103))\\s?zeci" + [ regex "(dou(a|ă)|trei|patru|cinci|(s|ș)ai|(s|ș)apte|opt|nou(a|ă))\\s?zeci" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> do diff --git a/Duckling/Numeral/RU/Rules.hs b/Duckling/Numeral/RU/Rules.hs index 4587d48c..e6732b59 100644 --- a/Duckling/Numeral/RU/Rules.hs +++ b/Duckling/Numeral/RU/Rules.hs @@ -29,21 +29,21 @@ 
import Duckling.Types dozensMap :: HashMap Text Integer dozensMap = HashMap.fromList - [ ( "\x0434\x0432\x0430\x0434\x0446\x0430\x0442\x044c", 20) - , ( "\x0442\x0440\x0438\x0434\x0446\x0430\x0442\x044c", 30) - , ( "\x0441\x043e\x0440\x043e\x043a", 40) - , ( "\x043f\x044f\x0442\x044c\x0434\x0435\x0441\x044f\x0442", 50) - , ( "\x0448\x0435\x0441\x0442\x044c\x0434\x0435\x0441\x044f\x0442", 60) - , ( "\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442", 70) - , ( "\x0432\x043e\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442", 80) - , ( "\x0434\x0435\x0432\x044f\x043d\x043e\x0441\x0442\x043e", 90) + [ ( "двадцать", 20) + , ( "тридцать", 30) + , ( "сорок", 40) + , ( "пятьдесят", 50) + , ( "шестьдесят", 60) + , ( "семьдесят", 70) + , ( "восемьдесят", 80) + , ( "девяносто", 90) ] ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer (20..90)" , pattern = - [ regex "(\x0434\x0432\x0430\x0434\x0446\x0430\x0442\x044c|\x0442\x0440\x0438\x0434\x0446\x0430\x0442\x044c|\x0441\x043e\x0440\x043e\x043a|\x043f\x044f\x0442\x044c\x0434\x0435\x0441\x044f\x0442|\x0448\x0435\x0441\x0442\x044c\x0434\x0435\x0441\x044f\x0442|\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442|\x0432\x043e\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442|\x0434\x0435\x0432\x044f\x043d\x043e\x0441\x0442\x043e)" + [ regex "(двадцать|тридцать|сорок|пятьдесят|шестьдесят|семьдесят|восемьдесят|девяносто)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -91,29 +91,29 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer 2" , pattern = - [ regex "(\x0434\x0432\x0430|\x0434\x0432\x0435|\x0434\x0432\x043e\x0435|\x043f\x0430\x0440\x0430|\x043f\x0430\x0440\x0443|\x043f\x0430\x0440\x043e\x0447\x043a\x0443|\x043f\x0430\x0440\x043e\x0447\x043a\x0430)" + [ regex "(два|две|двое|пара|пару|парочку|парочка)" ] , prod = \_ -> integer 2 } hundredsMap :: HashMap Text Integer hundredsMap = HashMap.fromList - [ ( "\x0441\x0442\x043e", 100) - , ( 
"\x0434\x0432\x0435\x0441\x0442\x0438", 200) - , ( "\x0442\x0440\x0438\x0441\x0442\x043e", 300) - , ( "\x0447\x0435\x0442\x044b\x0440\x0435\x0441\x0442\x043e", 400) - , ( "\x043f\x044f\x0442\x044c\x0441\x043e\x0442", 500) - , ( "\x0448\x0435\x0441\x0442\x044c\x0441\x043e\x0442", 600) - , ( "\x0441\x0435\x043c\x044c\x0441\x043e\x0442", 700) - , ( "\x0432\x043e\x0441\x0435\x043c\x044c\x0441\x043e\x0442", 800) - , ( "\x0434\x0435\x0432\x044f\x0442\x044c\x0441\x043e\x0442", 900) + [ ( "сто", 100) + , ( "двести", 200) + , ( "тристо", 300) + , ( "четыресто", 400) + , ( "пятьсот", 500) + , ( "шестьсот", 600) + , ( "семьсот", 700) + , ( "восемьсот", 800) + , ( "девятьсот", 900) ] ruleInteger6 :: Rule ruleInteger6 = Rule { name = "integer (100..900)" , pattern = - [ regex "(\x0441\x0442\x043e|\x0434\x0432\x0435\x0441\x0442\x0438|\x0442\x0440\x0438\x0441\x0442\x043e|\x0447\x0435\x0442\x044b\x0440\x0435\x0441\x0442\x043e|\x043f\x044f\x0442\x044c\x0441\x043e\x0442|\x0448\x0435\x0441\x0442\x044c\x0441\x043e\x0442|\x0441\x0435\x043c\x044c\x0441\x043e\x0442|\x0432\x043e\x0441\x0435\x043c\x044c\x0441\x043e\x0442|\x0434\x0435\x0432\x044f\x0442\x044c\x0441\x043e\x0442)" + [ regex "(сто|двести|тристо|четыресто|пятьсот|шестьсот|семьсот|восемьсот|девятьсот)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -125,7 +125,7 @@ ruleNumeralsPrefixWithMinus :: Rule ruleNumeralsPrefixWithMinus = Rule { name = "numbers prefix with -, minus" , pattern = - [ regex "-|\x043c\x0438\x043d\x0443\x0441\\s?" + [ regex "-|минус\\s?" 
, dimension Numeral ] , prod = \tokens -> case tokens of @@ -138,18 +138,18 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "((\x043a|\x043c|\x0433)|(\x041a|\x041c|\x0413))(?=[\\W\\$\x20ac]|$)" + , regex "((к|м|г)|(К|М|Г))(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): Token RegexMatch (GroupMatch (match:_)): _) -> case Text.toLower match of - "\x043a" -> double $ v * 1e3 - "\x041a" -> double $ v * 1e3 - "\x043c" -> double $ v * 1e6 - "\x041c" -> double $ v * 1e6 - "\x0433" -> double $ v * 1e9 - "\x0413" -> double $ v * 1e9 + "к" -> double $ v * 1e3 + "К" -> double $ v * 1e3 + "м" -> double $ v * 1e6 + "М" -> double $ v * 1e6 + "г" -> double $ v * 1e9 + "Г" -> double $ v * 1e9 _ -> Nothing _ -> Nothing } @@ -186,37 +186,37 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "(\x043d\x043e\x043b\x044c)" + [ regex "(ноль)" ] , prod = \_ -> integer 0 } threeToNineteenMap:: HashMap Text Integer threeToNineteenMap = HashMap.fromList - [ ( "\x0442\x0440\x0438", 3) - , ( "\x0447\x0435\x0442\x044b\x0440\x0435", 4) - , ( "\x043f\x044f\x0442\x044c", 5) - , ( "\x0448\x0435\x0441\x0442\x044c", 6) - , ( "\x0441\x0435\x043c\x044c", 7) - , ( "\x0432\x043e\x0441\x0435\x043c\x044c", 8) - , ( "\x0434\x0435\x0432\x044f\x0442\x044c", 9) - , ( "\x0434\x0435\x0441\x044f\x0442\x044c", 10) - , ( "\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 11) - , ( "\x0434\x0432\x0435\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 12) - , ( "\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 13) - , ( "\x0447\x0435\x0442\x044b\x0440\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 14) - , ( "\x043f\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 15) - , ( "\x0448\x0435\x0441\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 16) - , ( "\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 17) - , ( 
"\x0432\x043e\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 18) - , ( "\x0434\x0435\x0432\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c", 19) + [ ( "три", 3) + , ( "четыре", 4) + , ( "пять", 5) + , ( "шесть", 6) + , ( "семь", 7) + , ( "восемь", 8) + , ( "девять", 9) + , ( "десять", 10) + , ( "одинадцать", 11) + , ( "двенадцать", 12) + , ( "тринадцать", 13) + , ( "четырнадцать", 14) + , ( "пятнадцать", 15) + , ( "шестнадцать", 16) + , ( "семнадцать", 17) + , ( "восемнадцать", 18) + , ( "девятнадцать", 19) ] ruleInteger4 :: Rule ruleInteger4 = Rule { name = "integer (3..19)" , pattern = - [ regex "(\x0442\x0440\x0438|\x0447\x0435\x0442\x044b\x0440\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0447\x0435\x0442\x044b\x0440\x0435|\x043f\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x043f\x044f\x0442\x044c|\x0448\x0435\x0441\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0448\x0435\x0441\x0442\x044c|\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0441\x0435\x043c\x044c|\x0432\x043e\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0432\x043e\x0441\x0435\x043c\x044c|\x0434\x0435\x0432\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0434\x0435\x0432\x044f\x0442\x044c|\x0434\x0435\x0441\x044f\x0442\x044c|\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0434\x0432\x0435\x043d\x0430\x0434\x0446\x0430\x0442\x044c|\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x0430\x0442\x044c)" + [ regex "(три|четырнадцать|четыре|пятнадцать|пять|шестнадцать|шесть|семнадцать|семь|восемнадцать|восемь|девятнадцать|девять|десять|одинадцать|двенадцать|тринадцать)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -228,7 +228,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer 1" , pattern = - [ regex "(\x043e\x0434\x0438\x043d|\x043e\x0434\x043d\x0430|\x043e\x0434\x043d\x0443)" + [ regex "(один|одна|одну)" ] , prod = \_ -> integer 1 } @@ -238,7 +238,7 @@ 
ruleNumeralDotNumeral = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "\x0442\x043e\x0447\x043a\x0430" + , regex "точка" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/SV/Rules.hs b/Duckling/Numeral/SV/Rules.hs index 253c5368..a6ac0d05 100644 --- a/Duckling/Numeral/SV/Rules.hs +++ b/Duckling/Numeral/SV/Rules.hs @@ -75,7 +75,7 @@ ruleFew :: Rule ruleFew = Rule { name = "few" , pattern = - [ regex "(n\x00e5gra )?f\x00e5" + [ regex "(några )?få" ] , prod = \_ -> integer 3 } @@ -161,7 +161,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -217,13 +217,13 @@ zeroToNineteenMap = HashMap.fromList , ( "noll" , 0 ) , ( "en" , 1 ) , ( "ett" , 1 ) - , ( "tv\x00e5" , 2 ) + , ( "två" , 2 ) , ( "tre" , 3 ) , ( "fyra" , 4 ) , ( "fem" , 5 ) , ( "sex" , 6 ) , ( "sju" , 7 ) - , ( "\x00e5tta", 8 ) + , ( "åtta", 8 ) , ( "nio" , 9 ) , ( "tio" , 10 ) , ( "elva" , 11 ) @@ -241,7 +241,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(inget|ingen|noll|en|ett|tv\x00e5|tretton|tre|fyra|femton|fem|sexton|sex|sjutton|sju|\x00e5tta|nio|tio|elva|tolv|fjorton|arton|nitton)" + [ regex "(inget|ingen|noll|en|ett|två|tretton|tre|fyra|femton|fem|sexton|sex|sjutton|sju|åtta|nio|tio|elva|tolv|fjorton|arton|nitton)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -257,7 +257,7 @@ dozenMap = HashMap.fromList , ( "femtio" , 50) , ( "sextio" , 60) , ( "sjuttio" , 70) - , ( "\x00e5ttio" , 80) + , ( "åttio" , 80) , ( "nittio" , 90) ] @@ -265,7 +265,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "(tjugo|trettio|fyrtio|femtio|sextio|sjuttio|\x00e5ttio|nittio)" + [ regex 
"(tjugo|trettio|fyrtio|femtio|sextio|sjuttio|åttio|nittio)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Numeral/TR/Rules.hs b/Duckling/Numeral/TR/Rules.hs index debb9fde..a83bca72 100644 --- a/Duckling/Numeral/TR/Rules.hs +++ b/Duckling/Numeral/TR/Rules.hs @@ -29,15 +29,15 @@ import Duckling.Types hundredsMap :: HashMap Text Integer hundredsMap = HashMap.fromList - [ ( "y\x00fcz", 100) - , ( "ikiy\x00fcz", 200) - , ( "\x00fc\x00e7y\x00fcz", 300) - , ( "d\x00f6rty\x00fcz", 400) - , ( "be\x015fy\x00fcz", 500) - , ( "alt\x0131y\x00fcz", 600) - , ( "yediy\x00fcz", 700) - , ( "sekizy\x00fcz", 800) - , ( "dokuzy\x00fcz", 900) + [ ( "yüz", 100) + , ( "ikiyüz", 200) + , ( "üçyüz", 300) + , ( "dörtyüz", 400) + , ( "beşyüz", 500) + , ( "altıyüz", 600) + , ( "yediyüz", 700) + , ( "sekizyüz", 800) + , ( "dokuzyüz", 900) ] @@ -45,7 +45,7 @@ ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer 100..900" , pattern = - [ regex "(y\x00fcz|ikiy\x00fcz|\x00fc\x00e7y\x00fcz|d\x00f6rty\x00fcz|be\x015fy\x00fcz|alt\x0131y\x00fcz|yediy\x00fcz|sekizy\x00fcz|dokuzy\x00fcz)" + [ regex "(yüz|ikiyüz|üçyüz|dörtyüz|beşyüz|altıyüz|yediyüz|sekizyüz|dokuzyüz)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -83,7 +83,7 @@ ruleACoupleOf :: Rule ruleACoupleOf = Rule { name = "a couple (of)" , pattern = - [ regex "(bir )?\x00e7ift" + [ regex "(bir )?çift" ] , prod = \_ -> integer 2 >>= withGrain 1 } @@ -151,23 +151,23 @@ ruleDecimalNumeral = Rule numeralSuffixesHalfsuffixTextMap :: HashMap Text Double numeralSuffixesHalfsuffixTextMap = HashMap.fromList - [ ( "birbu\x00e7uk", 1.5) - , ( "bibu\x00e7uk", 1.5) - , ( "ikibu\x00e7uk", 2.5) - , ( "\x00fc\231bu\x00e7uk", 3.5) - , ( "d\x00f6rtbu\x00e7uk", 4.5) - , ( "be\351bu\x00e7uk", 5.5) - , ( "alt\x0131bu\x00e7uk", 6.5) - , ( "yedibu\x00e7uk", 7.5) - , ( "sekizbu\x00e7uk", 8.5) - , ( "dokuzbu\x00e7uk", 9.5) + [ ( "birbuçuk", 1.5) + , ( "bibuçuk", 1.5) 
+ , ( "ikibuçuk", 2.5) + , ( "ü\231buçuk", 3.5) + , ( "dörtbuçuk", 4.5) + , ( "be\351buçuk", 5.5) + , ( "altıbuçuk", 6.5) + , ( "yedibuçuk", 7.5) + , ( "sekizbuçuk", 8.5) + , ( "dokuzbuçuk", 9.5) ] ruleNumeralSuffixesHalfsuffixText :: Rule ruleNumeralSuffixesHalfsuffixText = Rule { name = "number suffixes (half-suffix text) (1..9)" , pattern = - [ regex "((bir?|iki|\x00fc\x00e7|d\x00f6rt|be\x015f|alt\x0131|yedi|sekiz|dokuz)(bu\x00e7uk))" + [ regex "((bir?|iki|üç|dört|beş|altı|yedi|sekiz|dokuz)(buçuk))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -180,98 +180,98 @@ tenToNintynineMap = HashMap.fromList [ ( "onbi", 11) , ( "onbir", 11) , ( "oniki", 12) - , ( "on\x00fc\x00e7", 13) - , ( "ond\x00f6rt", 14) - , ( "onbe\x015f", 15) - , ( "onalt\x0131", 16) + , ( "onüç", 13) + , ( "ondört", 14) + , ( "onbeş", 15) + , ( "onaltı", 16) , ( "onyedi", 17) , ( "onsekiz", 18) , ( "ondokuz", 19) , ( "yirmibi", 21) , ( "yirmibir", 21) , ( "yirmiiki", 22) - , ( "yirmi\x00fc\x00e7", 23) - , ( "yirmid\x00f6rt", 24) - , ( "yirmibe\x015f", 25) - , ( "yirmialt\x0131", 26) + , ( "yirmiüç", 23) + , ( "yirmidört", 24) + , ( "yirmibeş", 25) + , ( "yirmialtı", 26) , ( "yirmiyedi", 27) , ( "yirmisekiz", 28) , ( "yirmidokuz", 29) , ( "otuzbi", 31) , ( "otuzbir", 31) , ( "otuziki", 32) - , ( "otuz\x00fc\x00e7", 33) - , ( "otuzd\x00f6rt", 34) - , ( "otuzbe\x015f", 35) - , ( "otuzalt\x0131", 36) + , ( "otuzüç", 33) + , ( "otuzdört", 34) + , ( "otuzbeş", 35) + , ( "otuzaltı", 36) , ( "otuzyedi", 37) , ( "otuzsekiz", 38) , ( "otuzdokuz", 39) - , ( "k\x0131rkbir", 41) - , ( "k\x0131rkbi", 41) - , ( "k\x0131rkiki", 42) - , ( "k\x0131rk\x00fc\x00e7", 43) - , ( "k\x0131rkd\x00f6rt", 44) - , ( "k\x0131rkbe\x015f", 45) - , ( "k\x0131rkalt\x0131", 46) - , ( "k\x0131rkyedi", 47) - , ( "k\x0131rksekiz", 48) - , ( "k\x0131rkdokuz", 49) + , ( "kırkbir", 41) + , ( "kırkbi", 41) + , ( "kırkiki", 42) + , ( "kırküç", 43) + , ( "kırkdört", 44) + , ( "kırkbeş", 45) + , ( 
"kırkaltı", 46) + , ( "kırkyedi", 47) + , ( "kırksekiz", 48) + , ( "kırkdokuz", 49) , ( "ellibi", 51) , ( "ellibir", 51) , ( "elliiki", 52) - , ( "elli\x00fc\x00e7", 53) - , ( "ellid\x00f6rt", 54) - , ( "ellibe\x015f", 55) - , ( "ellialt\x0131", 56) + , ( "elliüç", 53) + , ( "ellidört", 54) + , ( "ellibeş", 55) + , ( "ellialtı", 56) , ( "elliyedi", 57) , ( "ellisekiz", 58) , ( "ellidokuz", 59) - , ( "altm\x0131\x015fbir", 61) - , ( "atm\x0131\x015fbir", 61) - , ( "atm\x0131\x015fiki", 62) - , ( "altm\x0131\x015fiki", 62) - , ( "atm\x0131\x015f\x00fc\x00e7", 63) - , ( "altm\x0131\x015f\x00fc\x00e7", 63) - , ( "atm\x0131\x015fd\x00f6rt", 64) - , ( "altm\x0131\x015fd\x00f6rt", 64) - , ( "atm\x0131\x015fbe\x015f", 65) - , ( "altm\x0131\x015fbe\x015f", 65) - , ( "atm\x0131\x015falt\x0131", 66) - , ( "altm\x0131\x015falt\x0131", 66) - , ( "altm\x0131\x015fyedi", 67) - , ( "atm\x0131\x015fyedi", 67) - , ( "altm\x0131\x015fsekiz", 68) - , ( "atm\x0131\x015fsekiz", 68) - , ( "atm\x0131\x015fdokuz", 69) - , ( "altm\x0131\x015fdokuz", 69) - , ( "yetmi\x015fbir", 71) - , ( "yetmi\x015fbi", 71) - , ( "yetmi\x015fiki", 72) - , ( "yetmi\x015f\x00fc\x00e7", 73) - , ( "yetmi\x015fd\x00f6rt", 74) - , ( "yetmi\x015fbe\x015f", 75) - , ( "yetmi\x015falt\x0131", 76) - , ( "yetmi\x015fyedi", 77) - , ( "yetmi\x015fsekiz", 78) - , ( "yetmi\x015fdokuz", 79) + , ( "altmışbir", 61) + , ( "atmışbir", 61) + , ( "atmışiki", 62) + , ( "altmışiki", 62) + , ( "atmışüç", 63) + , ( "altmışüç", 63) + , ( "atmışdört", 64) + , ( "altmışdört", 64) + , ( "atmışbeş", 65) + , ( "altmışbeş", 65) + , ( "atmışaltı", 66) + , ( "altmışaltı", 66) + , ( "altmışyedi", 67) + , ( "atmışyedi", 67) + , ( "altmışsekiz", 68) + , ( "atmışsekiz", 68) + , ( "atmışdokuz", 69) + , ( "altmışdokuz", 69) + , ( "yetmişbir", 71) + , ( "yetmişbi", 71) + , ( "yetmişiki", 72) + , ( "yetmişüç", 73) + , ( "yetmişdört", 74) + , ( "yetmişbeş", 75) + , ( "yetmişaltı", 76) + , ( "yetmişyedi", 77) + , ( "yetmişsekiz", 78) + , ( 
"yetmişdokuz", 79) , ( "seksenbir", 81) , ( "seksenbi", 81) , ( "sekseniki", 82) - , ( "seksen\x00fc\x00e7", 83) - , ( "seksend\x00f6rt", 84) - , ( "seksenbe\x015f", 85) - , ( "seksenalt\x0131", 86) + , ( "seksenüç", 83) + , ( "seksendört", 84) + , ( "seksenbeş", 85) + , ( "seksenaltı", 86) , ( "seksenyedi", 87) , ( "seksensekiz", 88) , ( "seksendokuz", 89) , ( "doksanbi", 91) , ( "doksanbir", 91) , ( "doksaniki", 92) - , ( "doksan\x00fc\x00e7", 93) - , ( "doksand\x00f6rt", 94) - , ( "doksanbe\x015f", 95) - , ( "doksanalt\x0131", 96) + , ( "doksanüç", 93) + , ( "doksandört", 94) + , ( "doksanbeş", 95) + , ( "doksanaltı", 96) , ( "doksanyedi", 97) , ( "doksansekiz", 98) , ( "doksandokuz", 99) @@ -281,7 +281,7 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer 11..19 21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99" , pattern = - [ regex "((on|yirmi|otuz|k\x0131rk|elli|atm\x0131\x015f|altm\x0131\x015f|yetmi\x015f|seksen|doksan)(bir|bi|iki|\x00fc\x00e7|d\x00f6rt|be\x015f|alt\x0131|yedi|sekiz|dokuz))" + [ regex "((on|yirmi|otuz|kırk|elli|atmış|altmış|yetmiş|seksen|doksan)(bir|bi|iki|üç|dört|beş|altı|yedi|sekiz|dokuz))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -294,10 +294,10 @@ thousandsMap :: HashMap Text Integer thousandsMap = HashMap.fromList [ ( "bin", 1000) , ( "ikibin", 2000) - , ( "\x00fc\x00e7bin", 3000) - , ( "d\x00f6rtbin", 4000) - , ( "be\x015fbin", 5000) - , ( "alt\x0131bin", 6000) + , ( "üçbin", 3000) + , ( "dörtbin", 4000) + , ( "beşbin", 5000) + , ( "altıbin", 6000) , ( "yedibin", 7000) , ( "sekizbin", 8000) , ( "dokuzbin", 9000) @@ -307,7 +307,7 @@ ruleInteger6 :: Rule ruleInteger6 = Rule { name = "integer 1000..9000" , pattern = - [ regex "(bin|ikibin|\x00fc\x00e7bin|d\x00f6rtbin|be\x015fbin|alt\x0131bin|yedibin|sekizbin|dokuzbin)" + [ regex "(bin|ikibin|üçbin|dörtbin|beşbin|altıbin|yedibin|sekizbin|dokuzbin)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) 
-> @@ -334,7 +334,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmgb])(?=[\\W\\$\x20ac]|$)" + , regex "([kmgb])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -352,11 +352,11 @@ rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(y(\x00fc)z|bin|milyon)" + [ regex "(y(ü)z|bin|milyon)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "y\x00fcz" -> double 1e2 >>= withGrain 2 >>= withMultipliable + "yüz" -> double 1e2 >>= withGrain 2 >>= withMultipliable "bin" -> double 1e3 >>= withGrain 3 >>= withMultipliable "milyon" -> double 1e6 >>= withGrain 6 >>= withMultipliable _ -> Nothing @@ -368,11 +368,11 @@ tenThousandsMap = HashMap.fromList [ ( "onbin", 10000) , ( "yirmibin", 20000) , ( "otuzbin", 30000) - , ( "k\x0131rkbin", 40000) + , ( "kırkbin", 40000) , ( "ellibin", 50000) - , ( "altm\x0131\x015fbin", 60000) - , ( "atm\x0131\x015fbin", 60000) - , ( "yetmi\x015fbin", 70000) + , ( "altmışbin", 60000) + , ( "atmışbin", 60000) + , ( "yetmişbin", 70000) , ( "seksenbin", 80000) , ( "doksanbin", 90000) ] @@ -381,7 +381,7 @@ ruleInteger7 :: Rule ruleInteger7 = Rule { name = "integer 10000..90000" , pattern = - [ regex "(onbin|yirmibin|otuzbin|k\x0131rkbin|ellibin|atm\x0131\x015fbin|altm\x0131\x015fbin|yetmi\x015fbin|seksenbin|doksanbin)" + [ regex "(onbin|yirmibin|otuzbin|kırkbin|ellibin|atmışbin|altmışbin|yetmişbin|seksenbin|doksanbin)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -391,22 +391,22 @@ ruleInteger7 = Rule hundredThousandsMap :: HashMap Text Integer hundredThousandsMap = HashMap.fromList - [ ( "y\x00fczbin", 100000) - , ( "ikiy\x00fczbin", 200000) - , ( "\x00fc\x00e7y\x00fczbin", 300000) - , ( "d\x00f6rty\x00fczbin", 400000) - , ( "be\x015fy\x00fczbin", 500000) - , ( 
"alt\x0131y\x00fczbin", 600000) - , ( "yediy\x00fczbin", 700000) - , ( "sekizy\x00fczbin", 800000) - , ( "dokuzy\x00fczbin", 900000) + [ ( "yüzbin", 100000) + , ( "ikiyüzbin", 200000) + , ( "üçyüzbin", 300000) + , ( "dörtyüzbin", 400000) + , ( "beşyüzbin", 500000) + , ( "altıyüzbin", 600000) + , ( "yediyüzbin", 700000) + , ( "sekizyüzbin", 800000) + , ( "dokuzyüzbin", 900000) ] ruleInteger8 :: Rule ruleInteger8 = Rule { name = "integer 100000..900000" , pattern = - [ regex "(y\x00fczbin|ikiy\x00fczbin|\x00fc\x00e7y\x00fczbin|d\x00f6rty\x00fczbin|be\x015fy\x00fczbin|alt\x0131y\x00fczbin|yediy\x00fczbin|sekizy\x00fczbin|dokuzy\x00fczbin)" + [ regex "(yüzbin|ikiyüzbin|üçyüzbin|dörtyüzbin|beşyüzbin|altıyüzbin|yediyüzbin|sekizyüzbin|dokuzyüzbin)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -418,118 +418,118 @@ ruleHalf :: Rule ruleHalf = Rule { name = "half" , pattern = - [ regex "(yar\x0131m)" + [ regex "(yarım)" ] , prod = \_ -> double 0.5 } integer9Map :: HashMap Text Double integer9Map = HashMap.fromList - [ ( "onbirbu\x00e7uk", 11.5) - , ( "onbibu\x00e7uk", 11.5) - , ( "onikibu\x00e7uk", 12.5) - , ( "on\x00fc\x00e7bu\x00e7uk", 13.5) - , ( "ond\x00f6rtbu\x00e7uk", 14.5) - , ( "onbe\x015fbu\x00e7uk", 15.5) - , ( "onalt\x0131bu\x00e7uk", 16.5) - , ( "onyedibu\x00e7uk", 17.5) - , ( "onsekizbu\x00e7uk", 18.5) - , ( "ondokuzbu\x00e7uk", 19.5) - , ( "yirmibibu\x00e7uk", 21.5) - , ( "yirmibirbu\x00e7uk", 21.5) - , ( "yirmiikibu\x00e7uk", 22.5) - , ( "yirmi\x00fc\x00e7bu\x00e7uk", 23.5) - , ( "yirmid\x00f6rtbu\x00e7uk", 24.5) - , ( "yirmibe\x015fbu\x00e7uk", 25.5) - , ( "yirmialt\x0131bu\x00e7uk", 26.5) - , ( "yirmiyedibu\x00e7uk", 27.5) - , ( "yirmisekizbu\x00e7uk", 28.5) - , ( "yirmidokuzbu\x00e7uk", 29.5) - , ( "otuzbibu\x00e7uk", 31.5) - , ( "otuzbirbu\x00e7uk", 31.5) - , ( "otuzikibu\x00e7uk", 32.5) - , ( "otuz\x00fc\x00e7bu\x00e7uk", 33.5) - , ( "otuzd\x00f6rtbu\x00e7uk", 34.5) - , ( "otuzbe\x015fbu\x00e7uk", 35.5) - , ( 
"otuzalt\x0131bu\x00e7uk", 36.5) - , ( "otuzyedibu\x00e7uk", 37.5) - , ( "otuzsekizbu\x00e7uk", 38.5) - , ( "otuzdokuzbu\x00e7uk", 39.5) - , ( "k\x0131rkbirbu\x00e7uk", 41.5) - , ( "k\x0131rkbibu\x00e7uk", 41.5) - , ( "k\x0131rkikibu\x00e7uk", 42.5) - , ( "k\x0131rk\x00fc\x00e7bu\x00e7uk", 43.5) - , ( "k\x0131rkd\x00f6rtbu\x00e7uk", 44.5) - , ( "k\x0131rkbe\x015fbu\x00e7uk", 45.5) - , ( "k\x0131rkalt\x0131bu\x00e7uk", 46.5) - , ( "k\x0131rkyedibu\x00e7uk", 47.5) - , ( "k\x0131rksekizbu\x00e7uk", 48.5) - , ( "k\x0131rkdokuzbu\x00e7uk", 49.5) - , ( "ellibibu\x00e7uk", 51.5) - , ( "ellibirbu\x00e7uk", 51.5) - , ( "elliikibu\x00e7uk", 52.5) - , ( "elli\x00fc\x00e7bu\x00e7uk", 53.5) - , ( "ellid\x00f6rtbu\x00e7uk", 54.5) - , ( "ellibe\x015fbu\x00e7uk", 55.5) - , ( "ellialt\x0131bu\x00e7uk", 56.5) - , ( "elliyedibu\x00e7uk", 57.5) - , ( "ellisekizbu\x00e7uk", 58.5) - , ( "ellidokuzbu\x00e7uk", 59.5) - , ( "altm\x0131\x015fbirbu\x00e7uk", 61.5) - , ( "atm\x0131\x015fbirbu\x00e7uk", 61.5) - , ( "altm\x0131\x015fikibu\x00e7uk", 62.5) - , ( "atm\x0131\x015fikibu\x00e7uk", 62.5) - , ( "atm\x0131\x015f\x00fc\x00e7bu\x00e7uk", 63.5) - , ( "altm\x0131\x015f\x00fc\x00e7bu\x00e7uk", 63.5) - , ( "altm\x0131\x015fd\x00f6rtbu\x00e7uk", 64.5) - , ( "atm\x0131\x015fd\x00f6rtbu\x00e7uk", 64.5) - , ( "altm\x0131\x015fbe\x015fbu\x00e7uk", 65.5) - , ( "atm\x0131\x015fbe\x015fbu\x00e7uk", 65.5) - , ( "altm\x0131\x015falt\x0131bu\x00e7uk", 66.5) - , ( "atm\x0131\x015falt\x0131bu\x00e7uk", 66.5) - , ( "atm\x0131\x015fyedibu\x00e7uk", 67.5) - , ( "altm\x0131\x015fyedibu\x00e7uk", 67.5) - , ( "altm\x0131\x015fsekizbu\x00e7uk", 68.5) - , ( "atm\x0131\x015fsekizbu\x00e7uk", 68.5) - , ( "altm\x0131\x015fdokuzbu\x00e7uk", 69.5) - , ( "atm\x0131\x015fdokuzbu\x00e7uk", 69.5) - , ( "yetmi\x015fbibu\x00e7uk", 71.5) - , ( "yetmi\x015fbirbu\x00e7uk", 71.5) - , ( "yetmi\x015fikibu\x00e7uk", 72.5) - , ( "yetmi\x015f\x00fc\x00e7bu\x00e7uk", 73.5) - , ( "yetmi\x015fd\x00f6rtbu\x00e7uk", 74.5) - , ( 
"yetmi\x015fbe\x015fbu\x00e7uk", 75.5) - , ( "yetmi\x015falt\x0131bu\x00e7uk", 76.5) - , ( "yetmi\x015fyedibu\x00e7uk", 77.5) - , ( "yetmi\x015fsekizbu\x00e7uk", 78.5) - , ( "yetmi\x015fdokuzbu\x00e7uk", 79.5) - , ( "seksenbibu\x00e7uk", 81.5) - , ( "seksenbirbu\x00e7uk", 81.5) - , ( "seksenikibu\x00e7uk", 82.5) - , ( "seksen\x00fc\x00e7bu\x00e7uk", 83.5) - , ( "seksend\x00f6rtbu\x00e7uk", 84.5) - , ( "seksenbe\x015fbu\x00e7uk", 85.5) - , ( "seksenalt\x0131bu\x00e7uk", 86.5) - , ( "seksenyedibu\x00e7uk", 87.5) - , ( "seksensekizbu\x00e7uk", 88.5) - , ( "seksendokuzbu\x00e7uk", 89.5) - , ( "doksanbirbu\x00e7uk", 91.5) - , ( "doksanbibu\x00e7uk", 91.5) - , ( "doksanikibu\x00e7uk", 92.5) - , ( "doksan\x00fc\x00e7bu\x00e7uk", 93.5) - , ( "doksand\x00f6rtbu\x00e7uk", 94.5) - , ( "doksanbe\x015fbu\x00e7uk", 95.5) - , ( "doksanalt\x0131bu\x00e7uk", 96.5) - , ( "doksanyedibu\x00e7uk", 97.5) - , ( "doksansekizbu\x00e7uk", 98.5) - , ( "doksandokuzbu\x00e7uk", 99.5) + [ ( "onbirbuçuk", 11.5) + , ( "onbibuçuk", 11.5) + , ( "onikibuçuk", 12.5) + , ( "onüçbuçuk", 13.5) + , ( "ondörtbuçuk", 14.5) + , ( "onbeşbuçuk", 15.5) + , ( "onaltıbuçuk", 16.5) + , ( "onyedibuçuk", 17.5) + , ( "onsekizbuçuk", 18.5) + , ( "ondokuzbuçuk", 19.5) + , ( "yirmibibuçuk", 21.5) + , ( "yirmibirbuçuk", 21.5) + , ( "yirmiikibuçuk", 22.5) + , ( "yirmiüçbuçuk", 23.5) + , ( "yirmidörtbuçuk", 24.5) + , ( "yirmibeşbuçuk", 25.5) + , ( "yirmialtıbuçuk", 26.5) + , ( "yirmiyedibuçuk", 27.5) + , ( "yirmisekizbuçuk", 28.5) + , ( "yirmidokuzbuçuk", 29.5) + , ( "otuzbibuçuk", 31.5) + , ( "otuzbirbuçuk", 31.5) + , ( "otuzikibuçuk", 32.5) + , ( "otuzüçbuçuk", 33.5) + , ( "otuzdörtbuçuk", 34.5) + , ( "otuzbeşbuçuk", 35.5) + , ( "otuzaltıbuçuk", 36.5) + , ( "otuzyedibuçuk", 37.5) + , ( "otuzsekizbuçuk", 38.5) + , ( "otuzdokuzbuçuk", 39.5) + , ( "kırkbirbuçuk", 41.5) + , ( "kırkbibuçuk", 41.5) + , ( "kırkikibuçuk", 42.5) + , ( "kırküçbuçuk", 43.5) + , ( "kırkdörtbuçuk", 44.5) + , ( "kırkbeşbuçuk", 45.5) + , ( 
"kırkaltıbuçuk", 46.5) + , ( "kırkyedibuçuk", 47.5) + , ( "kırksekizbuçuk", 48.5) + , ( "kırkdokuzbuçuk", 49.5) + , ( "ellibibuçuk", 51.5) + , ( "ellibirbuçuk", 51.5) + , ( "elliikibuçuk", 52.5) + , ( "elliüçbuçuk", 53.5) + , ( "ellidörtbuçuk", 54.5) + , ( "ellibeşbuçuk", 55.5) + , ( "ellialtıbuçuk", 56.5) + , ( "elliyedibuçuk", 57.5) + , ( "ellisekizbuçuk", 58.5) + , ( "ellidokuzbuçuk", 59.5) + , ( "altmışbirbuçuk", 61.5) + , ( "atmışbirbuçuk", 61.5) + , ( "altmışikibuçuk", 62.5) + , ( "atmışikibuçuk", 62.5) + , ( "atmışüçbuçuk", 63.5) + , ( "altmışüçbuçuk", 63.5) + , ( "altmışdörtbuçuk", 64.5) + , ( "atmışdörtbuçuk", 64.5) + , ( "altmışbeşbuçuk", 65.5) + , ( "atmışbeşbuçuk", 65.5) + , ( "altmışaltıbuçuk", 66.5) + , ( "atmışaltıbuçuk", 66.5) + , ( "atmışyedibuçuk", 67.5) + , ( "altmışyedibuçuk", 67.5) + , ( "altmışsekizbuçuk", 68.5) + , ( "atmışsekizbuçuk", 68.5) + , ( "altmışdokuzbuçuk", 69.5) + , ( "atmışdokuzbuçuk", 69.5) + , ( "yetmişbibuçuk", 71.5) + , ( "yetmişbirbuçuk", 71.5) + , ( "yetmişikibuçuk", 72.5) + , ( "yetmişüçbuçuk", 73.5) + , ( "yetmişdörtbuçuk", 74.5) + , ( "yetmişbeşbuçuk", 75.5) + , ( "yetmişaltıbuçuk", 76.5) + , ( "yetmişyedibuçuk", 77.5) + , ( "yetmişsekizbuçuk", 78.5) + , ( "yetmişdokuzbuçuk", 79.5) + , ( "seksenbibuçuk", 81.5) + , ( "seksenbirbuçuk", 81.5) + , ( "seksenikibuçuk", 82.5) + , ( "seksenüçbuçuk", 83.5) + , ( "seksendörtbuçuk", 84.5) + , ( "seksenbeşbuçuk", 85.5) + , ( "seksenaltıbuçuk", 86.5) + , ( "seksenyedibuçuk", 87.5) + , ( "seksensekizbuçuk", 88.5) + , ( "seksendokuzbuçuk", 89.5) + , ( "doksanbirbuçuk", 91.5) + , ( "doksanbibuçuk", 91.5) + , ( "doksanikibuçuk", 92.5) + , ( "doksanüçbuçuk", 93.5) + , ( "doksandörtbuçuk", 94.5) + , ( "doksanbeşbuçuk", 95.5) + , ( "doksanaltıbuçuk", 96.5) + , ( "doksanyedibuçuk", 97.5) + , ( "doksansekizbuçuk", 98.5) + , ( "doksandokuzbuçuk", 99.5) ] ruleInteger9 :: Rule ruleInteger9 = Rule { name = "integer 11..19 21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99" , pattern = - [ 
regex "((on|yirmi|otuz|k\x0131rk|elli|atm\x0131\x015f|altm\x0131\x015f|yetmi\x015f|seksen|doksan)(bir|bi|iki|\x00fc\x00e7|d\x00f6rt|be\x015f|alt\x0131|yedi|sekiz|dokuz)(bu\x00e7uk))" + [ regex "((on|yirmi|otuz|kırk|elli|atmış|altmış|yetmiş|seksen|doksan)(bir|bi|iki|üç|dört|beş|altı|yedi|sekiz|dokuz)(buçuk))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -541,7 +541,7 @@ ruleDozen :: Rule ruleDozen = Rule { name = "dozen" , pattern = - [ regex "d\x00fczine" + [ regex "düzine" ] , prod = \_ -> integer 12 >>= withGrain 1 >>= withMultipliable } @@ -550,16 +550,16 @@ oneToNineMap :: HashMap Text Integer oneToNineMap = HashMap.fromList [ ( "s\305f\305r", 0) , ( "yok", 0) - , ( "hi\x00e7", 0) + , ( "hiç", 0) , ( "bir", 1) , ( "bi", 1) , ( "yek", 1) , ( "tek", 1) , ( "iki", 2) - , ( "\x00fc\x00e7", 3) - , ( "d\x00f6rt", 4) - , ( "be\x015f", 5) - , ( "alt\x0131", 6) + , ( "üç", 3) + , ( "dört", 4) + , ( "beş", 5) + , ( "altı", 6) , ( "yedi", 7) , ( "sekiz", 8) , ( "dokuz", 9) @@ -569,7 +569,7 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..9)" , pattern = - [ regex "(yok|hi(\x00e7)|s(\x0131)f(\x0131)r|bir?|[ty]ek|iki|(\x00fc)(\x00e7)|d(\x00f6)rt|be(\x015f)|alt(\x0131)|yedi|sekiz|dokuz)" + [ regex "(yok|hi(ç)|s(ı)f(ı)r|bir?|[ty]ek|iki|(ü)(ç)|d(ö)rt|be(ş)|alt(ı)|yedi|sekiz|dokuz)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -579,23 +579,23 @@ ruleInteger = Rule numeralSuffixesHalfsuffixText2Map :: HashMap Text Double numeralSuffixesHalfsuffixText2Map = HashMap.fromList - [ ( "onbu\x00e7uk", 10.5) - , ( "yirmibu\x00e7uk", 20.5) - , ( "otuzbu\x00e7uk", 30.5) - , ( "k\x0131rkbu\x00e7uk", 40.5) - , ( "ellibu\x00e7uk", 50.5) - , ( "atm\x0131\x015fbu\x00e7uk", 60.5) - , ( "altm\x0131\x015fbu\x00e7uk", 60.5) - , ( "yetmi\x015fbu\x00e7uk", 70.5) - , ( "seksenbu\x00e7uk", 80.5) - , ( "doksanbu\x00e7uk", 90.5) + [ ( "onbuçuk", 10.5) + , ( "yirmibuçuk", 20.5) + , ( "otuzbuçuk", 30.5) + , 
( "kırkbuçuk", 40.5) + , ( "ellibuçuk", 50.5) + , ( "atmışbuçuk", 60.5) + , ( "altmışbuçuk", 60.5) + , ( "yetmişbuçuk", 70.5) + , ( "seksenbuçuk", 80.5) + , ( "doksanbuçuk", 90.5) ] ruleNumeralSuffixesHalfsuffixText2 :: Rule ruleNumeralSuffixesHalfsuffixText2 = Rule { name = "number suffixes (half-suffix text) (10..90)" , pattern = - [ regex "((on|yirmi|otuz|k\x0131rk|elli|atm\x0131\x015f|altm\x0131\x015f|yetmi\x015f|seksen|doksan)(bu\x00e7uk))" + [ regex "((on|yirmi|otuz|kırk|elli|atmış|altmış|yetmiş|seksen|doksan)(buçuk))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -609,7 +609,7 @@ ruleNumeralSuffixesHalfSuffix = Rule { name = "number suffixes (half-suffix)" , pattern = [ dimension Numeral - , regex "(bu\x00e7uk)(?=[\\W\\$\x20ac]|$)" + , regex "(buçuk)(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v + 0.5 @@ -644,11 +644,11 @@ tensMap = HashMap.fromList [ ( "on", 10) , ( "yirmi", 20) , ( "otuz", 30) - , ( "k\x0131rk", 40) + , ( "kırk", 40) , ( "elli", 50) - , ( "altm\x0131\x015f", 60) - , ( "atm\x0131\x015f", 60) - , ( "yetmi\x015f", 70) + , ( "altmış", 60) + , ( "atmış", 60) + , ( "yetmiş", 70) , ( "seksen", 80) , ( "doksan", 90) ] @@ -657,7 +657,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (10..90)" , pattern = - [ regex "(on|yirmi|otuz|k\x0131rk|elli|atm\x0131\x015f|altm\x0131\x015f|yetmi\x015f|seksen|doksan)" + [ regex "(on|yirmi|otuz|kırk|elli|atmış|altmış|yetmiş|seksen|doksan)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -669,7 +669,7 @@ ruleQuarter :: Rule ruleQuarter = Rule { name = "quarter" , pattern = - [ regex "(\x00e7eyrek)" + [ regex "(çeyrek)" ] , prod = \_ -> double 0.25 } @@ -679,7 +679,7 @@ ruleNumeralDotNumeral = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "nokta|virg\x00fcl" + , regex "nokta|virgül" , numberWith TNumeral.grain isNothing 
] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/UK/Rules.hs b/Duckling/Numeral/UK/Rules.hs index 1296c8a2..7c195027 100644 --- a/Duckling/Numeral/UK/Rules.hs +++ b/Duckling/Numeral/UK/Rules.hs @@ -29,21 +29,21 @@ import Duckling.Types twentyNinetyMap :: HashMap Text Integer twentyNinetyMap = HashMap.fromList - [ ( "\x0434\x0432\x0430\x0434\x0446\x044f\x0442\x044c" , 20 ) - , ( "\x0442\x0440\x0438\x0434\x0446\x044f\x0442\x044c" , 30 ) - , ( "\x0441\x043e\x0440\x043e\x043a" , 40 ) - , ( "\x043f\x2018\x044f\x0442\x0434\x0435\x0441\x044f\x0442" , 50 ) - , ( "\x0448\x0456\x0441\x0442\x0434\x0435\x0441\x044f\x0442" , 60 ) - , ( "\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442" , 70 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x043d\x043e\x0441\x0442\x043e" , 90 ) - , ( "\x0432\x0456\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442" , 80 ) + [ ( "двадцять" , 20 ) + , ( "тридцять" , 30 ) + , ( "сорок" , 40 ) + , ( "п‘ятдесят" , 50 ) + , ( "шістдесят" , 60 ) + , ( "сімдесят" , 70 ) + , ( "дев‘яносто" , 90 ) + , ( "вісімдесят" , 80 ) ] ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer (20..90)" , pattern = - [ regex "(\x0434\x0432\x0430\x0434\x0446\x044f\x0442\x044c|\x0442\x0440\x0438\x0434\x0446\x044f\x0442\x044c|\x0441\x043e\x0440\x043e\x043a|\x043f\x2018\x044f\x0442\x0434\x0435\x0441\x044f\x0442|\x0448\x0456\x0441\x0442\x0434\x0435\x0441\x044f\x0442|\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442|\x0432\x0456\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442|\x0434\x0435\x0432\x2018\x044f\x043d\x043e\x0441\x0442\x043e)" + [ regex "(двадцять|тридцять|сорок|п‘ятдесят|шістдесят|сімдесят|вісімдесят|дев‘яносто)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -91,28 +91,28 @@ ruleInteger3 :: Rule ruleInteger3 = Rule { name = "integer 2" , pattern = - [ regex 
"(\x0434\x0432\x0430|\x0434\x0432\x0456|\x0434\x0432\x043e\x0454|\x043f\x0430\x0440\x0430|\x043f\x0430\x0440\x0443|\x043f\x0430\x0440\x043e\x0447\x043a\x0443|\x043f\x0430\x0440\x043e\x0447\x043a\x0430)" + [ regex "(два|дві|двоє|пара|пару|парочку|парочка)" ] , prod = \_ -> integer 2 } hundredsMap :: HashMap Text Integer hundredsMap = HashMap.fromList - [ ( "\x0441\x0442\x043e" , 100 ) - , ( "\x0434\x0432\x0456\x0441\x0442\x0456" , 200 ) - , ( "\x0442\x0440\x0438\x0441\x0442\x0430" , 300 ) - , ( "\x0447\x043e\x0442\x0438\x0440\x0438\x0441\x0442\x0430" , 400 ) - , ( "\x043f\x2018\x044f\x0442\x0441\x043e\x0442" , 500 ) - , ( "\x0448\x0456\x0441\x0442\x0441\x043e\x0442" , 600 ) - , ( "\x0441\x0456\x043c\x0441\x043e\x0442" , 700 ) - , ( "\x0432\x0456\x0441\x0456\x043c\x0441\x043e\x0442" , 800 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x0442\x0441\x043e\x0442" , 900 ) + [ ( "сто" , 100 ) + , ( "двісті" , 200 ) + , ( "триста" , 300 ) + , ( "чотириста" , 400 ) + , ( "п‘ятсот" , 500 ) + , ( "шістсот" , 600 ) + , ( "сімсот" , 700 ) + , ( "вісімсот" , 800 ) + , ( "дев‘ятсот" , 900 ) ] ruleInteger6 :: Rule ruleInteger6 = Rule { name = "integer (100..900)" , pattern = - [ regex "(\x0441\x0442\x043e|\x0434\x0432\x0456\x0441\x0442\x0456|\x0442\x0440\x0438\x0441\x0442\x0430|\x0447\x043e\x0442\x0438\x0440\x0438\x0441\x0442\x0430|\x043f\x2018\x044f\x0442\x0441\x043e\x0442|\x0448\x0456\x0441\x0442\x0441\x043e\x0442|\x0441\x0456\x043c\x0441\x043e\x0442|\x0432\x0456\x0441\x0456\x043c\x0441\x043e\x0442|\x0434\x0435\x0432\x2018\x044f\x0442\x0441\x043e\x0442)" + [ regex "(сто|двісті|триста|чотириста|п‘ятсот|шістсот|сімсот|вісімсот|дев‘ятсот)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -124,7 +124,7 @@ ruleNumeralsPrefixWithMinus :: Rule ruleNumeralsPrefixWithMinus = Rule { name = "numbers prefix with -, minus" , pattern = - [ regex "-|\x043c\x0456\x043d\x0443\x0441\\s?" + [ regex "-|мінус\\s?" 
, dimension Numeral ] , prod = \tokens -> case tokens of @@ -137,18 +137,18 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "((\x043a|\x043c|\x0433)|(\x041a|\x041c|\x0413))(?=[\\W\\$\x20ac]|$)" + , regex "((к|м|г)|(К|М|Г))(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): Token RegexMatch (GroupMatch (match:_)): _) -> case Text.toLower match of - "\x043a" -> double $ v * 1e3 - "\x041a" -> double $ v * 1e3 - "\x043c" -> double $ v * 1e6 - "\x041c" -> double $ v * 1e6 - "\x0433" -> double $ v * 1e9 - "\x0413" -> double $ v * 1e9 + "к" -> double $ v * 1e3 + "К" -> double $ v * 1e3 + "м" -> double $ v * 1e6 + "М" -> double $ v * 1e6 + "г" -> double $ v * 1e9 + "Г" -> double $ v * 1e9 _ -> Nothing _ -> Nothing } @@ -185,36 +185,36 @@ ruleInteger :: Rule ruleInteger = Rule { name = "integer 0" , pattern = - [ regex "(\x043d\x0443\x043b\x044c)" + [ regex "(нуль)" ] , prod = \_ -> integer 0 } threeNineteenMap :: HashMap Text Integer threeNineteenMap = HashMap.fromList - [ ( "\x0442\x0440\x0438" , 3 ) - , ( "\x0447\x043e\x0442\x0438\x0440\x0438" , 4 ) - , ( "\x043f\x2018\x044f\x0442\x044c" , 5 ) - , ( "\x0448\x0456\x0441\x0442\x044c" , 6 ) - , ( "\x0441\x0456\x043c" , 7 ) - , ( "\x0432\x0456\x0441\x0456\x043c" , 8 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x0442\x044c" , 9 ) - , ( "\x0434\x0435\x0441\x044f\x0442\x044c" , 10 ) - , ( "\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 11 ) - , ( "\x0434\x0432\x0430\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 12 ) - , ( "\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 13 ) - , ( "\x0447\x043e\x0442\x0438\x0440\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 14 ) - , ( "\x043f\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 15 ) - , ( "\x0448\x0456\x0441\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 16 ) - , ( 
"\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 17 ) - , ( "\x0432\x0456\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 18 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c" , 19 ) + [ ( "три" , 3 ) + , ( "чотири" , 4 ) + , ( "п‘ять" , 5 ) + , ( "шість" , 6 ) + , ( "сім" , 7 ) + , ( "вісім" , 8 ) + , ( "дев‘ять" , 9 ) + , ( "десять" , 10 ) + , ( "одинадцять" , 11 ) + , ( "дванадцять" , 12 ) + , ( "тринадцять" , 13 ) + , ( "чотирнадцять" , 14 ) + , ( "п‘ятнадцять" , 15 ) + , ( "шістнадцять" , 16 ) + , ( "сімнадцять" , 17 ) + , ( "вісімнадцять" , 18 ) + , ( "дев‘ятнадцять" , 19 ) ] ruleInteger4 :: Rule ruleInteger4 = Rule { name = "integer (3..19)" , pattern = - [ regex "(\x0442\x0440\x0438|\x0447\x043e\x0442\x0438\x0440\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0447\x043e\x0442\x0438\x0440\x0438|\x043f\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x043f\x2018\x044f\x0442\x044c|\x0448\x0456\x0441\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0448\x0456\x0441\x0442\x044c|\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0441\x0456\x043c|\x0432\x0456\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0432\x0456\x0441\x0456\x043c|\x0434\x0435\x0432\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0434\x0435\x0432\x2018\x044f\x0442\x044c|\x0434\x0435\x0441\x044f\x0442\x044c|\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0434\x0432\x0430\x043d\x0430\x0434\x0446\x044f\x0442\x044c|\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x044f\x0442\x044c)" + [ regex "(три|чотирнадцять|чотири|п‘ятнадцять|п‘ять|шістнадцять|шість|сімнадцять|сім|вісімнадцять|вісім|дев‘ятнадцять|дев‘ять|десять|одинадцять|дванадцять|тринадцять)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -226,7 +226,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer 1" , pattern = - [ regex 
"(\x043e\x0434\x0438\x043d|\x043e\x0434\x043d\x0430|\x043e\x0434\x043d\x0443|\x043e\x0434\x043d\x0435|\x043e\x0434\x043d\x043e\x0433\x043e)" + [ regex "(один|одна|одну|одне|одного)" ] , prod = \_ -> integer 1 } @@ -236,7 +236,7 @@ ruleNumeralDotNumeral = Rule { name = "number dot number" , pattern = [ dimension Numeral - , regex "\x043a\x0440\x0430\x043f\x043a\x0430" + , regex "крапка" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/VI/Rules.hs b/Duckling/Numeral/VI/Rules.hs index ebc968ac..3903f473 100644 --- a/Duckling/Numeral/VI/Rules.hs +++ b/Duckling/Numeral/VI/Rules.hs @@ -27,21 +27,21 @@ import Duckling.Types powersOfTenMap :: HashMap.HashMap Text.Text (Double, Int) powersOfTenMap = HashMap.fromList - [ ( "tr\x0103", (1e2, 2) ) - , ( "tr\x0103m", (1e2, 2) ) - , ( "ngh\x00ec", (1e3, 3) ) - , ( "ngh\x00ecn", (1e3, 3) ) - , ( "tri\x1ec7", (1e6, 6) ) - , ( "tri\x1ec7u", (1e6, 6) ) + [ ( "tră", (1e2, 2) ) + , ( "trăm", (1e2, 2) ) + , ( "nghì", (1e3, 3) ) + , ( "nghìn", (1e3, 3) ) + , ( "triệ", (1e6, 6) ) + , ( "triệu", (1e6, 6) ) , ( "t", (1e9, 9) ) - , ( "t\x1ef7", (1e9, 9) ) + , ( "tỷ", (1e9, 9) ) ] rulePowersOfTen :: Rule rulePowersOfTen = Rule { name = "powers of tens" , pattern = - [ regex "(tr\x0103m?|ngh\x00ecn?|tri\x1ec7u?|t\x1ef7?)" + [ regex "(trăm?|nghìn?|triệu?|tỷ?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -84,7 +84,7 @@ ruleNumeralsPrefixWithM :: Rule ruleNumeralsPrefixWithM = Rule { name = "numbers prefix with -, âm" , pattern = - [ regex "-|\x00e2m\\s?" + [ regex "-|âm\\s?" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -97,7 +97,7 @@ ruleNumerals2 = Rule { name = "numbers 25 35 45 55 65 75 85 95" , pattern = [ oneOf [20, 30 .. 
90] - , regex "l\x0103m" + , regex "lăm" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v + 5 @@ -146,7 +146,7 @@ ruleNumeralDot = Rule { name = "number dot 1 9" , pattern = [ dimension Numeral - , regex "ch\x1ea5m|ph\x1ea9y" + , regex "chấm|phẩy" , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of @@ -186,7 +186,7 @@ ruleNumeralsSuffixesKMG = Rule { name = "numbers suffixes (K, M, G)" , pattern = [ dimension Numeral - , regex "([kmg])(?=[\\W\\$\x20ac]|$)" + , regex "([kmg])(?=[\\W\\$€]|$)" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}): @@ -215,53 +215,53 @@ ruleNumeralNghn = Rule integerMap :: HashMap.HashMap Text.Text Integer integerMap = HashMap.fromList - [ ("kh\x00f4ng", 0) - , ("m\x1ed9t", 1) - , ("linh m\x1ed9t", 1) - , ("l\x1ebb m\x1ed9t", 1) + [ ("không", 0) + , ("một", 1) + , ("linh một", 1) + , ("lẻ một", 1) , ("hai", 2) - , ("l\x1ebb hai", 2) + , ("lẻ hai", 2) , ("linh hai", 2) , ("ba", 3) - , ("l\x1ebb", 3) + , ("lẻ", 3) , ("linh ba", 3) - , ("l\x1ebb b\x1ed1n", 4) - , ("linh b\x1ed1n", 4) - , ("b\x1ed1n", 4) - , ("n\x0103m", 5) - , ("l\x1ebb n\x0103m", 5) - , ("linh n\x0103m", 5) - , ("linh s\x00e1u", 6) - , ("s\x00e1u", 6) - , ("l\x1ebb s\x00e1u", 6) - , ("linh b\x1ea3y", 7) - , ("l\x1ebb b\x1ea3y", 7) - , ("b\x1ea3y", 7) - , ("l\x1ebb t\x00e1m", 8) - , ("linh t\x00e1m", 8) - , ("t\x00e1m", 8) - , ("l\x1ebb ch\x00edn", 9) - , ("ch\x00edn", 9) - , ("linh ch\x00edn", 9) - , ("linh m\x01b0\x1eddi", 10) - , ("m\x01b0\x1eddi", 10) - , ("l\x1ebb m\x01b0\x1eddi", 10) - , ("m\x01b0\x1eddi m\x1ed9t", 11) - , ("m\x01b0\x1eddi hai", 12) - , ("m\x01b0\x1eddi ba", 13) - , ("m\x01b0\x1eddi b\x1ed1n", 14) - , ("m\x01b0\x1eddi l\x0103m", 15) - , ("m\x01b0\x1eddi s\x00e1u", 16) - , ("m\x01b0\x1eddi b\x1ea3y", 17) - , ("m\x01b0\x1eddi t\x00e1m", 18) - , ("m\x01b0\x1eddi ch\x00edn", 19) + , ("lẻ bốn", 4) + , ("linh bốn", 4) + , ("bốn", 4) + , ("năm", 
5) + , ("lẻ năm", 5) + , ("linh năm", 5) + , ("linh sáu", 6) + , ("sáu", 6) + , ("lẻ sáu", 6) + , ("linh bảy", 7) + , ("lẻ bảy", 7) + , ("bảy", 7) + , ("lẻ tám", 8) + , ("linh tám", 8) + , ("tám", 8) + , ("lẻ chín", 9) + , ("chín", 9) + , ("linh chín", 9) + , ("linh mười", 10) + , ("mười", 10) + , ("lẻ mười", 10) + , ("mười một", 11) + , ("mười hai", 12) + , ("mười ba", 13) + , ("mười bốn", 14) + , ("mười lăm", 15) + , ("mười sáu", 16) + , ("mười bảy", 17) + , ("mười tám", 18) + , ("mười chín", 19) ] ruleInteger :: Rule ruleInteger = Rule { name = "integer (0..19)" , pattern = - [ regex "(kh\x00f4ng|m\x1ed9t|linh m\x1ed9t|l\x1ebb m\x1ed9t|hai|linh hai|l\x1ebb hai|ba|linh ba|l\x1ebb ba|b\x1ed1n|linh b\x1ed1n|l\x1ebb b\x1ed1n|n\x0103m|linh n\x0103m|l\x1ebb n\x0103m|s\x00e1u|l\x1ebb s\x00e1u|linh s\x00e1u|b\x1ea3y|l\x1ebb b\x1ea3y|linh b\x1ea3y|t\x00e1m|linh t\x00e1m|l\x1ebb t\x00e1m|ch\x00edn|linh ch\x00edn|l\x1ebb ch\x00edn|m\x01b0\x1eddi m\x1ed9t|m\x01b0\x1eddi hai|m\x01b0\x1eddi ba|m\x01b0\x1eddi b\x1ed1n|m\x01b0\x1eddi l\x0103m|m\x01b0\x1eddi s\x00e1u|m\x01b0\x1eddi b\x1ea3y|m\x01b0\x1eddi t\x00e1m|m\x01b0\x1eddi ch\x00edn|m\x01b0\x1eddi|linh m\x01b0\x1eddi)" + [ regex "(không|một|linh một|lẻ một|hai|linh hai|lẻ hai|ba|linh ba|lẻ ba|bốn|linh bốn|lẻ bốn|năm|linh năm|lẻ năm|sáu|lẻ sáu|linh sáu|bảy|lẻ bảy|linh bảy|tám|linh tám|lẻ tám|chín|linh chín|lẻ chín|mười một|mười hai|mười ba|mười bốn|mười lăm|mười sáu|mười bảy|mười tám|mười chín|mười|linh mười)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -271,21 +271,21 @@ ruleInteger = Rule tensMap :: HashMap.HashMap Text.Text Integer tensMap = HashMap.fromList - [ ("hai m\x01b0\x01a1i", 20) - , ("ba m\x01b0\x01a1i", 30) - , ("b\x1ed1n m\x01b0\x01a1i", 40) - , ("n\x0103m m\x01b0\x01a1i", 50) - , ("s\x00e1u m\x01b0\x01a1i", 60) - , ("b\x1ea3y m\x01b0\x01a1i", 70) - , ("t\x00e1m m\x01b0\x01a1i", 80) - , ("ch\x00edn m\x01b0\x01a1i", 90) + [ ("hai mươi", 20) + , ("ba mươi", 30) + , 
("bốn mươi", 40) + , ("năm mươi", 50) + , ("sáu mươi", 60) + , ("bảy mươi", 70) + , ("tám mươi", 80) + , ("chín mươi", 90) ] ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (20..90)" , pattern = - [ regex "(hai m\x01b0\x01a1i|ba m\x01b0\x01a1i|b\x1ed1n m\x01b0\x01a1i|n\x0103m m\x01b0\x01a1i|s\x00e1u m\x01b0\x01a1i|b\x1ea3y m\x01b0\x01a1i|t\x00e1m m\x01b0\x01a1i|ch\x00edn m\x01b0\x01a1i)" + [ regex "(hai mươi|ba mươi|bốn mươi|năm mươi|sáu mươi|bảy mươi|tám mươi|chín mươi)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -298,7 +298,7 @@ ruleNumerals = Rule { name = "numbers 21 31 41 51 61 71 81 91" , pattern = [ oneOf [20, 30 .. 90] - , regex "m\x1ed1t" + , regex "mốt" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v + 1 @@ -309,7 +309,7 @@ ruleT :: Rule ruleT = Rule { name = "tá" , pattern = - [ regex "t\x00e1" + [ regex "tá" ] , prod = \_ -> integer 12 >>= withGrain 1 >>= withMultipliable } diff --git a/Duckling/Numeral/ZH/Rules.hs b/Duckling/Numeral/ZH/Rules.hs index e6f93cdb..92b772f7 100644 --- a/Duckling/Numeral/ZH/Rules.hs +++ b/Duckling/Numeral/ZH/Rules.hs @@ -29,7 +29,7 @@ ruleInteger5 :: Rule ruleInteger5 = Rule { name = "integer (0..10)" , pattern = - [ regex "(\x3007|\x96f6|\x4e00|\x4e8c|\x4e24|\x5169|\x4e09|\x56db|\x4e94|\x516d|\x4e03|\x516b|\x4e5d|\x5341)(\x4e2a|\x500b)?" + [ regex "(〇|零|一|二|两|兩|三|四|五|六|七|八|九|十)(个|個)?" 
] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -39,20 +39,20 @@ ruleInteger5 = Rule integerMap :: HashMap.HashMap Text.Text Integer integerMap = HashMap.fromList - [ ( "\x3007", 0 ) - , ( "\x96f6", 0 ) - , ( "\x4e00", 1 ) - , ( "\x5169", 2 ) - , ( "\x4e24", 2 ) - , ( "\x4e8c", 2 ) - , ( "\x4e09", 3 ) - , ( "\x56db", 4 ) - , ( "\x4e94", 5 ) - , ( "\x516d", 6 ) - , ( "\x4e03", 7 ) - , ( "\x516b", 8 ) - , ( "\x4e5d", 9 ) - , ( "\x5341", 10 ) + [ ( "〇", 0 ) + , ( "零", 0 ) + , ( "一", 1 ) + , ( "兩", 2 ) + , ( "两", 2 ) + , ( "二", 2 ) + , ( "三", 3 ) + , ( "四", 4 ) + , ( "五", 5 ) + , ( "六", 6 ) + , ( "七", 7 ) + , ( "八", 8 ) + , ( "九", 9 ) + , ( "十", 10 ) ] @@ -60,7 +60,7 @@ ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule { name = "numbers prefix with -, negative or minus" , pattern = - [ regex "-|\x8d1f\\s?|\x8ca0\\s?" + [ regex "-|负\\s?|負\\s?" , dimension Numeral ] , prod = \tokens -> case tokens of @@ -109,7 +109,7 @@ ruleNumeral = Rule { name = "个" , pattern = [ dimension Numeral - , regex "\x4e2a" + , regex "个" ] , prod = \tokens -> case tokens of (token:_) -> Just token @@ -121,7 +121,7 @@ ruleInteger3 = Rule { name = "integer (20..90)" , pattern = [ numberBetween 2 10 - , regex "\x5341" + , regex "十" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ v * 10 @@ -164,7 +164,7 @@ ruleInteger2 :: Rule ruleInteger2 = Rule { name = "integer (11..19)" , pattern = - [ regex "\x5341" + [ regex "十" , numberBetween 1 10 ] , prod = \tokens -> case tokens of diff --git a/Duckling/Ordinal/AR/Rules.hs b/Duckling/Ordinal/AR/Rules.hs index 503aa7f7..4e99da43 100644 --- a/Duckling/Ordinal/AR/Rules.hs +++ b/Duckling/Ordinal/AR/Rules.hs @@ -22,7 +22,7 @@ ruleOrdinalsTh :: Rule ruleOrdinalsTh = Rule { name = "ordinals 7th" , pattern = - [ regex "(\x0633\x0627\x0628\x0639 | \x0633\x0627\x0628\x0639\x0629 | \x0627\x0644\x0633\x0627\x0628\x0639 | 
\x0627\x0644\x0633\x0627\x0628\x0639\x0629)" + [ regex "(سابع | سابعة | السابع | السابعة)" ] , prod = \_ -> Just $ ordinal 7 } @@ -31,7 +31,7 @@ ruleOrdinalsSecond :: Rule ruleOrdinalsSecond = Rule { name = "ordinals second" , pattern = - [ regex "(\x062b\x0627\x0646\x064a|\x062b\x0627\x0646\x064a\x0629|\x0627\x0644\x062b\x0627\x0646\x064a|\x0627\x0644\x062b\x0627\x0646\x064a\x0629)" + [ regex "(ثاني|ثانية|الثاني|الثانية)" ] , prod = \_ -> Just $ ordinal 2 } @@ -40,7 +40,7 @@ ruleOrdinalsFirst :: Rule ruleOrdinalsFirst = Rule { name = "ordinals first" , pattern = - [ regex "(\x0623\x0648\x0644|\x0627\x0644\x0623\x0648\x0644|\x0623\x0648\x0644\x0649|\x0627\x0644\x0623\x0648\x0644\x0649)" + [ regex "(أول|الأول|أولى|الأولى)" ] , prod = \_ -> Just $ ordinal 1 } @@ -49,7 +49,7 @@ ruleOrdinalsFirst5 :: Rule ruleOrdinalsFirst5 = Rule { name = "ordinals first" , pattern = - [ regex "(\x0633\x0627\x062f\x0633 | \x0633\x0627\x062f\x0633\x0629 | \x0627\x0644\x0633\x0627\x062f\x0633 | \x0627\x0644\x0633\x0627\x062f\x0633\x0629)" + [ regex "(سادس | سادسة | السادس | السادسة)" ] , prod = \_ -> Just $ ordinal 6 } @@ -58,7 +58,7 @@ ruleOrdinalsTh2 :: Rule ruleOrdinalsTh2 = Rule { name = "ordinals 8th" , pattern = - [ regex "(\x062b\x0627\x0645\x0646 | \x062b\x0627\x0645\x0646\x0629 | \x0627\x0644\x062b\x0627\x0645\x0646 | \x0627\x0644\x062b\x0627\x0645\x0646\x0629)" + [ regex "(ثامن | ثامنة | الثامن | الثامنة)" ] , prod = \_ -> Just $ ordinal 8 } @@ -67,7 +67,7 @@ ruleOrdinalsFirst2 :: Rule ruleOrdinalsFirst2 = Rule { name = "ordinals first" , pattern = - [ regex "(\x062b\x0627\x0644\x062b|\x062b\x0627\x0644\x062b\x0629|\x0627\x0644\x062b\x0627\x0644\x062b|\x0627\x0644\x062b\x0627\x0644\x062b\x0629)" + [ regex "(ثالث|ثالثة|الثالث|الثالثة)" ] , prod = \_ -> Just $ ordinal 3 } @@ -76,7 +76,7 @@ ruleOrdinalsTh4 :: Rule ruleOrdinalsTh4 = Rule { name = "ordinals 10th" , pattern = - [ regex "(\x0639\x0627\x0634\x0631 | \x0639\x0627\x0634\x0631\x0629 | \x0627\x0644\x0639\x0627\x0634\x0631 
| \x0627\x0644\x0639\x0627\x0634\x0631\x0629)" + [ regex "(عاشر | عاشرة | العاشر | العاشرة)" ] , prod = \_ -> Just $ ordinal 10 } @@ -85,7 +85,7 @@ ruleOrdinalsTh3 :: Rule ruleOrdinalsTh3 = Rule { name = "ordinals 9th" , pattern = - [ regex "(\x062a\x0627\x0633\x0639 | \x062a\x0627\x0633\x0639\x0629 | \x0627\x0644\x062a\x0627\x0633\x0639 | \x0627\x0644\x062a\x0627\x0633\x0639\x0629)" + [ regex "(تاسع | تاسعة | التاسع | التاسعة)" ] , prod = \_ -> Just $ ordinal 9 } @@ -94,7 +94,7 @@ ruleOrdinalsFirst4 :: Rule ruleOrdinalsFirst4 = Rule { name = "ordinals first" , pattern = - [ regex "(\x062e\x0627\x0645\x0633 | \x0627\x0644\x062e\x0627\x0645\x0633 | \x062e\x0627\x0645\x0633\x0629 | \x0627\x0644\x062e\x0627\x0645\x0633\x0629)" + [ regex "(خامس | الخامس | خامسة | الخامسة)" ] , prod = \_ -> Just $ ordinal 5 } @@ -103,7 +103,7 @@ ruleOrdinalsFirst3 :: Rule ruleOrdinalsFirst3 = Rule { name = "ordinals first" , pattern = - [ regex "(\x0631\x0627\x0628\x0639|\x0631\x0627\x0628\x0639\x0629 | \x0627\x0644\x0631\x0627\x0628\x0639|\x0627\x0644\x0631\x0627\x0628\x0639\x0629)" + [ regex "(رابع|رابعة | الرابع|الرابعة)" ] , prod = \_ -> Just $ ordinal 4 } diff --git a/Duckling/Ordinal/DA/Rules.hs b/Duckling/Ordinal/DA/Rules.hs index 2dd5b190..14d4b584 100644 --- a/Duckling/Ordinal/DA/Rules.hs +++ b/Duckling/Ordinal/DA/Rules.hs @@ -26,11 +26,11 @@ ruleOrdinalsFirstst :: Rule ruleOrdinalsFirstst = Rule { name = "ordinals (first..31st)" , pattern = - [ regex "(f\x00f8rste|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende|tyvende|tenogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|otteogtyvende|niogtyvende|tredivte|enogtredivte)" + [ regex 
"(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende|tyvende|tenogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|otteogtyvende|niogtyvende|tredivte|enogtredivte)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "f\x00f8rste" -> Just $ ordinal 1 + "første" -> Just $ ordinal 1 "anden" -> Just $ ordinal 2 "tredje" -> Just $ ordinal 3 "fjerde" -> Just $ ordinal 4 diff --git a/Duckling/Ordinal/DE/Rules.hs b/Duckling/Ordinal/DE/Rules.hs index 311ba95a..0eebc1af 100644 --- a/Duckling/Ordinal/DE/Rules.hs +++ b/Duckling/Ordinal/DE/Rules.hs @@ -26,7 +26,7 @@ ruleOrdinalsFirstth :: Rule ruleOrdinalsFirstth = Rule { name = "ordinals (first..19th)" , pattern = - [ regex "(erste(r|s)?|zweite(r|s)|dritte(r|s)|vierte(r|s)|fuenfte(r|s)|sechste(r|s)|siebte(r|s)|achte(r|s)|neunte(r|s)|zehnte(r|s)|elfter|zw\x00f6lfter|dreizenter|vierzehnter|f\x00fcnfzehnter|sechzenter|siebzehnter|achtzehnter|neunzehnter)" + [ regex "(erste(r|s)?|zweite(r|s)|dritte(r|s)|vierte(r|s)|fuenfte(r|s)|sechste(r|s)|siebte(r|s)|achte(r|s)|neunte(r|s)|zehnte(r|s)|elfter|zwölfter|dreizenter|vierzehnter|fünfzehnter|sechzenter|siebzehnter|achtzehnter|neunzehnter)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -42,9 +42,9 @@ ruleOrdinalsFirstth = Rule "viertes" -> Just $ ordinal 4 "vierte" -> Just $ ordinal 4 "vierter" -> Just $ ordinal 4 - "f\x00fcnftes" -> Just $ ordinal 5 - "f\x00fcnfter" -> Just $ ordinal 5 - "f\x00fcnfte" -> Just $ ordinal 5 + "fünftes" -> Just $ ordinal 5 + "fünfter" -> Just $ ordinal 5 + "fünfte" -> Just $ ordinal 5 "sechste" -> Just $ ordinal 6 "sechstes" -> Just $ ordinal 6 "sechster" -> Just $ ordinal 6 @@ -61,10 +61,10 @@ ruleOrdinalsFirstth = Rule "zehnter" -> Just $ ordinal 10 "zehntes" -> Just $ ordinal 10 "elfter" -> 
Just $ ordinal 11 - "zw\x00f6lfter" -> Just $ ordinal 12 + "zwölfter" -> Just $ ordinal 12 "dreizehnter" -> Just $ ordinal 13 "vierzehnter" -> Just $ ordinal 14 - "f\x00fcnfzehnter" -> Just $ ordinal 15 + "fünfzehnter" -> Just $ ordinal 15 "sechzehnter" -> Just $ ordinal 16 "siebzehnter" -> Just $ ordinal 17 "achtzehnter" -> Just $ ordinal 18 diff --git a/Duckling/Ordinal/ES/Rules.hs b/Duckling/Ordinal/ES/Rules.hs index 0d891452..8120e326 100644 --- a/Duckling/Ordinal/ES/Rules.hs +++ b/Duckling/Ordinal/ES/Rules.hs @@ -50,13 +50,13 @@ ordinalsMap = HashMap.fromList , ( "sexto" , 6 ) , ( "sexta" , 6 ) , ( "sextas" , 6 ) - , ( "s\x00e9ptimas" , 7 ) + , ( "séptimas" , 7 ) , ( "septimas" , 7 ) - , ( "s\x00e9ptima" , 7 ) + , ( "séptima" , 7 ) , ( "septimos" , 7 ) , ( "septima" , 7 ) - , ( "s\x00e9ptimo" , 7 ) - , ( "s\x00e9ptimos" , 7 ) + , ( "séptimo" , 7 ) + , ( "séptimos" , 7 ) , ( "septimo" , 7 ) , ( "octavas" , 8 ) , ( "octavo" , 8 ) @@ -66,21 +66,21 @@ ordinalsMap = HashMap.fromList , ( "novena" , 9 ) , ( "noveno" , 9 ) , ( "novenas" , 9 ) - , ( "d\233cimos" , 10 ) + , ( "décimos" , 10 ) , ( "decimo" , 10 ) , ( "decimos" , 10 ) - , ( "d\233cimo" , 10 ) + , ( "décimo" , 10 ) , ( "decimas" , 10 ) - , ( "d\233cima" , 10 ) + , ( "décima" , 10 ) , ( "decima" , 10 ) - , ( "d\233cimas" , 10 ) + , ( "décimas" , 10 ) ] ruleOrdinalsPrimero :: Rule ruleOrdinalsPrimero = Rule { name = "ordinals (primero..10)" , pattern = - [ regex "((primer|segund|cuart|quint|sext|s(e|\x00e9)ptim|octav|noven|d(e|\x00e9)cim)(os?|as?)|(prim|terc)er)" + [ regex "((primer|segund|cuart|quint|sext|s[eé]ptim|octav|noven|d[eé]cim)(os?|as?)|(prim|terc)er)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Ordinal/ET/Rules.hs b/Duckling/Ordinal/ET/Rules.hs index dc654373..d9297e7f 100644 --- a/Duckling/Ordinal/ET/Rules.hs +++ b/Duckling/Ordinal/ET/Rules.hs @@ -26,7 +26,7 @@ ruleOrdinalsFirstth :: Rule ruleOrdinalsFirstth = Rule { name = "ordinals 
(first..19th)" , pattern = - [ regex "(esimene|teine|kolmas|neljas|viies|kuues|seitsmes|kaheksas|\x00fcheksas|k\x00fcmnes|\x00fcheteistk\x00fcmnes|kaheteistk\x00fcmnes|kolmeteistk\x00fcmnes|neljateistk\x00fcmnes|viieteistk\x00fcmnes|kuueteistk\x00fcmnes|seitsmeteistk\x00fcmnes|kaheksateistk\x00fcmnes|\x00fcheksateistk\x00fcmnes)" + [ regex "(esimene|teine|kolmas|neljas|viies|kuues|seitsmes|kaheksas|üheksas|kümnes|üheteistkümnes|kaheteistkümnes|kolmeteistkümnes|neljateistkümnes|viieteistkümnes|kuueteistkümnes|seitsmeteistkümnes|kaheksateistkümnes|üheksateistkümnes)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -38,17 +38,17 @@ ruleOrdinalsFirstth = Rule "kuues" -> Just $ ordinal 6 "seitsmes" -> Just $ ordinal 7 "kaheksas" -> Just $ ordinal 8 - "\x00fcheksas" -> Just $ ordinal 9 - "k\x00fcmnes" -> Just $ ordinal 10 - "\x00fcheteistk\x00fcmnes" -> Just $ ordinal 11 - "kaheteistk\x00fcmnes" -> Just $ ordinal 12 - "kolmeteistk\x00fcmnes" -> Just $ ordinal 13 - "neljateistk\x00fcmnes" -> Just $ ordinal 14 - "viieteistk\x00fcmnes" -> Just $ ordinal 15 - "kuueteistk\x00fcmnes" -> Just $ ordinal 16 - "seitsmeteistk\x00fcmnes" -> Just $ ordinal 17 - "kaheksateistk\x00fcmnes" -> Just $ ordinal 18 - "\x00fcheksateistk\x00fcmnes" -> Just $ ordinal 19 + "üheksas" -> Just $ ordinal 9 + "kümnes" -> Just $ ordinal 10 + "üheteistkümnes" -> Just $ ordinal 11 + "kaheteistkümnes" -> Just $ ordinal 12 + "kolmeteistkümnes" -> Just $ ordinal 13 + "neljateistkümnes" -> Just $ ordinal 14 + "viieteistkümnes" -> Just $ ordinal 15 + "kuueteistkümnes" -> Just $ ordinal 16 + "seitsmeteistkümnes" -> Just $ ordinal 17 + "kaheksateistkümnes" -> Just $ ordinal 18 + "üheksateistkümnes" -> Just $ ordinal 19 _ -> Nothing _ -> Nothing } diff --git a/Duckling/Ordinal/FR/Rules.hs b/Duckling/Ordinal/FR/Rules.hs index 17f47313..bdf3cfd0 100644 --- a/Duckling/Ordinal/FR/Rules.hs +++ b/Duckling/Ordinal/FR/Rules.hs @@ -27,48 +27,48 @@ import 
Duckling.Types ruleOrdinalsPremierseiziemeMap :: HashMap Text Int ruleOrdinalsPremierseiziemeMap = HashMap.fromList - [ ( "premi\x00e8re" , 1 ) + [ ( "première" , 1 ) , ( "premiere" , 1 ) , ( "premier" , 1 ) - , ( "deuxi\x00e8me" , 2 ) + , ( "deuxième" , 2 ) , ( "deuxieme" , 2 ) , ( "second" , 2 ) , ( "seconde" , 2 ) - , ( "troisi\x00e8me" , 3 ) + , ( "troisième" , 3 ) , ( "troisieme" , 3 ) , ( "quatrieme" , 4 ) - , ( "quatri\x00e8me" , 4 ) + , ( "quatrième" , 4 ) , ( "cinquieme" , 5 ) - , ( "cinqui\x00e8me" , 5 ) - , ( "sixi\x00e8me" , 6 ) + , ( "cinquième" , 5 ) + , ( "sixième" , 6 ) , ( "sixieme" , 6 ) , ( "septieme" , 7 ) - , ( "septi\x00e8me" , 7 ) - , ( "huiti\x00e8me" , 8 ) + , ( "septième" , 7 ) + , ( "huitième" , 8 ) , ( "huitieme" , 8 ) , ( "neuvieme" , 9 ) - , ( "neuvi\x00e8me" , 9 ) - , ( "dixi\x00e8me" , 10 ) + , ( "neuvième" , 9 ) + , ( "dixième" , 10 ) , ( "dixieme" , 10 ) - , ( "onzi\x00e8me" , 11 ) + , ( "onzième" , 11 ) , ( "onzieme" , 11 ) , ( "douzieme" , 12 ) - , ( "douzi\x00e8me" , 12 ) + , ( "douzième" , 12 ) , ( "treizieme" , 13 ) - , ( "treizi\x00e8me" , 13 ) - , ( "quatorzi\x00e8me", 14 ) + , ( "treizième" , 13 ) + , ( "quatorzième", 14 ) , ( "quatorzieme" , 14 ) - , ( "quinzi\x00e8me" , 15 ) + , ( "quinzième" , 15 ) , ( "quinzieme" , 15 ) , ( "seizieme" , 16 ) - , ( "seizi\x00e8me" , 16 ) + , ( "seizième" , 16 ) ] ruleOrdinalsPremierseizieme :: Rule ruleOrdinalsPremierseizieme = Rule { name = "ordinals (premier..seizieme)" , pattern = - [ regex "(premi(ere?|\x00e8re)|(deux|trois|quatr|cinqu|six|sept|huit|neuv|dix|onz|douz|treiz|quatorz|quinz|seiz)i(e|\x00e8)me|seconde?)" + [ regex "(premi(ere?|ère)|(deux|trois|quatr|cinqu|six|sept|huit|neuv|dix|onz|douz|treiz|quatorz|quinz|seiz)i(e|è)me|seconde?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -80,7 +80,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "0*(\\d+) ?(ere?|\x00e8re|\x00e8me|eme|e)" 
+ [ regex "0*(\\d+) ?(ere?|ère|ème|eme|e)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> do diff --git a/Duckling/Ordinal/GA/Rules.hs b/Duckling/Ordinal/GA/Rules.hs index 9d323acb..96f7f08a 100644 --- a/Duckling/Ordinal/GA/Rules.hs +++ b/Duckling/Ordinal/GA/Rules.hs @@ -26,79 +26,79 @@ ruleOrdinalsChadDaraEtc :: Rule ruleOrdinalsChadDaraEtc = Rule { name = "ordinals (chéad, dara, etc.)" , pattern = - [ regex "(ch(\x00e9|e)ad|aon(\x00fa|u)|t-aon(\x00fa|u)|dara|tr(\x00ed|i)(\x00fa|u)|ceathr(\x00fa|u)|c(\x00fa|u)igi(\x00fa|u)|s(\x00e9|e)(\x00fa|u)|seacht(\x00fa|u)|ocht(\x00fa|u)|t-ocht(\x00fa|u)|nao(\x00fa|u)|deichi(\x00fa|u)|fichi(\x00fa|u)|tr(\x00ed|i)ochad(\x00fa|u)|daichead(\x00fa|u)|caogad(\x00fa|u)|seascad(\x00fa|u)|seacht(\x00f3|o)d(\x00fa|u)|ocht(\x00f3|o)d(\x00fa|u)|t-ocht(\x00f3|o)d(\x00fa|u)|n(\x00f3|o)chad(\x00fa|u)|c(\x00e9|e)ad(\x00fa|u)|mili(\x00fa|u)|milli(\x00fa|u)n(\x00fa|u))" + [ regex "(ch(é|e)ad|aon(ú|u)|t-aon(ú|u)|dara|tr(í|i)(ú|u)|ceathr(ú|u)|c(ú|u)igi(ú|u)|s(é|e)(ú|u)|seacht(ú|u)|ocht(ú|u)|t-ocht(ú|u)|nao(ú|u)|deichi(ú|u)|fichi(ú|u)|tr(í|i)ochad(ú|u)|daichead(ú|u)|caogad(ú|u)|seascad(ú|u)|seacht(ó|o)d(ú|u)|ocht(ó|o)d(ú|u)|t-ocht(ó|o)d(ú|u)|n(ó|o)chad(ú|u)|c(é|e)ad(ú|u)|mili(ú|u)|milli(ú|u)n(ú|u))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of "t-aonu" -> Just $ ordinal 1 "aonu" -> Just $ ordinal 1 - "aon\x00fa" -> Just $ ordinal 1 - "ch\x00e9ad" -> Just $ ordinal 1 + "aonú" -> Just $ ordinal 1 + "chéad" -> Just $ ordinal 1 "chead" -> Just $ ordinal 1 - "t-aon\x00fa" -> Just $ ordinal 1 + "t-aonú" -> Just $ ordinal 1 "dara" -> Just $ ordinal 2 - "tri\x00fa" -> Just $ ordinal 3 - "tr\x00edu" -> Just $ ordinal 3 - "tr\x00ed\x00fa" -> Just $ ordinal 3 + "triú" -> Just $ ordinal 3 + "tríu" -> Just $ ordinal 3 + "tríú" -> Just $ ordinal 3 "triu" -> Just $ ordinal 3 - "ceathr\x00fa" -> Just $ ordinal 4 + "ceathrú" -> Just $ ordinal 4 "ceathru" -> 
Just $ ordinal 4 - "c\x00faigiu" -> Just $ ordinal 5 - "c\x00faigi\x00fa" -> Just $ ordinal 5 + "cúigiu" -> Just $ ordinal 5 + "cúigiú" -> Just $ ordinal 5 "cuigiu" -> Just $ ordinal 5 - "cuigi\x00fa" -> Just $ ordinal 5 - "s\x00e9u" -> Just $ ordinal 6 - "s\x00e9\x00fa" -> Just $ ordinal 6 + "cuigiú" -> Just $ ordinal 5 + "séu" -> Just $ ordinal 6 + "séú" -> Just $ ordinal 6 "seu" -> Just $ ordinal 6 - "se\x00fa" -> Just $ ordinal 6 + "seú" -> Just $ ordinal 6 "seachtu" -> Just $ ordinal 7 - "seacht\x00fa" -> Just $ ordinal 7 - "t-ocht\x00fa" -> Just $ ordinal 8 + "seachtú" -> Just $ ordinal 7 + "t-ochtú" -> Just $ ordinal 8 "ochtu" -> Just $ ordinal 8 "t-ochtu" -> Just $ ordinal 8 - "ocht\x00fa" -> Just $ ordinal 8 + "ochtú" -> Just $ ordinal 8 "naou" -> Just $ ordinal 9 - "nao\x00fa" -> Just $ ordinal 9 + "naoú" -> Just $ ordinal 9 "deichiu" -> Just $ ordinal 10 - "deichi\x00fa" -> Just $ ordinal 10 + "deichiú" -> Just $ ordinal 10 "fichiu" -> Just $ ordinal 20 - "fichi\x00fa" -> Just $ ordinal 20 - "tr\x00edochadu" -> Just $ ordinal 30 + "fichiú" -> Just $ ordinal 20 + "tríochadu" -> Just $ ordinal 30 "triochadu" -> Just $ ordinal 30 - "tr\x00edochad\x00fa" -> Just $ ordinal 30 - "triochad\x00fa" -> Just $ ordinal 30 - "daichead\x00fa" -> Just $ ordinal 40 + "tríochadú" -> Just $ ordinal 30 + "triochadú" -> Just $ ordinal 30 + "daicheadú" -> Just $ ordinal 40 "daicheadu" -> Just $ ordinal 40 "caogadu" -> Just $ ordinal 50 - "caogad\x00fa" -> Just $ ordinal 50 + "caogadú" -> Just $ ordinal 50 "seascadu" -> Just $ ordinal 60 - "seascad\x00fa" -> Just $ ordinal 60 + "seascadú" -> Just $ ordinal 60 "seachtodu" -> Just $ ordinal 70 - "seachtod\x00fa" -> Just $ ordinal 70 - "seacht\x00f3d\x00fa" -> Just $ ordinal 70 - "seacht\x00f3du" -> Just $ ordinal 70 - "ocht\x00f3du" -> Just $ ordinal 80 + "seachtodú" -> Just $ ordinal 70 + "seachtódú" -> Just $ ordinal 70 + "seachtódu" -> Just $ ordinal 70 + "ochtódu" -> Just $ ordinal 80 "ochtodu" -> Just $ ordinal 80 
"t-ochtodu" -> Just $ ordinal 80 - "t-ocht\x00f3d\x00fa" -> Just $ ordinal 80 - "t-ochtod\x00fa" -> Just $ ordinal 80 - "ocht\x00f3d\x00fa" -> Just $ ordinal 80 - "t-ocht\x00f3du" -> Just $ ordinal 80 - "ochtod\x00fa" -> Just $ ordinal 80 - "n\x00f3chad\x00fa" -> Just $ ordinal 90 - "n\x00f3chadu" -> Just $ ordinal 90 - "nochad\x00fa" -> Just $ ordinal 90 + "t-ochtódú" -> Just $ ordinal 80 + "t-ochtodú" -> Just $ ordinal 80 + "ochtódú" -> Just $ ordinal 80 + "t-ochtódu" -> Just $ ordinal 80 + "ochtodú" -> Just $ ordinal 80 + "nóchadú" -> Just $ ordinal 90 + "nóchadu" -> Just $ ordinal 90 + "nochadú" -> Just $ ordinal 90 "nochadu" -> Just $ ordinal 90 - "c\x00e9ad\x00fa" -> Just $ ordinal 100 - "cead\x00fa" -> Just $ ordinal 100 + "céadú" -> Just $ ordinal 100 + "ceadú" -> Just $ ordinal 100 "ceadu" -> Just $ ordinal 100 - "c\x00e9adu" -> Just $ ordinal 100 + "céadu" -> Just $ ordinal 100 "miliu" -> Just $ ordinal 1000 - "mili\x00fa" -> Just $ ordinal 1000 - "milliun\x00fa" -> Just $ ordinal 1000000 + "miliú" -> Just $ ordinal 1000 + "milliunú" -> Just $ ordinal 1000000 "milliunu" -> Just $ ordinal 1000000 - "milli\x00fanu" -> Just $ ordinal 1000000 - "milli\x00fan\x00fa" -> Just $ ordinal 1000000 + "milliúnu" -> Just $ ordinal 1000000 + "milliúnú" -> Just $ ordinal 1000000 _ -> Nothing _ -> Nothing } @@ -107,7 +107,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "0*(\\d+) ?(adh|a|d|\x00fa|u)" + [ regex "0*(\\d+) ?(adh|a|d|ú|u)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> ordinal <$> parseInt match diff --git a/Duckling/Ordinal/HE/Rules.hs b/Duckling/Ordinal/HE/Rules.hs index 0e7df163..3aed962a 100644 --- a/Duckling/Ordinal/HE/Rules.hs +++ b/Duckling/Ordinal/HE/Rules.hs @@ -29,7 +29,7 @@ ruleOrdinal4 :: Rule ruleOrdinal4 = Rule { name = "ordinal 4" , pattern = - [ regex "(\x05d0\x05e8\x05d1\x05e2\x05d4|\x05e8\x05d1\x05d9\x05e2\x05d9)" + [ regex "(ארבעה|רביעי)" ] , prod 
= \_ -> Just $ ordinal 4 } @@ -38,7 +38,7 @@ ruleOrdinal9 :: Rule ruleOrdinal9 = Rule { name = "ordinal 9" , pattern = - [ regex "(\x05ea\x05e9\x05e2\x05d4|\x05ea\x05e9\x05d9\x05e2\x05d9)" + [ regex "(תשעה|תשיעי)" ] , prod = \_ -> Just $ ordinal 9 } @@ -47,7 +47,7 @@ ruleOrdinal10 :: Rule ruleOrdinal10 = Rule { name = "ordinal 10" , pattern = - [ regex "(\x05e2\x05e9\x05e8\x05d4|\x05e2\x05e9\x05d9\x05e8\x05d9)" + [ regex "(עשרה|עשירי)" ] , prod = \_ -> Just $ ordinal 10 } @@ -56,7 +56,7 @@ ruleOrdinal12 :: Rule ruleOrdinal12 = Rule { name = "ordinal 12" , pattern = - [ regex "(\x05e9\x05e0\x05d9\x05d9\x05dd \x05e2\x05e9\x05e8|\x05ea\x05e8\x05d9 \x05e2\x05e9\x05e8)" + [ regex "(שניים עשר|תרי עשר)" ] , prod = \_ -> Just $ ordinal 12 } @@ -65,7 +65,7 @@ ruleOrdinal17 :: Rule ruleOrdinal17 = Rule { name = "ordinal 17" , pattern = - [ regex "(\x05e9\x05d1\x05e2(\x05d4)? \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(שבע(ה)? עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 17 } @@ -74,7 +74,7 @@ ruleOrdinal18 :: Rule ruleOrdinal18 = Rule { name = "ordinal 18" , pattern = - [ regex "(\x05e9\x05de\x05d5\x05e0\x05d4 \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(שמונה עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 18 } @@ -94,7 +94,7 @@ ruleOrdinal15 :: Rule ruleOrdinal15 = Rule { name = "ordinal 15" , pattern = - [ regex "(\x05d7\x05de\x05d9\x05e9\x05d4 \x05e2\x05e9\x05e8|\x05d7\x05de\x05e9 \x05e2\x05e9\x05e8\x05d4?)" + [ regex "(חמישה עשר|חמש עשרה?)" ] , prod = \_ -> Just $ ordinal 15 } @@ -103,7 +103,7 @@ ruleOrdinal5 :: Rule ruleOrdinal5 = Rule { name = "ordinal 5" , pattern = - [ regex "(\x05d7\x05de\x05d9\x05e9\x05d9|\x05d7\x05de\x05d9\x05e9\x05d4)" + [ regex "(חמישי|חמישה)" ] , prod = \_ -> Just $ ordinal 5 } @@ -112,7 +112,7 @@ ruleOrdinal16 :: Rule ruleOrdinal16 = Rule { name = "ordinal 16" , pattern = - [ regex "(\x05e9\x05e9(\x05d4)? \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(שש(ה)? 
עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 16 } @@ -121,7 +121,7 @@ ruleOrdinal14 :: Rule ruleOrdinal14 = Rule { name = "ordinal 14" , pattern = - [ regex "(\x05d0\x05e8\x05d1\x05e2(\x05d4)? \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(ארבע(ה)? עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 14 } @@ -130,18 +130,18 @@ ruleOrdinal20 :: Rule ruleOrdinal20 = Rule { name = "ordinal 20..90" , pattern = - [ regex "(\x05e2\x05e9\x05e8\x05d9\x05dd|\x05e9\x05dc\x05d5\x05e9\x05d9\x05dd|\x05d0\x05e8\x05d1\x05e2\x05d9\x05dd|\x05d7\x05de\x05d9\x05e9\x05d9\x05dd|\x05e9\x05d9\x05e9\x05d9\x05dd|\x05e9\x05d1\x05e2\x05d9\x05dd|\x05e9\x05de\x05d5\x05e0\x05d9\x05dd|\x05ea\x05e9\x05e2\x05d9\x05dd)" + [ regex "(עשרים|שלושים|ארבעים|חמישים|שישים|שבעים|שמונים|תשעים)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x05e2\x05e9\x05e8\x05d9\x05dd" -> Just $ ordinal 20 - "\x05e9\x05dc\x05d5\x05e9\x05d9\x05dd" -> Just $ ordinal 30 - "\x05d0\x05e8\x05d1\x05e2\x05d9\x05dd" -> Just $ ordinal 40 - "\x05d7\x05de\x05d9\x05e9\x05d9\x05dd" -> Just $ ordinal 50 - "\x05e9\x05d9\x05e9\x05d9\x05dd" -> Just $ ordinal 60 - "\x05e9\x05d1\x05e2\x05d9\x05dd" -> Just $ ordinal 70 - "\x05e9\x05de\x05d5\x05e0\x05d9\x05dd" -> Just $ ordinal 80 - "\x05ea\x05e9\x05e2\x05d9\x05dd" -> Just $ ordinal 90 + "עשרים" -> Just $ ordinal 20 + "שלושים" -> Just $ ordinal 30 + "ארבעים" -> Just $ ordinal 40 + "חמישים" -> Just $ ordinal 50 + "שישים" -> Just $ ordinal 60 + "שבעים" -> Just $ ordinal 70 + "שמונים" -> Just $ ordinal 80 + "תשעים" -> Just $ ordinal 90 _ -> Nothing _ -> Nothing } @@ -150,7 +150,7 @@ ruleOrdinal :: Rule ruleOrdinal = Rule { name = "ordinal 1" , pattern = - [ regex "(\x05d0\x05d7\x05d3|\x05e8\x05d0\x05e9\x05d5\x05df)" + [ regex "(אחד|ראשון)" ] , prod = \_ -> Just $ ordinal 1 } @@ -159,7 +159,7 @@ ruleOrdinal13 :: Rule ruleOrdinal13 = Rule { name = "ordinal 13" , pattern = - [ regex "(\x05e9\x05dc\x05d5\x05e9(\x05d4)? 
\x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(שלוש(ה)? עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 13 } @@ -168,7 +168,7 @@ ruleOrdinal7 :: Rule ruleOrdinal7 = Rule { name = "ordinal 7" , pattern = - [ regex "(\x05e9\x05d1\x05e2\x05d4|\x05e9\x05d1\x05d9\x05e2\x05d9)" + [ regex "(שבעה|שביעי)" ] , prod = \_ -> Just $ ordinal 7 } @@ -177,7 +177,7 @@ ruleOrdinal8 :: Rule ruleOrdinal8 = Rule { name = "ordinal 8" , pattern = - [ regex "(\x05e9\x05de\x05d5\x05e0\x05d4|\x05e9\x05de\x05d9\x05e0\x05d9)" + [ regex "(שמונה|שמיני)" ] , prod = \_ -> Just $ ordinal 8 } @@ -186,7 +186,7 @@ ruleOrdinal2 :: Rule ruleOrdinal2 = Rule { name = "ordinal 2" , pattern = - [ regex "(\x05e9\x05ea\x05d9\x05d9\x05dd|\x05e9\x05e0\x05d9\x05d9\x05dd|\x05e9\x05e0\x05d9)" + [ regex "(שתיים|שניים|שני)" ] , prod = \_ -> Just $ ordinal 2 } @@ -195,7 +195,7 @@ ruleOrdinal11 :: Rule ruleOrdinal11 = Rule { name = "ordinal 11" , pattern = - [ regex "(\x05d0\x05d7\x05d3 \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(אחד עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 11 } @@ -204,7 +204,7 @@ ruleOrdinal3 :: Rule ruleOrdinal3 = Rule { name = "ordinal 3" , pattern = - [ regex "(\x05e9\x05dc\x05d5\x05e9\x05d4|\x05e9\x05dc\x05d9\x05e9\x05d9)" + [ regex "(שלושה|שלישי)" ] , prod = \_ -> Just $ ordinal 3 } @@ -213,7 +213,7 @@ ruleOrdinal6 :: Rule ruleOrdinal6 = Rule { name = "ordinal 6" , pattern = - [ regex "(\x05e9\x05e9\x05d4|\x05e9\x05d9\x05e9\x05d9)" + [ regex "(ששה|שישי)" ] , prod = \_ -> Just $ ordinal 6 } @@ -222,7 +222,7 @@ ruleOrdinal19 :: Rule ruleOrdinal19 = Rule { name = "ordinal 19" , pattern = - [ regex "(\x05ea\x05e9\x05e2(\x05d4)? \x05e2\x05e9\x05e8(\x05d4)?)" + [ regex "(תשע(ה)? 
עשר(ה)?)" ] , prod = \_ -> Just $ ordinal 19 } @@ -232,7 +232,7 @@ ruleCompositeWithAnd = Rule { name = "ordinal composition (with and)" , pattern = [ dimension Ordinal - , regex "\x05d5" + , regex "ו" , dimension Ordinal ] , prod = \tokens -> case tokens of diff --git a/Duckling/Ordinal/HR/Rules.hs b/Duckling/Ordinal/HR/Rules.hs index 4a3f4e34..6cf3e175 100644 --- a/Duckling/Ordinal/HR/Rules.hs +++ b/Duckling/Ordinal/HR/Rules.hs @@ -152,7 +152,7 @@ ruleOrdinalsFirstth :: Rule ruleOrdinalsFirstth = Rule { name = "ordinals (first..19th)" , pattern = - [ regex "(prv(i|a|o(ga?)?)|drug(i|a|o(ga?)?)|tre(c|\x0107)(i|a|e(ga?)?)|(\x010d|c)etvrt(i|a|o(ga?)?)|pet(i|a|o(ga?)?)|(\x0161|s)est(i|a|o(ga?)?)|sedm(i|a|o(ga?)?)|osm(i|a|o(ga?)?)|devet(i|a|o(ga?)?)|deset(i|a|o(ga?)?)|jedanaest(i|a|o(ga?)?)|dvanaest(i|a|o(ga?)?)|trinaest(i|a|o(ga?)?)|(c|\x010d)etrnaest(i|a|o(ga?)?)|petnaest(i|a|o(ga?)?)|(s|\x0161)esnaest(i|a|o(ga?)?)|sedamnaest(i|a|o(ga?)?)|osamnaest(i|a|o(ga?)?)|devetnaest(i|a|o(ga?)?))" + [ regex "(prv(i|a|o(ga?)?)|drug(i|a|o(ga?)?)|tre(c|ć)(i|a|e(ga?)?)|(č|c)etvrt(i|a|o(ga?)?)|pet(i|a|o(ga?)?)|(š|s)est(i|a|o(ga?)?)|sedm(i|a|o(ga?)?)|osm(i|a|o(ga?)?)|devet(i|a|o(ga?)?)|deset(i|a|o(ga?)?)|jedanaest(i|a|o(ga?)?)|dvanaest(i|a|o(ga?)?)|trinaest(i|a|o(ga?)?)|(c|č)etrnaest(i|a|o(ga?)?)|petnaest(i|a|o(ga?)?)|(s|š)esnaest(i|a|o(ga?)?)|sedamnaest(i|a|o(ga?)?)|osamnaest(i|a|o(ga?)?)|devetnaest(i|a|o(ga?)?))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Ordinal/IT/Rules.hs b/Duckling/Ordinal/IT/Rules.hs index b0631420..ea4f0843 100644 --- a/Duckling/Ordinal/IT/Rules.hs +++ b/Duckling/Ordinal/IT/Rules.hs @@ -78,7 +78,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "0*(\\d+) ?(\x00aa|\x00b0|\x00b0)" + [ regex "0*(\\d+) ?(ª|°|°)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> ordinal <$> parseInt match diff --git 
a/Duckling/Ordinal/JA/Rules.hs b/Duckling/Ordinal/JA/Rules.hs index 66dea7d1..9c66f636 100644 --- a/Duckling/Ordinal/JA/Rules.hs +++ b/Duckling/Ordinal/JA/Rules.hs @@ -25,7 +25,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "\x7b2c" + [ regex "第" , dimension Numeral ] , prod = \tokens -> case tokens of diff --git a/Duckling/Ordinal/KO/Rules.hs b/Duckling/Ordinal/KO/Rules.hs index 1dddb9ab..0798c389 100644 --- a/Duckling/Ordinal/KO/Rules.hs +++ b/Duckling/Ordinal/KO/Rules.hs @@ -26,7 +26,7 @@ ruleOrdinals = Rule { name = "ordinals (첫번째)" , pattern = [ dimension Numeral - , regex "\xbc88\xc9f8|\xc9f8(\xbc88)?" + , regex "번째|째(번)?" ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v}):_) -> diff --git a/Duckling/Ordinal/NB/Rules.hs b/Duckling/Ordinal/NB/Rules.hs index 592585ec..74b0432b 100644 --- a/Duckling/Ordinal/NB/Rules.hs +++ b/Duckling/Ordinal/NB/Rules.hs @@ -26,18 +26,18 @@ ruleOrdinalsFirstst :: Rule ruleOrdinalsFirstst = Rule { name = "ordinals (first..31st)" , pattern = - [ regex "(f\x00f8rste|andre|tredje|fjerde|femtende|femte|sjette|syvende|\x00e5ttende|niende|tiende|ellevte|tolvte|trettende|fjortende|sekstende|syttende|attende|nittende|tyvende|tjuende|enogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|\x00e5tteogtyvende|niogtyvende|enogtjuende|toogtjuende|treogtjuende|fireogtjuende|femogtjuende|seksogtjuende|syvogtjuende|\x00e5tteogtyvend|niogtjuende|tredefte|enogtredefte)" + [ regex "(første|andre|tredje|fjerde|femtende|femte|sjette|syvende|åttende|niende|tiende|ellevte|tolvte|trettende|fjortende|sekstende|syttende|attende|nittende|tyvende|tjuende|enogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|åtteogtyvende|niogtyvende|enogtjuende|toogtjuende|treogtjuende|fireogtjuende|femogtjuende|seksogtjuende|syvogtjuende|åtteogtyvend|niogtjuende|tredefte|enogtredefte)" ] , prod = \tokens -> 
case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "f\x00f8rste" -> Just $ ordinal 1 + "første" -> Just $ ordinal 1 "andre" -> Just $ ordinal 2 "tredje" -> Just $ ordinal 3 "fjerde" -> Just $ ordinal 4 "femte" -> Just $ ordinal 5 "sjette" -> Just $ ordinal 6 "syvende" -> Just $ ordinal 7 - "\x00e5ttende" -> Just $ ordinal 8 + "åttende" -> Just $ ordinal 8 "niende" -> Just $ ordinal 9 "tiende" -> Just $ ordinal 10 "ellevte" -> Just $ ordinal 11 @@ -65,8 +65,8 @@ ruleOrdinalsFirstst = Rule "seksogtyvende" -> Just $ ordinal 26 "syvogtyvende" -> Just $ ordinal 27 "syvogtjuende" -> Just $ ordinal 27 - "\x00e5tteogtyvende" -> Just $ ordinal 28 - "\x00e5tteogtjuende" -> Just $ ordinal 28 + "åtteogtyvende" -> Just $ ordinal 28 + "åtteogtjuende" -> Just $ ordinal 28 "niogtyvende" -> Just $ ordinal 29 "niogtjuende" -> Just $ ordinal 29 "tredefte" -> Just $ ordinal 30 diff --git a/Duckling/Ordinal/PL/Rules.hs b/Duckling/Ordinal/PL/Rules.hs index 4c8cadfa..09c33e8c 100644 --- a/Duckling/Ordinal/PL/Rules.hs +++ b/Duckling/Ordinal/PL/Rules.hs @@ -27,7 +27,7 @@ ruleThOrdinalNoSpace :: Rule ruleThOrdinalNoSpace = Rule { name = "24th ordinal no space" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)czwart(y|ego|emu|ym|(a|\x0105)|ej)" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)czwart(y|ego|emu|ym|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 24 } @@ -36,7 +36,7 @@ ruleThOrdinal16 :: Rule ruleThOrdinal16 = Rule { name = "31-39th ordinal" , pattern = - [ regex "trzydziest(ym|y|ego|emu|(a|\x0105)|ej)( |-)?" + [ regex "trzydziest(ym|y|ego|emu|(a|ą)|ej)( |-)?" 
, dimension Ordinal ] , prod = \tokens -> case tokens of @@ -49,7 +49,7 @@ ruleThOrdinal3 :: Rule ruleThOrdinal3 = Rule { name = "10th ordinal" , pattern = - [ regex "dziesi(a|\x0105)t(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "dziesi(a|ą)t(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 10 } @@ -69,7 +69,7 @@ ruleThOrdinal8 :: Rule ruleThOrdinal8 = Rule { name = "15th ordinal" , pattern = - [ regex "pi(e|\x0119)tnast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "pi(e|ę)tnast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 15 } @@ -78,7 +78,7 @@ ruleThOrdinal13 :: Rule ruleThOrdinal13 = Rule { name = "20th ordinal" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 20 } @@ -87,7 +87,7 @@ ruleThOrdinal4 :: Rule ruleThOrdinal4 = Rule { name = "11th ordinal" , pattern = - [ regex "jedenast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "jedenast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 11 } @@ -96,7 +96,7 @@ ruleFifthOrdinal :: Rule ruleFifthOrdinal = Rule { name = "fifth ordinal" , pattern = - [ regex "pi(a|\x0105)t(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "pi(a|ą)t(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 5 } @@ -105,7 +105,7 @@ ruleThOrdinal11 :: Rule ruleThOrdinal11 = Rule { name = "18th ordinal" , pattern = - [ regex "osiemnast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "osiemnast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 18 } @@ -114,7 +114,7 @@ ruleSecondOrdinal :: Rule ruleSecondOrdinal = Rule { name = "second ordinal" , pattern = - [ regex "drugi?(ego|emu|m|(a|\x0105)|ej)?" + [ regex "drugi?(ego|emu|m|(a|ą)|ej)?" ] , prod = \_ -> Just $ ordinal 2 } @@ -123,7 +123,7 @@ ruleNdOrdinalNoSpace :: Rule ruleNdOrdinalNoSpace = Rule { name = "22nd ordinal no space" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)drugi?(ego|emu|m|(a|\x0105)|ej)?" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)drugi?(ego|emu|m|(a|ą)|ej)?" 
] , prod = \_ -> Just $ ordinal 22 } @@ -132,7 +132,7 @@ ruleSeventhOrdinal :: Rule ruleSeventhOrdinal = Rule { name = "seventh ordinal" , pattern = - [ regex "si(o|\x00f3)dm(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "si(o|ó)dm(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 7 } @@ -141,7 +141,7 @@ ruleStOrdinalNoSpace :: Rule ruleStOrdinalNoSpace = Rule { name = "21st ordinal no space" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)pierw?sz(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)pierw?sz(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 21 } @@ -150,7 +150,7 @@ ruleThOrdinal7 :: Rule ruleThOrdinal7 = Rule { name = "14th ordinal" , pattern = - [ regex "czternast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "czternast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 14 } @@ -159,7 +159,7 @@ ruleThOrdinal2 :: Rule ruleThOrdinal2 = Rule { name = "9th ordinal" , pattern = - [ regex "dziewi(a|\x0105)t(ym|y|ego|em|emu|(a|\x0105)|ej)" + [ regex "dziewi(a|ą)t(ym|y|ego|em|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 9 } @@ -168,7 +168,7 @@ ruleThOrdinal9 :: Rule ruleThOrdinal9 = Rule { name = "16th ordinal" , pattern = - [ regex "szesnast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "szesnast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 16 } @@ -177,7 +177,7 @@ ruleThOrdinal :: Rule ruleThOrdinal = Rule { name = "8th ordinal" , pattern = - [ regex "(o|\x00f3|\x00d3)sm(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "(o|ó|Ó)sm(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 8 } @@ -186,7 +186,7 @@ ruleThOrdinal14 :: Rule ruleThOrdinal14 = Rule { name = "21-29th ordinal" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)( |-)?" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)( |-)?" 
, dimension Ordinal ] , prod = \tokens -> case tokens of @@ -199,7 +199,7 @@ ruleThOrdinal10 :: Rule ruleThOrdinal10 = Rule { name = "17th ordinal" , pattern = - [ regex "siedemnast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "siedemnast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 17 } @@ -208,7 +208,7 @@ ruleRdOrdinalNoSpace :: Rule ruleRdOrdinalNoSpace = Rule { name = "23rd ordinal no space" , pattern = - [ regex "dwudziest(ym|y|ego|emu|(a|\x0105)|ej)trzeci(ego|ch|emu|m|mi|ej|(a|\x0105))?" + [ regex "dwudziest(ym|y|ego|emu|(a|ą)|ej)trzeci(ego|ch|emu|m|mi|ej|(a|ą))?" ] , prod = \_ -> Just $ ordinal 23 } @@ -217,7 +217,7 @@ ruleThOrdinal5 :: Rule ruleThOrdinal5 = Rule { name = "12th ordinal" , pattern = - [ regex "dwunast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "dwunast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 12 } @@ -226,7 +226,7 @@ ruleThOrdinal6 :: Rule ruleThOrdinal6 = Rule { name = "13th ordinal" , pattern = - [ regex "trzynast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "trzynast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 13 } @@ -235,7 +235,7 @@ ruleFirstOrdinal :: Rule ruleFirstOrdinal = Rule { name = "first ordinal" , pattern = - [ regex "pierw?sz(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "pierw?sz(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 1 } @@ -244,7 +244,7 @@ ruleSixthOrdinal :: Rule ruleSixthOrdinal = Rule { name = "sixth ordinal" , pattern = - [ regex "sz(o|\x00f3)st(y|ego|emu|m|(a|\x0105)|ej)" + [ regex "sz(o|ó)st(y|ego|emu|m|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 6 } @@ -253,7 +253,7 @@ ruleFourthOrdinal :: Rule ruleFourthOrdinal = Rule { name = "fourth ordinal" , pattern = - [ regex "czwart(y|ego|emu|ym|(a|\x0105)|ej)" + [ regex "czwart(y|ego|emu|ym|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 4 } @@ -262,7 +262,7 @@ ruleThOrdinal15 :: Rule ruleThOrdinal15 = Rule { name = "30th ordinal" , pattern = - [ regex "trzydziest(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "trzydziest(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ 
-> Just $ ordinal 30 } @@ -271,7 +271,7 @@ ruleThOrdinal12 :: Rule ruleThOrdinal12 = Rule { name = "19th ordinal" , pattern = - [ regex "dziewi(\x0119|e)tnast(ym|y|ego|emu|(a|\x0105)|ej)" + [ regex "dziewi(ę|e)tnast(ym|y|ego|emu|(a|ą)|ej)" ] , prod = \_ -> Just $ ordinal 19 } @@ -280,7 +280,7 @@ ruleThirdOrdinal :: Rule ruleThirdOrdinal = Rule { name = "third ordinal" , pattern = - [ regex "trzeci(ego|ch|emu|m|mi|ej|(a|\x0105))?" + [ regex "trzeci(ego|ch|emu|m|mi|ej|(a|ą))?" ] , prod = \_ -> Just $ ordinal 3 } diff --git a/Duckling/Ordinal/PT/Rules.hs b/Duckling/Ordinal/PT/Rules.hs index a2b3fb7d..202f4d5b 100644 --- a/Duckling/Ordinal/PT/Rules.hs +++ b/Duckling/Ordinal/PT/Rules.hs @@ -25,7 +25,7 @@ ruleOrdinalsPrimeiro :: Rule ruleOrdinalsPrimeiro = Rule { name = "ordinals (primeiro..10)" , pattern = - [ regex "((primeir|segund|quart|quint|sext|s(e|\x00e9)tim|oitav|non|d(e|\x00e9)cim)(os?|as?))" + [ regex "((primeir|segund|quart|quint|sext|s(e|é)tim|oitav|non|d(e|é)cim)(os?|as?))" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of @@ -54,13 +54,13 @@ ruleOrdinalsPrimeiro = Rule "sexta" -> Just $ ordinal 6 "sextas" -> Just $ ordinal 6 "setimas" -> Just $ ordinal 7 - "s\x00e9tima" -> Just $ ordinal 7 + "sétima" -> Just $ ordinal 7 "setimo" -> Just $ ordinal 7 "setimos" -> Just $ ordinal 7 "setima" -> Just $ ordinal 7 - "s\x00e9timos" -> Just $ ordinal 7 - "s\x00e9timo" -> Just $ ordinal 7 - "s\x00e9timas" -> Just $ ordinal 7 + "sétimos" -> Just $ ordinal 7 + "sétimo" -> Just $ ordinal 7 + "sétimas" -> Just $ ordinal 7 "oitavas" -> Just $ ordinal 8 "oitava" -> Just $ ordinal 8 "oitavo" -> Just $ ordinal 8 @@ -69,14 +69,14 @@ ruleOrdinalsPrimeiro = Rule "nona" -> Just $ ordinal 9 "nono" -> Just $ ordinal 9 "nonas" -> Just $ ordinal 9 - "d\x00e9cimos" -> Just $ ordinal 10 + "décimos" -> Just $ ordinal 10 "decimo" -> Just $ ordinal 10 "decimos" -> Just $ ordinal 10 - "d\x00e9cimo" -> Just $ ordinal 10 + 
"décimo" -> Just $ ordinal 10 "decimas" -> Just $ ordinal 10 - "d\x00e9cima" -> Just $ ordinal 10 + "décima" -> Just $ ordinal 10 "decima" -> Just $ ordinal 10 - "d\x00e9cimas" -> Just $ ordinal 10 + "décimas" -> Just $ ordinal 10 _ -> Nothing _ -> Nothing } diff --git a/Duckling/Ordinal/RO/Rules.hs b/Duckling/Ordinal/RO/Rules.hs index 277cc4f8..71117600 100644 --- a/Duckling/Ordinal/RO/Rules.hs +++ b/Duckling/Ordinal/RO/Rules.hs @@ -70,14 +70,14 @@ ordinalMap = HashMap.fromList , ("\537apte", 7) , ("opt", 8) , ("noua", 9) - , ("nou\x0103", 9) + , ("nouă", 9) ] ruleSpelledOutOrdinals :: Rule ruleSpelledOutOrdinals = Rule { name = "spelled out ordinals" , pattern = - [ regex "al?\\s(doi|trei|patru|cinci|(s|\x0219)a(s|pt)e|opt|nou(a|\x0103))[ -]?(le)?a" + [ regex "al?\\s(doi|trei|patru|cinci|(s|ș)a(s|pt)e|opt|nou(a|ă))[ -]?(le)?a" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Ordinal/RU/Rules.hs b/Duckling/Ordinal/RU/Rules.hs index 63dd98c1..8e006b9a 100644 --- a/Duckling/Ordinal/RU/Rules.hs +++ b/Duckling/Ordinal/RU/Rules.hs @@ -27,30 +27,30 @@ ruleOrdinalsFirstth :: Rule ruleOrdinalsFirstth = Rule { name = "ordinals (first..19th)" , pattern = - [ regex 
"(\x043f\x0435\x0440\x0432|\x0432\x0442\x043e\x0440|\x0442\x0440\x0435\x0442|\x0447\x0435\x0442\x0432\x0435\x0440\x0442|\x043f\x044f\x0442|\x0448\x0435\x0441\x0442|\x0441\x0435\x0434\x044c\x043c|\x0432\x043e\x0441\x044c\x043c|\x0434\x0435\x0432\x044f\x0442|\x0434\x0435\x0441\x044f\x0442|\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x0430\x0442|\x0434\x0432\x0435\x043d\x0430\x0434\x0446\x0430\x0442|\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x0430\x0442|\x0447\x0435\x0442\x044b\x0440\x043d\x0430\x0434\x0446\x0430\x0442|\x043f\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442|\x0448\x0435\x0441\x0442\x043d\x0430\x0434\x0446\x0430\x0442|\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442|\x0432\x043e\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442|\x0434\x0435\x0432\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442|\x0434\x0432\x0430\x0434\x0446\x0430\x0442)(\x044b\x0439|\x043e\x0439|\x0438\x0439|\x0430\x044f|\x043e\x0435)" + [ regex "(перв|втор|трет|четверт|пят|шест|седьм|восьм|девят|десят|одинадцат|двенадцат|тринадцат|четырнадцат|пятнадцат|шестнадцат|семнадцат|восемнадцат|девятнадцат|двадцат)(ый|ой|ий|ая|ое)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case match of - "\x043f\x0435\x0440\x0432" -> Just $ ordinal 1 - "\x0432\x0442\x043e\x0440" -> Just $ ordinal 2 - "\x0442\x0440\x0435\x0442" -> Just $ ordinal 3 - "\x0447\x0435\x0442\x0432\x0435\x0440\x0442" -> Just $ ordinal 4 - "\x043f\x044f\x0442" -> Just $ ordinal 5 - "\x0448\x0435\x0441\x0442" -> Just $ ordinal 6 - "\x0441\x0435\x0434\x044c\x043c" -> Just $ ordinal 7 - "\x0432\x043e\x0441\x044c\x043c" -> Just $ ordinal 8 - "\x0434\x0435\x0432\x044f\x0442" -> Just $ ordinal 9 - "\x0434\x0435\x0441\x044f\x0442" -> Just $ ordinal 10 - "\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 11 - "\x0434\x0432\x0435\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 12 - "\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 13 - 
"\x0447\x0435\x0442\x044b\x0440\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 14 - "\x043f\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 15 - "\x0448\x0435\x0441\x0442\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 16 - "\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 17 - "\x0432\x043e\x0441\x0435\x043c\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 18 - "\x0434\x0435\x0432\x044f\x0442\x043d\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 19 - "\x0434\x0432\x0430\x0434\x0446\x0430\x0442" -> Just $ ordinal 20 + "перв" -> Just $ ordinal 1 + "втор" -> Just $ ordinal 2 + "трет" -> Just $ ordinal 3 + "четверт" -> Just $ ordinal 4 + "пят" -> Just $ ordinal 5 + "шест" -> Just $ ordinal 6 + "седьм" -> Just $ ordinal 7 + "восьм" -> Just $ ordinal 8 + "девят" -> Just $ ordinal 9 + "десят" -> Just $ ordinal 10 + "одинадцат" -> Just $ ordinal 11 + "двенадцат" -> Just $ ordinal 12 + "тринадцат" -> Just $ ordinal 13 + "четырнадцат" -> Just $ ordinal 14 + "пятнадцат" -> Just $ ordinal 15 + "шестнадцат" -> Just $ ordinal 16 + "семнадцат" -> Just $ ordinal 17 + "восемнадцат" -> Just $ ordinal 18 + "девятнадцат" -> Just $ ordinal 19 + "двадцат" -> Just $ ordinal 20 _ -> Nothing _ -> Nothing } @@ -59,33 +59,33 @@ ruleOrdinal :: Rule ruleOrdinal = Rule { name = "ordinal 21..99" , pattern = - [ regex "(\x0434\x0432\x0430\x0434\x0446\x0430\x0442\x044c|\x0442\x0440\x0438\x0434\x0446\x0430\x0442\x044c|\x0441\x043e\x0440\x043e\x043a|\x043f\x044f\x0442\x044c\x0434\x0435\x0441\x044f\x0442|\x0448\x0435\x0441\x0442\x044c\x0434\x0435\x0441\x044f\x0442|\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442|\x0432\x043e\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442|\x0434\x0435\x0432\x044f\x043d\x043e\x0441\x0442\x043e)" - , regex 
"(\x043f\x0435\x0440\x0432|\x0432\x0442\x043e\x0440|\x0442\x0440\x0435\x0442|\x0447\x0435\x0442\x0432\x0435\x0440\x0442|\x043f\x044f\x0442|\x0448\x0435\x0441\x0442|\x0441\x0435\x0434\x044c\x043c|\x0432\x043e\x0441\x044c\x043c|\x0434\x0435\x0432\x044f\x0442)(\x044b\x0439|\x043e\x0439|\x0438\x0439|\x0430\x044f|\x043e\x0435)" + [ regex "(двадцать|тридцать|сорок|пятьдесят|шестьдесят|семьдесят|восемьдесят|девяносто)" + , regex "(перв|втор|трет|четверт|пят|шест|седьм|восьм|девят)(ый|ой|ий|ая|ое)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (m1:_)): Token RegexMatch (GroupMatch (m2:_)): _) -> do dozen <- case Text.toLower m1 of - "\x0434\x0432\x0430\x0434\x0446\x0430\x0442\x044c" -> Just 20 - "\x0442\x0440\x0438\x0434\x0446\x0430\x0442\x044c" -> Just 30 - "\x0441\x043e\x0440\x043e\x043a" -> Just 40 - "\x043f\x044f\x0442\x044c\x0434\x0435\x0441\x044f\x0442" -> Just 50 - "\x0448\x0435\x0441\x0442\x044c\x0434\x0435\x0441\x044f\x0442" -> Just 60 - "\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442" -> Just 70 - "\x0432\x043e\x0441\x0435\x043c\x044c\x0434\x0435\x0441\x044f\x0442" -> Just 80 - "\x0434\x0435\x0432\x044f\x043d\x043e\x0441\x0442\x043e" -> Just 90 + "двадцать" -> Just 20 + "тридцать" -> Just 30 + "сорок" -> Just 40 + "пятьдесят" -> Just 50 + "шестьдесят" -> Just 60 + "семьдесят" -> Just 70 + "восемьдесят" -> Just 80 + "девяносто" -> Just 90 _ -> Nothing unit <- case Text.toLower m2 of - "\x043f\x0435\x0440\x0432" -> Just 1 - "\x0432\x0442\x043e\x0440" -> Just 2 - "\x0442\x0440\x0435\x0442" -> Just 3 - "\x0447\x0435\x0442\x0432\x0435\x0440\x0442" -> Just 4 - "\x043f\x044f\x0442" -> Just 5 - "\x0448\x0435\x0441\x0442" -> Just 6 - "\x0441\x0435\x0434\x044c\x043c" -> Just 7 - "\x0432\x043e\x0441\x044c\x043c" -> Just 8 - "\x0434\x0435\x0432\x044f\x0442" -> Just 9 + "перв" -> Just 1 + "втор" -> Just 2 + "трет" -> Just 3 + "четверт" -> Just 4 + "пят" -> Just 5 + "шест" -> Just 6 + "седьм" -> Just 7 + "восьм" -> Just 8 + "девят" -> Just 9 _ -> 
Nothing Just . ordinal $ dozen + unit _ -> Nothing @@ -95,7 +95,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "0*(\\d+)-?((\x044b|\x043e|\x0438)?\x0439|\x0430\x044f|\x043e\x0435)" + [ regex "0*(\\d+)-?((ы|о|и)?й|ая|ое)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> ordinal <$> parseInt match diff --git a/Duckling/Ordinal/SV/Rules.hs b/Duckling/Ordinal/SV/Rules.hs index 35c28215..2aa735bc 100644 --- a/Duckling/Ordinal/SV/Rules.hs +++ b/Duckling/Ordinal/SV/Rules.hs @@ -28,16 +28,16 @@ import Duckling.Types ordinalsMap :: HashMap Text Int ordinalsMap = HashMap.fromList - [ ( "f\x00f6rsta", 1 ) - , ( "f\x00f6rste", 1 ) + [ ( "första", 1 ) + , ( "förste", 1 ) , ( "andra", 2 ) , ( "andre", 2) , ( "tredje", 3 ) - , ( "fj\x00e4rde", 4 ) + , ( "fjärde", 4 ) , ( "femte", 5 ) - , ( "sj\x00e4tte", 6 ) + , ( "sjätte", 6 ) , ( "sjunde", 7 ) - , ( "\x00e5ttonde", 8 ) + , ( "åttonde", 8 ) , ( "nionde", 9 ) , ( "tionde", 10 ) , ( "elfte", 11 ) @@ -55,7 +55,7 @@ ordinalsMap = HashMap.fromList , ( "femtionde", 50 ) , ( "sextionde", 60 ) , ( "sjuttionde", 70 ) - , ( "\x00e5ttionde", 80 ) + , ( "åttionde", 80 ) , ( "nittionde", 90 ) ] @@ -67,7 +67,7 @@ cardinalsMap = HashMap.fromList , ( "femtio", 50 ) , ( "sextio", 60 ) , ( "sjuttio", 70 ) - , ( "\x00e5ttio", 80 ) + , ( "åttio", 80 ) , ( "nittio", 90 ) ] @@ -75,7 +75,7 @@ ruleOrdinals :: Rule ruleOrdinals = Rule { name = "ordinals (first..twentieth,thirtieth,...)" , pattern = - [ regex "(f\x00f6rsta|f\x00f6rste|andra|andre|tredje|fj\x00e4rde|femte|sj\x00e4tte|sjunde|\x00e5ttonde|nionde|tionde|elfte|tolfte|trettionde|fjortonde|femtonde|sextonde|sjuttonde|artonde|nittonde|tjugonde|trettionde|fyrtionde|femtonde|sextionde|sjuttionde|\x00e5ttionde|nittionde)" + [ regex 
"(första|förste|andra|andre|tredje|fjärde|femte|sjätte|sjunde|åttonde|nionde|tionde|elfte|tolfte|trettionde|fjortonde|femtonde|sextonde|sjuttonde|artonde|nittonde|tjugonde|trettionde|fyrtionde|femtonde|sextionde|sjuttionde|åttionde|nittionde)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -87,7 +87,7 @@ ruleCompositeOrdinals :: Rule ruleCompositeOrdinals = Rule { name = "ordinals (composite, e.g., eighty-seven)" , pattern = - [ regex "(tjugo|trettio|fyrtio|femtio|sextio|sjuttio|\x00e5ttio|nittio)(f\x00f6rsta|f\x00f6rste|andra|andre|tredje|fj\x00e4rde|femte|sj\x00e4tte|sjunde|\x00e5ttonde|nionde)" + [ regex "(tjugo|trettio|fyrtio|femtio|sextio|sjuttio|åttio|nittio)(första|förste|andra|andre|tredje|fjärde|femte|sjätte|sjunde|åttonde|nionde)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (tens:units:_)):_) -> do diff --git a/Duckling/Ordinal/TR/Rules.hs b/Duckling/Ordinal/TR/Rules.hs index 60be2338..882bc4c4 100644 --- a/Duckling/Ordinal/TR/Rules.hs +++ b/Duckling/Ordinal/TR/Rules.hs @@ -29,30 +29,30 @@ ordinals :: [Text] ordinals = [ "birinci" , "ikinci" - , "\x00fc\x00e7\x00fcnc\x00fc" - , "d\x00f6rd\x00fcnc\x00fc" - , "be\x015finci" - , "alt\x0131nc\x0131" + , "üçüncü" + , "dördüncü" + , "beşinci" + , "altıncı" , "yedinci" , "sekizinci" , "dokuzuncu" , "onuncu" , "on birinci" , "on ikinci" - , "on \x00fc\x00e7\x00fcnc\x00fc" - , "on d\x00f6rd\x00fcnc\x00fc" - , "on be\x015finci" - , "on alt\x0131nc\x0131" + , "on üçüncü" + , "on dördüncü" + , "on beşinci" + , "on altıncı" , "on yedinci" , "on sekizinci" , "on dokuzuncu" , "yirminci" , "yirmi birinci" , "yirmi ikinci" - , "yirmi \x00fc\x00e7\x00fcnc\x00fc" - , "yirmi d\x00f6rd\x00fcnc\x00fc" - , "yirmi be\x015finci" - , "yirmi alt\x0131nc\x0131" + , "yirmi üçüncü" + , "yirmi dördüncü" + , "yirmi beşinci" + , "yirmi altıncı" , "yirmi yedinci" , "yirmi sekizinci" , "yirmi dokuzuncu" @@ -80,7 +80,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name 
= "ordinal (digits)" , pattern = - [ regex "0*(\\d+) ?('?)(inci|nci|\x0131nc\x0131|nc\x0131|uncu|ncu|\x00fcnc\x00fc|nc\x00fc|.)" + [ regex "0*(\\d+) ?('?)(inci|nci|ıncı|ncı|uncu|ncu|üncü|ncü|.)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> diff --git a/Duckling/Ordinal/UK/Rules.hs b/Duckling/Ordinal/UK/Rules.hs index 22074b12..4a52b3b9 100644 --- a/Duckling/Ordinal/UK/Rules.hs +++ b/Duckling/Ordinal/UK/Rules.hs @@ -28,33 +28,33 @@ import Duckling.Types ordinalsFirstThMap :: HashMap Text Int ordinalsFirstThMap = HashMap.fromList - [ ( "\x043f\x0435\x0440\x0448" , 1 ) - , ( "\x0434\x0440\x0443\x0433" , 2 ) - , ( "\x0442\x0440\x0435\x0442" , 3 ) - , ( "\x0447\x0435\x0442\x0432\x0435\x0440\x0442" , 4 ) - , ( "\x043f\x2018\x044f\x0442" , 5 ) - , ( "\x0448\x043e\x0441\x0442" , 6 ) - , ( "\x0441\x044c\x043e\x043c" , 7 ) - , ( "\x0432\x043e\x0441\x044c\x043c" , 8 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x0442" , 9 ) - , ( "\x0434\x0435\x0441\x044f\x0442" , 10 ) - , ( "\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x044f\x0442" , 11 ) - , ( "\x0434\x0432\x0430\x043d\x0430\x0434\x0446\x044f\x0442" , 12 ) - , ( "\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x044f\x0442" , 13 ) - , ( "\x0447\x043e\x0442\x0438\x0440\x043d\x0430\x0434\x0446\x044f\x0442" , 14 ) - , ( "\x043f\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442" , 15 ) - , ( "\x0448\x0456\x0441\x0442\x043d\x0430\x0434\x0446\x044f\x0442" , 16 ) - , ( "\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442" , 17 ) - , ( "\x0432\x0456\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442" , 18 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442" , 19 ) - , ( "\x0434\x0432\x0430\x0434\x0446\x044f\x0442" , 20 ) + [ ( "перш" , 1 ) + , ( "друг" , 2 ) + , ( "трет" , 3 ) + , ( "четверт" , 4 ) + , ( "п‘ят" , 5 ) + , ( "шост" , 6 ) + , ( "сьом" , 7 ) + , ( "восьм" , 8 ) + , ( "дев‘ят" , 9 ) + , ( "десят" , 10 ) + , ( "одинадцят" , 11 ) + , ( "дванадцят" , 12 ) + , 
( "тринадцят" , 13 ) + , ( "чотирнадцят" , 14 ) + , ( "п‘ятнадцят" , 15 ) + , ( "шістнадцят" , 16 ) + , ( "сімнадцят" , 17 ) + , ( "вісімнадцят" , 18 ) + , ( "дев‘ятнадцят" , 19 ) + , ( "двадцят" , 20 ) ] ruleOrdinalsFirstth :: Rule ruleOrdinalsFirstth = Rule { name = "ordinals (first..19th)" , pattern = - [ regex "(\x043f\x0435\x0440\x0448|\x0434\x0440\x0443\x0433|\x0442\x0440\x0435\x0442|\x0447\x0435\x0442\x0432\x0435\x0440\x0442|\x043f\x2018\x044f\x0442|\x0448\x043e\x0441\x0442|\x0441\x044c\x043e\x043c|\x0432\x043e\x0441\x044c\x043c|\x0434\x0435\x0432\x2018\x044f\x0442|\x0434\x0435\x0441\x044f\x0442|\x043e\x0434\x0438\x043d\x0430\x0434\x0446\x044f\x0442|\x0434\x0432\x0430\x043d\x0430\x0434\x0446\x044f\x0442|\x0442\x0440\x0438\x043d\x0430\x0434\x0446\x044f\x0442|\x0447\x043e\x0442\x0438\x0440\x043d\x0430\x0434\x0446\x044f\x0442|\x043f\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442|\x0448\x0456\x0441\x0442\x043d\x0430\x0434\x0446\x044f\x0442|\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442|\x0432\x0456\x0441\x0456\x043c\x043d\x0430\x0434\x0446\x044f\x0442|\x0434\x0435\x0432\x2018\x044f\x0442\x043d\x0430\x0434\x0446\x044f\x0442|\x0434\x0432\x0430\x0434\x0446\x044f\x0442)(\x0438\x0439|\x0456\x0439|\x0430|\x044f|\x0435|\x0454)" + [ regex "(перш|друг|трет|четверт|п‘ят|шост|сьом|восьм|дев‘ят|десят|одинадцят|дванадцят|тринадцят|чотирнадцят|п‘ятнадцят|шістнадцят|сімнадцят|вісімнадцят|дев‘ятнадцят|двадцят)(ий|ій|а|я|е|є)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> @@ -64,22 +64,22 @@ ruleOrdinalsFirstth = Rule ordinalTensMap :: HashMap Text Int ordinalTensMap = HashMap.fromList - [ ( "\x0434\x0432\x0430\x0434\x0446\x044f\x0442\x044c" , 20 ) - , ( "\x0442\x0440\x0438\x0434\x0446\x044f\x0442\x044c" , 30 ) - , ( "\x0441\x043e\x0440\x043e\x043a" , 40 ) - , ( "\x043f\x2018\x044f\x0442\x0434\x0435\x0441\x044f\x0442" , 50 ) - , ( "\x0448\x0456\x0441\x0442\x0434\x0435\x0441\x044f\x0442" , 60 ) - , ( 
"\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442" , 70 ) - , ( "\x0432\x0456\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442" , 80 ) - , ( "\x0434\x0435\x0432\x2018\x044f\x043d\x043e\x0441\x0442\x043e" , 90 ) + [ ( "двадцять" , 20 ) + , ( "тридцять" , 30 ) + , ( "сорок" , 40 ) + , ( "п‘ятдесят" , 50 ) + , ( "шістдесят" , 60 ) + , ( "сімдесят" , 70 ) + , ( "вісімдесят" , 80 ) + , ( "дев‘яносто" , 90 ) ] ruleOrdinal :: Rule ruleOrdinal = Rule { name = "ordinal 21..99" , pattern = - [ regex "(\x0434\x0432\x0430\x0434\x0446\x044f\x0442\x044c|\x0442\x0440\x0438\x0434\x0446\x044f\x0442\x044c|\x0441\x043e\x0440\x043e\x043a|\x043f\x2018\x044f\x0442\x0434\x0435\x0441\x044f\x0442|\x0448\x0456\x0441\x0442\x044c\x0434\x0435\x0441\x044f\x0442|\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442|\x0432\x0456\x0441\x0456\x043c\x0434\x0435\x0441\x044f\x0442|\x0434\x0435\x0432\x2018\x044f\x043d\x043e\x0441\x0442\x043e)" - , regex "(\x043f\x0435\x0440\x0448|\x0434\x0440\x0443\x0433|\x0442\x0440\x0435\x0442|\x0447\x0435\x0442\x0432\x0435\x0440\x0442|\x043f\x2018\x044f\x0442|\x0448\x043e\x0441\x0442|\x0441\x044c\x043e\x043c|\x0432\x043e\x0441\x044c\x043c|\x0434\x0435\x0432\x2018\x044f\x0442)(\x0438\x0439|\x0456\x0439|\x0430|\x044f|\x0435|\x0454)" + [ regex "(двадцять|тридцять|сорок|п‘ятдесят|шістьдесят|сімдесят|вісімдесят|дев‘яносто)" + , regex "(перш|друг|трет|четверт|п‘ят|шост|сьом|восьм|дев‘ят)(ий|ій|а|я|е|є)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (m1:_)): @@ -95,7 +95,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "0*(\\d+)-?((\x0438|\x0456)?\x0439|\x0430|\x044f|\x0435|\x0454)" + [ regex "0*(\\d+)-?((и|і)?й|а|я|е|є)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> ordinal <$> parseInt match diff --git a/Duckling/Ordinal/VI/Rules.hs b/Duckling/Ordinal/VI/Rules.hs index 05f4e009..b80ad8bf 100644 --- a/Duckling/Ordinal/VI/Rules.hs +++ b/Duckling/Ordinal/VI/Rules.hs 
@@ -22,7 +22,7 @@ ruleOrdinals :: Rule ruleOrdinals = Rule { name = "ordinals" , pattern = - [ regex "(\x0111\x1ea7u ti\x00ean|th\x1ee9 nh\x1ea5t|th\x1ee9 1)" + [ regex "(đầu tiên|thứ nhất|thứ 1)" ] , prod = \_ -> Just $ ordinal 1 } diff --git a/Duckling/Ordinal/ZH/Rules.hs b/Duckling/Ordinal/ZH/Rules.hs index f161b942..520306dd 100644 --- a/Duckling/Ordinal/ZH/Rules.hs +++ b/Duckling/Ordinal/ZH/Rules.hs @@ -25,7 +25,7 @@ ruleOrdinalDigits :: Rule ruleOrdinalDigits = Rule { name = "ordinal (digits)" , pattern = - [ regex "\x7b2c" + [ regex "第" , dimension Numeral ] , prod = \tokens -> case tokens of diff --git a/Duckling/Quantity/FR/Rules.hs b/Duckling/Quantity/FR/Rules.hs index 908afd0d..9ba2d050 100644 --- a/Duckling/Quantity/FR/Rules.hs +++ b/Duckling/Quantity/FR/Rules.hs @@ -29,7 +29,7 @@ ruleNumeralUnits = Rule { name = " " , pattern = [ dimension Numeral - , regex "(tasses?|cuill?(e|\x00e8)res? (a|\x00e0) soupe?)" + , regex "(tasses?|cuill?(e|è)res? (a|à) soupe?)" ] , prod = \tokens -> case tokens of (Token Numeral NumeralData {TNumeral.value = v}: @@ -46,7 +46,7 @@ ruleQuantityOfProduct = Rule { name = " of product" , pattern = [ dimension Quantity - , regex "de (caf(e|\x00e9)|sucre)" + , regex "de (caf(e|é)|sucre)" ] , prod = \tokens -> case tokens of (Token Quantity qd: diff --git a/Duckling/Quantity/KO/Rules.hs b/Duckling/Quantity/KO/Rules.hs index c0d09be6..bf34d1c7 100644 --- a/Duckling/Quantity/KO/Rules.hs +++ b/Duckling/Quantity/KO/Rules.hs @@ -28,7 +28,7 @@ ruleQuantityOfProduct = Rule { name = " of product" , pattern = [ dimension Quantity - , regex "\xc758 (\xc0bc\xacb9\xc0b4|\xcf5c\xb77c)" + , regex "의 (삼겹살|콜라)" ] , prod = \tokens -> case tokens of (Token Quantity qd: @@ -41,7 +41,7 @@ ruleQuantityOfProduct2 :: Rule ruleQuantityOfProduct2 = Rule { name = " of product" , pattern = - [ regex "(\xc0bc\xacb9\xc0b4|\xcf5c\xb77c)" + [ regex "(삼겹살|콜라)" , dimension Quantity ] , prod = \tokens -> case tokens of @@ -56,22 +56,22 @@ ruleNumeralUnits = Rule { 
name = " " , pattern = [ dimension Numeral - , regex "(\xac1c|\xd310|\xadf8(\xb7a8|\xb78c)|\xadfc|\xd30c\xc6b4(\xb4dc|\xc988)|\xc8111\xc2dc|\xadf8\xb987|\xcef5)" + , regex "(개|판|그(램|람)|근|파운(드|즈)|접시|그릇|컵)" ] , prod = \tokens -> case tokens of (Token Numeral NumeralData {TNumeral.value = v}: Token RegexMatch (GroupMatch (match:_)): _) -> case match of - "\xac1c" -> Just . Token Quantity $ quantity TQuantity.Unnamed v - "\xd310" -> Just . Token Quantity $ quantity (TQuantity.Custom "판") v - "\xadfc" -> Just . Token Quantity $ quantity (TQuantity.Custom "근") v - "\xadf8\xb7a8" -> Just . Token Quantity $ quantity TQuantity.Gram v - "\xadf8\xb78c" -> Just . Token Quantity $ quantity TQuantity.Gram v - "\xd30c\xc6b4\xb4dc" -> Just . Token Quantity $ quantity TQuantity.Pound v - "\xd30c\xc6b4\xc988" -> Just . Token Quantity $ quantity TQuantity.Pound v - "\xc8111\xc2dc" -> Just . Token Quantity $ quantity TQuantity.Dish v - "\xadf8\xb987" -> Just . Token Quantity $ quantity TQuantity.Bowl v - "\xcef5" -> Just . Token Quantity $ quantity TQuantity.Cup v + "개" -> Just . Token Quantity $ quantity TQuantity.Unnamed v + "판" -> Just . Token Quantity $ quantity (TQuantity.Custom "판") v + "근" -> Just . Token Quantity $ quantity (TQuantity.Custom "근") v + "그램" -> Just . Token Quantity $ quantity TQuantity.Gram v + "그람" -> Just . Token Quantity $ quantity TQuantity.Gram v + "파운드" -> Just . Token Quantity $ quantity TQuantity.Pound v + "파운즈" -> Just . Token Quantity $ quantity TQuantity.Pound v + "접시" -> Just . Token Quantity $ quantity TQuantity.Dish v + "그릇" -> Just . Token Quantity $ quantity TQuantity.Bowl v + "컵" -> Just . 
Token Quantity $ quantity TQuantity.Cup v _ -> Nothing _ -> Nothing } diff --git a/Duckling/Quantity/PT/Rules.hs b/Duckling/Quantity/PT/Rules.hs index c2bc59ca..3097865e 100644 --- a/Duckling/Quantity/PT/Rules.hs +++ b/Duckling/Quantity/PT/Rules.hs @@ -46,7 +46,7 @@ ruleQuantityOfProduct = Rule { name = " of product" , pattern = [ dimension Quantity - , regex "de (caf(e|\x00e9)|a(\x00e7|c)ucar)" + , regex "de (caf(e|é)|a(ç|c)ucar)" ] , prod = \tokens -> case tokens of (Token Quantity qd: diff --git a/Duckling/Quantity/RO/Rules.hs b/Duckling/Quantity/RO/Rules.hs index c2498fcd..98f7410c 100644 --- a/Duckling/Quantity/RO/Rules.hs +++ b/Duckling/Quantity/RO/Rules.hs @@ -28,7 +28,7 @@ ruleNumeralUnits = Rule { name = " " , pattern = [ dimension Numeral - , regex "livr(a|e|\x0103)" + , regex "livr(a|e|ă)" ] , prod = \tokens -> case tokens of (Token Numeral NumeralData {TNumeral.value = v}:_) -> @@ -41,7 +41,7 @@ ruleQuantityOfProduct = Rule { name = " of product" , pattern = [ dimension Quantity - , regex "de (carne|can(a|\x0103)|zah(a|\x0103)r)" + , regex "de (carne|can(a|ă)|zah(a|ă)r)" ] , prod = \tokens -> case tokens of (Token Quantity qd: diff --git a/Duckling/Temperature/ES/Rules.hs b/Duckling/Temperature/ES/Rules.hs index 44123b5a..76e07f73 100644 --- a/Duckling/Temperature/ES/Rules.hs +++ b/Duckling/Temperature/ES/Rules.hs @@ -26,7 +26,7 @@ ruleLatentTempTemp = Rule { name = " temp" , pattern = [ dimension Temperature - , regex "(grados?)|\x00b0" + , regex "(grados?)|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -39,7 +39,7 @@ ruleTempCelsius = Rule { name = " Celsius" , pattern = [ dimension Temperature - , regex "(cent(i|\x00ed)grados?|c(el[cs]?(ius)?)?\\.?)" + , regex "(cent(i|í)grados?|c(el[cs]?(ius)?)?\\.?)" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . 
Token Temperature $ diff --git a/Duckling/Temperature/FR/Rules.hs b/Duckling/Temperature/FR/Rules.hs index c7c1518f..c0e3b1c2 100644 --- a/Duckling/Temperature/FR/Rules.hs +++ b/Duckling/Temperature/FR/Rules.hs @@ -26,7 +26,7 @@ ruleLatentTempDegrees = Rule { name = " degrees" , pattern = [ dimension Temperature - , regex "(deg(r(\x00e9|e|\x00e8))?s?\\.?)|\x00b0" + , regex "(deg(r(é|e|è))?s?\\.?)|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -65,7 +65,7 @@ ruleLatentTempEnDessousDeZero = Rule { name = " en dessous de zero" , pattern = [ dimension Temperature - , regex "en dessous de (0|z(\x00e9|e)ro)" + , regex "en dessous de (0|z(é|e)ro)" ] , prod = \tokens -> case tokens of (Token Temperature td@(TemperatureData {TTemperature.value = v}):_) -> diff --git a/Duckling/Temperature/GA/Rules.hs b/Duckling/Temperature/GA/Rules.hs index 993ea0b6..4049989e 100644 --- a/Duckling/Temperature/GA/Rules.hs +++ b/Duckling/Temperature/GA/Rules.hs @@ -26,7 +26,7 @@ ruleLatentTempCim = Rule { name = " céim" , pattern = [ dimension Temperature - , regex "g?ch?(\x00e9|e)im(e(anna)?)?|\x00b0" + , regex "g?ch?(é|e)im(e(anna)?)?|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -39,7 +39,7 @@ ruleTempCelsius = Rule { name = " Celsius" , pattern = [ dimension Temperature - , regex "ceinteagr(\x00e1|a)d|c(el[cs]?(ius)?)?\\.?" + , regex "ceinteagr(á|a)d|c(el[cs]?(ius)?)?\\.?" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . 
Token Temperature $ @@ -65,7 +65,7 @@ ruleLatentTempFaoiBhunNid = Rule { name = " faoi bhun náid" , pattern = [ dimension Temperature - , regex "faoi bhun (0|n(a|\x00e1)id)" + , regex "faoi bhun (0|n(a|á)id)" ] , prod = \tokens -> case tokens of (Token Temperature td@(TemperatureData {TTemperature.value = v}):_) -> diff --git a/Duckling/Temperature/HR/Rules.hs b/Duckling/Temperature/HR/Rules.hs index 4878a91d..4a4a1915 100644 --- a/Duckling/Temperature/HR/Rules.hs +++ b/Duckling/Temperature/HR/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempStupnjevi = Rule { name = " stupnjevi" , pattern = [ dimension Temperature - , regex "deg\\.?|stupa?nj((ev)?a)?|\x00b0" + , regex "deg\\.?|stupa?nj((ev)?a)?|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ diff --git a/Duckling/Temperature/IT/Rules.hs b/Duckling/Temperature/IT/Rules.hs index 5339800c..5b74f506 100644 --- a/Duckling/Temperature/IT/Rules.hs +++ b/Duckling/Temperature/IT/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempDegrees = Rule { name = " degrees" , pattern = [ dimension Temperature - , regex "(grad[io]?\\.?)|\x00b0" + , regex "(grad[io]?\\.?)|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ diff --git a/Duckling/Temperature/JA/Rules.hs b/Duckling/Temperature/JA/Rules.hs index 7c886f29..0299317f 100644 --- a/Duckling/Temperature/JA/Rules.hs +++ b/Duckling/Temperature/JA/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempDegrees = Rule { name = " degrees" , pattern = [ dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -38,7 +38,7 @@ ruleTempCelcius = Rule { name = " Celcius" , pattern = [ dimension Temperature - , regex "\x6442\x6c0f(\x00b0|\x5ea6)|(\x00b0)C" + , regex "摂氏(°|度)|(°)C" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . 
Token Temperature $ @@ -50,9 +50,9 @@ ruleCelciusTemp :: Rule ruleCelciusTemp = Rule { name = "Celcius " , pattern = - [ regex "\x6442\x6c0f" + [ regex "摂氏" , dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (_:Token Temperature td:_) -> Just . Token Temperature $ @@ -65,7 +65,7 @@ ruleTempFahrenheit = Rule { name = " Fahrenheit" , pattern = [ dimension Temperature - , regex "\x83ef\x6c0f(\x00b0|\x5ea6)|(\x00b0)F" + , regex "華氏(°|度)|(°)F" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -77,9 +77,9 @@ ruleFahrenheitTemp :: Rule ruleFahrenheitTemp = Rule { name = "Fahrenheit " , pattern = - [ regex "\x83ef\x6c0f" + [ regex "華氏" , dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (_:Token Temperature td:_) -> Just . Token Temperature $ diff --git a/Duckling/Temperature/KO/Rules.hs b/Duckling/Temperature/KO/Rules.hs index 913a59b8..1f8f5544 100644 --- a/Duckling/Temperature/KO/Rules.hs +++ b/Duckling/Temperature/KO/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempDegrees = Rule { name = " degrees" , pattern = [ Predicate isLatent - , regex "\xb3c4|\x00b0" + , regex "도|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . 
Token Temperature $ @@ -37,7 +37,7 @@ ruleTemp :: Rule ruleTemp = Rule { name = "섭씨 " , pattern = - [ regex "\xc12d\xc528" + [ regex "섭씨" , dimension Temperature ] , prod = \tokens -> case tokens of @@ -63,7 +63,7 @@ ruleTemp2 :: Rule ruleTemp2 = Rule { name = "화씨 " , pattern = - [ regex "\xd654\xc528" + [ regex "화씨" , dimension Temperature ] , prod = \tokens -> case tokens of diff --git a/Duckling/Temperature/PT/Rules.hs b/Duckling/Temperature/PT/Rules.hs index db6a054b..e20402bf 100644 --- a/Duckling/Temperature/PT/Rules.hs +++ b/Duckling/Temperature/PT/Rules.hs @@ -26,7 +26,7 @@ ruleLatentTempTemp = Rule { name = " temp" , pattern = [ dimension Temperature - , regex "(graus?)|\x00b0" + , regex "(graus?)|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -39,7 +39,7 @@ ruleTempCelsius = Rule { name = " Celsius" , pattern = [ dimension Temperature - , regex "(cent(i|\x00ed)grados?|c(el[cs]?(ius)?)?\\.?)" + , regex "(cent(i|í)grados?|c(el[cs]?(ius)?)?\\.?)" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -65,7 +65,7 @@ ruleLatentTempTempAbaixoDeZero = Rule { name = " temp abaixo de zero" , pattern = [ dimension Temperature - , regex "((graus?)|\x00b0)?( abaixo (de)? zero)" + , regex "((graus?)|°)?( abaixo (de)? zero)" ] , prod = \tokens -> case tokens of (Token Temperature td@(TemperatureData {TTemperature.value = v}):_) -> diff --git a/Duckling/Temperature/RO/Rules.hs b/Duckling/Temperature/RO/Rules.hs index c7910748..d4c9c9ce 100644 --- a/Duckling/Temperature/RO/Rules.hs +++ b/Duckling/Temperature/RO/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempGrade = Rule { name = " grade" , pattern = [ dimension Temperature - , regex "(grade)|\x00b0" + , regex "(grade)|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . 
Token Temperature $ diff --git a/Duckling/Temperature/ZH/Rules.hs b/Duckling/Temperature/ZH/Rules.hs index 600d8966..40af5339 100644 --- a/Duckling/Temperature/ZH/Rules.hs +++ b/Duckling/Temperature/ZH/Rules.hs @@ -25,7 +25,7 @@ ruleLatentTempDegrees = Rule { name = " degrees" , pattern = [ dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -38,7 +38,7 @@ ruleTempCelcius = Rule { name = " Celcius" , pattern = [ dimension Temperature - , regex "(\x6444|\x651d)\x6c0f(\x00b0|\x5ea6)|(\x00b0)C" + , regex "(摄|攝)氏(°|度)|(°)C" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -50,9 +50,9 @@ ruleCelciusTemp :: Rule ruleCelciusTemp = Rule { name = "Celcius " , pattern = - [ regex "(\x6444|\x651d)\x6c0f" + [ regex "(摄|攝)氏" , dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (_:Token Temperature td:_) -> Just . Token Temperature $ @@ -65,7 +65,7 @@ ruleTempFahrenheit = Rule { name = " Fahrenheit" , pattern = [ dimension Temperature - , regex "(\x534e|\x83ef)\x6c0f(\x00b0|\x5ea6)|(\x00b0)F" + , regex "(华|華)氏(°|度)|(°)F" ] , prod = \tokens -> case tokens of (Token Temperature td:_) -> Just . Token Temperature $ @@ -77,9 +77,9 @@ ruleFahrenheitTemp :: Rule ruleFahrenheitTemp = Rule { name = "Fahrenheit " , pattern = - [ regex "(\x534e|\x83ef)\x6c0f" + [ regex "(华|華)氏" , dimension Temperature - , regex "\x5ea6|\x00b0" + , regex "度|°" ] , prod = \tokens -> case tokens of (_:Token Temperature td:_) -> Just . Token Temperature $ diff --git a/Duckling/Time/DA/Rules.hs b/Duckling/Time/DA/Rules.hs index 8f0f293c..ea39b476 100644 --- a/Duckling/Time/DA/Rules.hs +++ b/Duckling/Time/DA/Rules.hs @@ -36,8 +36,8 @@ daysOfWeek = , ( "Wednesday", "onsdag|ons\\.?" ) , ( "Thursday" , "torsdag|tors?\\.?" ) , ( "Friday" , "fredag|fre\\.?" ) - , ( "Saturday" , "l\x00f8rdag|l\x00f8r\\.?" 
) - , ( "Sunday" , "s\x00f8ndag|s\x00f8n\\.?" ) + , ( "Saturday" , "lørdag|lør\\.?" ) + , ( "Sunday" , "søndag|søn\\.?" ) ] ruleDaysOfWeek :: [Rule] @@ -103,7 +103,7 @@ ruleQuarterTotillbeforeIntegerHourofday :: Rule ruleQuarterTotillbeforeIntegerHourofday = Rule { name = "quarter to|till|before (hour-of-day)" , pattern = - [ regex "(et|\x00e9t)? ?kvart(er)? i" + [ regex "(et|ét)? ?kvart(er)? i" , Predicate isAnHourOfDay ] , prod = \tokens -> case tokens of @@ -185,7 +185,7 @@ ruleNewYearsDay :: Rule ruleNewYearsDay = Rule { name = "new year's day" , pattern = - [ regex "nyt\x00e5rsdag" + [ regex "nytårsdag" ] , prod = \_ -> tt $ monthDay 1 1 } @@ -275,7 +275,7 @@ ruleNow :: Rule ruleNow = Rule { name = "now" , pattern = - [ regex "lige nu|nu|(i )?dette \x00f8jeblik" + [ regex "lige nu|nu|(i )?dette øjeblik" ] , prod = \_ -> tt $ cycleNth TG.Second 0 } @@ -332,7 +332,7 @@ ruleQuarterAfterpastIntegerHourofday :: Rule ruleQuarterAfterpastIntegerHourofday = Rule { name = "quarter after|past (hour-of-day)" , pattern = - [ regex "(et|\x00e9t)? ?kvart(er)? over" + [ regex "(et|ét)? ?kvart(er)? over" , Predicate isAnHourOfDay ] , prod = \tokens -> case tokens of @@ -398,7 +398,7 @@ ruleTheCycleBeforeTime = Rule { name = "the before