From 9c367ab6cd9afe993d817fed8372496f5c49119a Mon Sep 17 00:00:00 2001 From: Julien Odent Date: Mon, 16 Jul 2018 05:35:02 -0700 Subject: [PATCH] Don't accept dashes (-) as token separators Summary: This is causing some issues, e.g. `20-30` resolving to 8:30pm (latent). Updating `Numeral` rules to account for that (`EN`, `FR`, `NL` following tests). Differential Revision: D8854891 fbshipit-source-id: ba17099b014d9cf2f48a7d85147cc890b02578f5 --- Duckling/Numeral/EN/Rules.hs | 2 + Duckling/Numeral/FR/Rules.hs | 8 +- Duckling/Numeral/NL/Rules.hs | 2 +- Duckling/Ranking/Classifiers/DA_XX.hs | 251 ++++---- Duckling/Ranking/Classifiers/DE_XX.hs | 407 ++++++------ Duckling/Ranking/Classifiers/EL_XX.hs | 622 +++++++++---------- Duckling/Ranking/Classifiers/EN_AU.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_BZ.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_CA.hs | 852 ++++++++++++------------- Duckling/Ranking/Classifiers/EN_GB.hs | 842 ++++++++++++------------- Duckling/Ranking/Classifiers/EN_IE.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_IN.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_JM.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_NZ.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_PH.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_TT.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/EN_US.hs | 863 +++++++++++++------------- Duckling/Ranking/Classifiers/EN_XX.hs | 840 ++++++++++++------------- Duckling/Ranking/Classifiers/EN_ZA.hs | 791 ++++++++++++----------- Duckling/Ranking/Classifiers/ES_XX.hs | 98 ++- Duckling/Ranking/Classifiers/FR_XX.hs | 606 +++++++++--------- Duckling/Ranking/Classifiers/HE_XX.hs | 152 +++-- Duckling/Ranking/Classifiers/HR_XX.hs | 392 ++++++------ Duckling/Ranking/Classifiers/IT_XX.hs | 270 ++++---- Duckling/Ranking/Classifiers/KO_XX.hs | 244 ++++---- Duckling/Ranking/Classifiers/NB_XX.hs | 260 ++++---- Duckling/Ranking/Classifiers/NL_BE.hs | 414 ++++++------ Duckling/Ranking/Classifiers/NL_NL.hs | 414 ++++++------ Duckling/Ranking/Classifiers/NL_XX.hs | 414 ++++++------ Duckling/Ranking/Classifiers/PL_XX.hs | 362 +++++------ Duckling/Ranking/Classifiers/PT_XX.hs | 184 +++--- Duckling/Ranking/Classifiers/SV_XX.hs | 260 ++++---- Duckling/Types/Document.hs | 2 +- 33 files changed, 7686 insertions(+), 8194 deletions(-) diff --git a/Duckling/Numeral/EN/Rules.hs b/Duckling/Numeral/EN/Rules.hs index 3e0e3ceb..28a0b4b7 100644 --- a/Duckling/Numeral/EN/Rules.hs +++ b/Duckling/Numeral/EN/Rules.hs @@ -148,10 +148,12 @@ ruleCompositeTens = Rule { name = "integer 21..99" , pattern = [ oneOf [20,30..90] + , regex "[\\s\\-]+" , numberBetween 1 10 ] , prod = \tokens -> case tokens of (Token Numeral NumeralData{TNumeral.value = tens}: + _: Token Numeral NumeralData{TNumeral.value = units}: _) -> double $ tens + units _ -> Nothing diff --git a/Duckling/Numeral/FR/Rules.hs b/Duckling/Numeral/FR/Rules.hs index ed0100a4..f65ef7b6 100644 --- a/Duckling/Numeral/FR/Rules.hs +++ b/Duckling/Numeral/FR/Rules.hs @@ -57,10 +57,12 @@ ruleNumerals2 = Rule { name = "numbers 22..29 32..39 .. 52..59" , pattern = [ oneOf [20, 50, 40, 30] + , regex "[\\s\\-]+" , numberBetween 2 10 ] , prod = \tokens -> case tokens of (Token Numeral NumeralData{TNumeral.value = v1}: + _: Token Numeral NumeralData{TNumeral.value = v2}: _) -> double $ v1 + v2 _ -> Nothing @@ -117,10 +119,12 @@ ruleNumerals5 = Rule { name = "numbers 62..69 .. 92..99" , pattern = [ oneOf [60, 80] + , regex "[\\s\\-]+" , numberBetween 2 20 ] , prod = \tokens -> case tokens of (Token Numeral NumeralData{TNumeral.value = v1}: + _: Token Numeral NumeralData{TNumeral.value = v2}: _) -> double $ v1 + v2 _ -> Nothing @@ -166,10 +170,12 @@ ruleNumeral3 = Rule { name = "number (17..19)" , pattern = [ numberWith TNumeral.value (== 10) + , regex "[\\s\\-]+" , numberBetween 7 10 ] , prod = \tokens -> case tokens of (_: + _: Token Numeral NumeralData{TNumeral.value = v}: _) -> double $ 10 + v _ -> Nothing @@ -224,7 +230,7 @@ ruleNumerals = Rule { name = "numbers 21 31 41 51" , pattern = [ oneOf [20, 50, 40, 30] - , regex "et" + , regex "-?et-?" , numberWith TNumeral.value (== 1) ] , prod = \tokens -> case tokens of diff --git a/Duckling/Numeral/NL/Rules.hs b/Duckling/Numeral/NL/Rules.hs index a582dfa9..2f4f5980 100644 --- a/Duckling/Numeral/NL/Rules.hs +++ b/Duckling/Numeral/NL/Rules.hs @@ -148,7 +148,7 @@ ruleNumeralsEn = Rule { name = "numbers en" , pattern = [ numberBetween 1 10 - , regex "en" + , regex "-?en-?" , oneOf [20, 30 .. 90] ] , prod = \tokens -> case tokens of diff --git a/Duckling/Ranking/Classifiers/DA_XX.hs b/Duckling/Ranking/Classifiers/DA_XX.hs index 96f390af..9ed9508f 100644 --- a/Duckling/Ranking/Classifiers/DA_XX.hs +++ b/Duckling/Ranking/Classifiers/DA_XX.hs @@ -42,11 +42,11 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("integer (numeric)", Classifier{okData = - ClassData{prior = -0.8013607652001781, unseen = -4.955827057601261, - likelihoods = HashMap.fromList [("", 0.0)], n = 140}, + ClassData{prior = -0.8157495026522777, unseen = -4.941642422609305, + likelihoods = HashMap.fromList [("", 0.0)], n = 138}, koData = - ClassData{prior = -0.5955087109960292, unseen = -5.159055299214529, - likelihoods = HashMap.fromList [("", 0.0)], n = 172}}), + ClassData{prior = -0.5839478885949533, unseen = -5.170483995038151, + likelihoods = HashMap.fromList [("", 0.0)], n = 174}}), ("the day before yesterday", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -82,13 +82,12 @@ classifiers n = 3}}), ("dd/mm", Classifier{okData = - ClassData{prior = -0.2231435513142097, - unseen = -2.3025850929940455, - likelihoods = HashMap.fromList [("", 0.0)], n = 8}, + ClassData{prior = -0.5108256237659907, + unseen = -2.0794415416798357, + likelihoods = HashMap.fromList [("", 0.0)], n = 6}, koData = - ClassData{prior = -1.6094379124341003, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -0.916290731874155, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}}), ("today", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -287,140 +286,135 @@ classifiers n = 2}}), ("intersect", Classifier{okData = - ClassData{prior = -0.4402080565520373, unseen = -5.84354441703136, + ClassData{prior = -0.40546510810816444, + unseen = -5.820082930352362, likelihoods = HashMap.fromList [(" - (interval)on ", - -4.048882188145344), + -4.02535169073515), ("Wednesday (non ordinal)", - -5.147494476813453), + -5.123963979403259), (" - (interval)on ", - -4.048882188145344), - ("hourday", -5.147494476813453), - ("dayhour", -2.896202678206958), - ("daymonth", -3.355735007585398), - ("monthyear", -3.068052935133617), - ("Mondayon ", -5.147494476813453), - ("intersecthh:mm", -5.147494476813453), - ("Wednesdaynext ", -5.147494476813453), - ("Marchyear", -4.742029368705289), + -4.02535169073515), + ("hourday", -5.123963979403259), + ("dayhour", -2.872672180796764), + ("daymonth", -3.332204510175204), + ("monthyear", -3.044522437723423), + ("Mondayon ", -5.123963979403259), + ("intersecthh:mm", -5.123963979403259), + ("Wednesdaynext ", -5.123963979403259), + ("Marchyear", -4.718498871295094), ("intersect by \"of\", \"from\", \"'s\"year", - -4.742029368705289), - ("Mondayintersect", -5.147494476813453), - ("last of