From 436e4662d9828bb5dc9c0850f73c882b96a084c7 Mon Sep 17 00:00:00 2001 From: Julien Odent Date: Mon, 13 Nov 2017 08:49:01 -0800 Subject: [PATCH] Time/NL: don't be too eager on days of week Summary: "d" would parse as "dinsdag" (Tuesday), "zo" would parse as "zondag" (Sunday, also means "so"). Made dots mandatory, to prevent further issues (e.g. "zon" means "sun"). Reviewed By: mullender Differential Revision: D6312693 fbshipit-source-id: 58c5824e3ff174fc9c293c3f2d13e152c60e51de --- Duckling/Ranking/Classifiers/NL_XX.hs | 20 +++++++------- Duckling/Time/NL/Corpus.hs | 22 +++++++++++++--- Duckling/Time/NL/Rules.hs | 38 +++++++++++++-------------- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/Duckling/Ranking/Classifiers/NL_XX.hs b/Duckling/Ranking/Classifiers/NL_XX.hs index 5e611026..51a0919e 100644 --- a/Duckling/Ranking/Classifiers/NL_XX.hs +++ b/Duckling/Ranking/Classifiers/NL_XX.hs @@ -74,8 +74,8 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("thursday", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.4849066497880004, - likelihoods = HashMap.fromList [("", 0.0)], n = 10}, + ClassData{prior = 0.0, unseen = -2.3978952727983707, + likelihoods = HashMap.fromList [("", 0.0)], n = 9}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), @@ -299,8 +299,8 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("wednesday", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.1972245773362196, - likelihoods = HashMap.fromList [("", 0.0)], n = 7}, + ClassData{prior = 0.0, unseen = -2.3025850929940455, + likelihoods = HashMap.fromList [("", 0.0)], n = 8}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), @@ -373,8 +373,8 @@ classifiers n = 1}}), ("saturday", Classifier{okData = - ClassData{prior = 0.0, unseen = -1.791759469228055, - likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + ClassData{prior = 0.0, unseen = -1.9459101490553135, + likelihoods = HashMap.fromList [("", 0.0)], n = 5}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), @@ -1007,8 +1007,8 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("sunday", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.3025850929940455, - likelihoods = HashMap.fromList [("", 0.0)], n = 8}, + ClassData{prior = 0.0, unseen = -2.3978952727983707, + likelihoods = HashMap.fromList [("", 0.0)], n = 9}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), @@ -1419,8 +1419,8 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("tuesday", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.3978952727983707, - likelihoods = HashMap.fromList [("", 0.0)], n = 9}, + ClassData{prior = 0.0, unseen = -2.4849066497880004, + likelihoods = HashMap.fromList [("", 0.0)], n = 10}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), diff --git a/Duckling/Time/NL/Corpus.hs b/Duckling/Time/NL/Corpus.hs index 5b2c90be..c718697e 100644 --- a/Duckling/Time/NL/Corpus.hs +++ b/Duckling/Time/NL/Corpus.hs @@ -23,16 +23,27 @@ import Duckling.Time.Corpus import Duckling.Time.Types hiding (Month) import Duckling.TimeGrain.Types hiding (add) +context :: Context +context = testContext {locale = makeLocale NL Nothing} + corpus :: Corpus -corpus = (testContext {locale = makeLocale NL Nothing}, allExamples) +corpus = (context, allExamples) negativeCorpus :: NegativeCorpus -negativeCorpus = (testContext {locale = makeLocale NL Nothing}, examples) +negativeCorpus = (context, examples) where examples = [ "een hotel" , "twee aanbiedingen" , "komende 5 agendapunten" + , "d" + , "do" + , "di" + , "woe" + , "vr" + , "zat" + , "zo" + , "zon" ] allExamples :: [Example] @@ -65,10 +76,10 @@ allExamples = concat ] , examples (datetime (2013, 2, 19, 0, 0, 0) Day) [ "dinsdag" + , "di." ] , examples (datetime (2013, 2, 14, 0, 0, 0) Day) [ "donderdag" - , "do" , "do." ] , examples (datetime (2013, 2, 15, 0, 0, 0) Day) @@ -78,10 +89,12 @@ allExamples = concat , examples (datetime (2013, 2, 16, 0, 0, 0) Day) [ "zaterdag" , "za." + , "zat." ] , examples (datetime (2013, 2, 17, 0, 0, 0) Day) [ "zondag" , "zo." + , "zon." ] , examples (datetime (2013, 3, 1, 0, 0, 0) Day) [ "1 maart" @@ -192,6 +205,7 @@ allExamples = concat , examples (datetime (2013, 2, 13, 0, 0, 0) Day) [ "komende woensdag" , "woensdag" + , "woe." ] , examples (datetime (2013, 2, 20, 0, 0, 0) Day) [ "volgende week woensdag" @@ -324,7 +338,7 @@ allExamples = concat [ "Vrij. 18 juli 2014 7 uur 's avonds" ] , examples (datetime (2014, 7, 18, 0, 0, 0) Day) - [ "Vr, 18 Juli 2014" + [ "Vr., 18 Juli 2014" , "Vrijdag, 18-07-14" , "Vrijdag, 18/07/2014" , "18de juli 2014" diff --git a/Duckling/Time/NL/Rules.hs b/Duckling/Time/NL/Rules.hs index 5d2d0e67..ac7d6bdc 100644 --- a/Duckling/Time/NL/Rules.hs +++ b/Duckling/Time/NL/Rules.hs @@ -45,29 +45,29 @@ ruleInstants = mkRuleInstants ruleDaysOfWeek :: [Rule] ruleDaysOfWeek = mkRuleDaysOfWeek - [ ( "monday" , "maandags?|ma\\.?" ) - , ( "tuesday" , "dinsdags?|di?\\.?" ) - , ( "wednesday" , "woensdags?|woe?\\.?" ) - , ( "thursday" , "donderdags?|do\\.?" ) - , ( "friday" , "vrijdags?|vrij\\.?|vr\\.?" ) - , ( "saturday" , "zaterdags?|zat?\\.?" ) - , ( "sunday" , "zondags?|zon?\\.?" ) + [ ( "monday" , "maandags?|ma\\." ) + , ( "tuesday" , "dinsdags?|di\\." ) + , ( "wednesday" , "woensdags?|woe\\." ) + , ( "thursday" , "donderdags?|do\\." ) + , ( "friday" , "vrijdags?|vr(ij)?\\." ) + , ( "saturday" , "zaterdags?|zat?\\." ) + , ( "sunday" , "zondags?|zon?\\." ) ] ruleMonths :: [Rule] ruleMonths = mkRuleMonths - [ ( "January" , "januari|jan\\.?" ) - , ( "February" , "februari|feb\\.?" ) - , ( "March" , "maart|mar\\.?" ) - , ( "April" , "april|apr\\.?" ) - , ( "May" , "mei\\.?" ) - , ( "June" , "juni|jun\\.?" ) - , ( "July" , "juli|jul\\.?" ) - , ( "August" , "augustus|aug\\.?" ) - , ( "September", "september|sept?\\.?" ) - , ( "October" , "oktober|okt\\.?" ) - , ( "November" , "november|nov\\.?" ) - , ( "December" , "december|dec\\.?" ) + [ ( "January" , "januari|jan\\.?" ) + , ( "February" , "februari|feb\\.?" ) + , ( "March" , "maart|mar\\.?" ) + , ( "April" , "april|apr\\.?" ) + , ( "May" , "mei\\.?" ) + , ( "June" , "juni?\\.?" ) + , ( "July" , "juli?\\.?" ) + , ( "August" , "augustus|aug\\.?" ) + , ( "September", "september|sept?\\.?" ) + , ( "October" , "oktober|okt\\.?" ) + , ( "November" , "november|nov\\.?" ) + , ( "December" , "december|dec\\.?" ) ] ruleSeasons :: [Rule]