Time/NL: don't be too eager on days of week

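Summary:
"d" would parse as "dinsdag" (Tuesday), and "zo" would parse as "zondag" (Sunday) even though "zo" is also the ordinary Dutch word for "so".
Made the trailing dot mandatory on day-of-week abbreviations to prevent further false positives (e.g. "zon" is also the Dutch word for "sun").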

Reviewed By: mullender

Differential Revision: D6312693

fbshipit-source-id: 58c5824e3ff174fc9c293c3f2d13e152c60e51de
This commit is contained in:
Julien Odent 2017-11-13 08:49:01 -08:00 committed by Facebook Github Bot
parent c6a7fedb7b
commit 436e4662d9
3 changed files with 47 additions and 33 deletions

View File

@ -74,8 +74,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("thursday", ("thursday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004, ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 10}, likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
@ -299,8 +299,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("wednesday", ("wednesday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196, ClassData{prior = 0.0, unseen = -2.3025850929940455,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}, likelihoods = HashMap.fromList [("", 0.0)], n = 8},
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
@ -373,8 +373,8 @@ classifiers
n = 1}}), n = 1}}),
("saturday", ("saturday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055, ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}, likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
@ -1007,8 +1007,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("sunday", ("sunday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455, ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 8}, likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
@ -1419,8 +1419,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("tuesday", ("tuesday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707, ClassData{prior = 0.0, unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}, likelihoods = HashMap.fromList [("", 0.0)], n = 10},
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),

View File

@ -23,16 +23,27 @@ import Duckling.Time.Corpus
import Duckling.Time.Types hiding (Month) import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add) import Duckling.TimeGrain.Types hiding (add)
context :: Context
context = testContext {locale = makeLocale NL Nothing}
corpus :: Corpus corpus :: Corpus
corpus = (testContext {locale = makeLocale NL Nothing}, allExamples) corpus = (context, allExamples)
negativeCorpus :: NegativeCorpus negativeCorpus :: NegativeCorpus
negativeCorpus = (testContext {locale = makeLocale NL Nothing}, examples) negativeCorpus = (context, examples)
where where
examples = examples =
[ "een hotel" [ "een hotel"
, "twee aanbiedingen" , "twee aanbiedingen"
, "komende 5 agendapunten" , "komende 5 agendapunten"
, "d"
, "do"
, "di"
, "woe"
, "vr"
, "zat"
, "zo"
, "zon"
] ]
allExamples :: [Example] allExamples :: [Example]
@ -65,10 +76,10 @@ allExamples = concat
] ]
, examples (datetime (2013, 2, 19, 0, 0, 0) Day) , examples (datetime (2013, 2, 19, 0, 0, 0) Day)
[ "dinsdag" [ "dinsdag"
, "di."
] ]
, examples (datetime (2013, 2, 14, 0, 0, 0) Day) , examples (datetime (2013, 2, 14, 0, 0, 0) Day)
[ "donderdag" [ "donderdag"
, "do"
, "do." , "do."
] ]
, examples (datetime (2013, 2, 15, 0, 0, 0) Day) , examples (datetime (2013, 2, 15, 0, 0, 0) Day)
@ -78,10 +89,12 @@ allExamples = concat
, examples (datetime (2013, 2, 16, 0, 0, 0) Day) , examples (datetime (2013, 2, 16, 0, 0, 0) Day)
[ "zaterdag" [ "zaterdag"
, "za." , "za."
, "zat."
] ]
, examples (datetime (2013, 2, 17, 0, 0, 0) Day) , examples (datetime (2013, 2, 17, 0, 0, 0) Day)
[ "zondag" [ "zondag"
, "zo." , "zo."
, "zon."
] ]
, examples (datetime (2013, 3, 1, 0, 0, 0) Day) , examples (datetime (2013, 3, 1, 0, 0, 0) Day)
[ "1 maart" [ "1 maart"
@ -192,6 +205,7 @@ allExamples = concat
, examples (datetime (2013, 2, 13, 0, 0, 0) Day) , examples (datetime (2013, 2, 13, 0, 0, 0) Day)
[ "komende woensdag" [ "komende woensdag"
, "woensdag" , "woensdag"
, "woe."
] ]
, examples (datetime (2013, 2, 20, 0, 0, 0) Day) , examples (datetime (2013, 2, 20, 0, 0, 0) Day)
[ "volgende week woensdag" [ "volgende week woensdag"
@ -324,7 +338,7 @@ allExamples = concat
[ "Vrij. 18 juli 2014 7 uur 's avonds" [ "Vrij. 18 juli 2014 7 uur 's avonds"
] ]
, examples (datetime (2014, 7, 18, 0, 0, 0) Day) , examples (datetime (2014, 7, 18, 0, 0, 0) Day)
[ "Vr, 18 Juli 2014" [ "Vr., 18 Juli 2014"
, "Vrijdag, 18-07-14" , "Vrijdag, 18-07-14"
, "Vrijdag, 18/07/2014" , "Vrijdag, 18/07/2014"
, "18de juli 2014" , "18de juli 2014"

View File

@ -45,29 +45,29 @@ ruleInstants = mkRuleInstants
ruleDaysOfWeek :: [Rule] ruleDaysOfWeek :: [Rule]
ruleDaysOfWeek = mkRuleDaysOfWeek ruleDaysOfWeek = mkRuleDaysOfWeek
[ ( "monday" , "maandags?|ma\\.?" ) [ ( "monday" , "maandags?|ma\\." )
, ( "tuesday" , "dinsdags?|di?\\.?" ) , ( "tuesday" , "dinsdags?|di\\." )
, ( "wednesday" , "woensdags?|woe?\\.?" ) , ( "wednesday" , "woensdags?|woe\\." )
, ( "thursday" , "donderdags?|do\\.?" ) , ( "thursday" , "donderdags?|do\\." )
, ( "friday" , "vrijdags?|vrij\\.?|vr\\.?" ) , ( "friday" , "vrijdags?|vr(ij)?\\." )
, ( "saturday" , "zaterdags?|zat?\\.?" ) , ( "saturday" , "zaterdags?|zat?\\." )
, ( "sunday" , "zondags?|zon?\\.?" ) , ( "sunday" , "zondags?|zon?\\." )
] ]
ruleMonths :: [Rule] ruleMonths :: [Rule]
ruleMonths = mkRuleMonths ruleMonths = mkRuleMonths
[ ( "January" , "januari|jan\\.?" ) [ ( "January" , "januari|jan\\.?" )
, ( "February" , "februari|feb\\.?" ) , ( "February" , "februari|feb\\.?" )
, ( "March" , "maart|mar\\.?" ) , ( "March" , "maart|mar\\.?" )
, ( "April" , "april|apr\\.?" ) , ( "April" , "april|apr\\.?" )
, ( "May" , "mei\\.?" ) , ( "May" , "mei\\.?" )
, ( "June" , "juni|jun\\.?" ) , ( "June" , "juni?\\.?" )
, ( "July" , "juli|jul\\.?" ) , ( "July" , "juli?\\.?" )
, ( "August" , "augustus|aug\\.?" ) , ( "August" , "augustus|aug\\.?" )
, ( "September", "september|sept?\\.?" ) , ( "September", "september|sept?\\.?" )
, ( "October" , "oktober|okt\\.?" ) , ( "October" , "oktober|okt\\.?" )
, ( "November" , "november|nov\\.?" ) , ( "November" , "november|nov\\.?" )
, ( "December" , "december|dec\\.?" ) , ( "December" , "december|dec\\.?" )
] ]
ruleSeasons :: [Rule] ruleSeasons :: [Rule]