From d8888e2ff842c4dacd570989570c101cf7b43153 Mon Sep 17 00:00:00 2001 From: Filipe Pereira Date: Mon, 16 Aug 2021 10:43:17 -0700 Subject: [PATCH] Ca time improvements (#639) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Some time recognition improvements for Catalan: - morning should be a time range recognised until noon - "dema" can also be used for tomorrow (besides "demà") - "se" alone should not be understood as September Pull Request resolved: https://github.com/facebook/duckling/pull/639 Reviewed By: stroxler Differential Revision: D30312076 Pulled By: chessai fbshipit-source-id: 1a42bbd7eecc4f5690145ee9cadb8eccae8edd08 --- Duckling/Ranking/Classifiers/CA_XX.hs | 333 +++++++++++++------------- Duckling/Time/CA/Corpus.hs | 18 +- Duckling/Time/CA/Rules.hs | 8 +- 3 files changed, 193 insertions(+), 166 deletions(-) diff --git a/Duckling/Ranking/Classifiers/CA_XX.hs b/Duckling/Ranking/Classifiers/CA_XX.hs index 9fcc0e0d..1e6a2705 100644 --- a/Duckling/Ranking/Classifiers/CA_XX.hs +++ b/Duckling/Ranking/Classifiers/CA_XX.hs @@ -23,18 +23,18 @@ classifiers = HashMap.fromList [("midnight", Classifier{okData = - ClassData{prior = 0.0, unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}, + ClassData{prior = 0.0, unseen = -1.0986122886681098, + likelihoods = HashMap.fromList [("", 0.0)], n = 1}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), ("integer (numeric)", Classifier{okData = - ClassData{prior = -0.6632942174102642, unseen = -3.970291913552122, - likelihoods = HashMap.fromList [("", 0.0)], n = 51}, + ClassData{prior = -0.6644370746775139, unseen = -4.007333185232471, + likelihoods = HashMap.fromList [("", 0.0)], n = 53}, koData = - ClassData{prior = -0.723918839226699, unseen = -3.912023005428146, - likelihoods = HashMap.fromList [("", 0.0)], n = 48}}), + ClassData{prior = -0.7227059828014897, unseen = -3.951243718581427, + likelihoods = HashMap.fromList [("", 0.0)], n = 50}}), ("the day before yesterday", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -79,62 +79,64 @@ classifiers likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), ("dd[/-]mm", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.1972245773362196, - likelihoods = HashMap.fromList [("", 0.0)], n = 7}, + ClassData{prior = 0.0, unseen = -2.3025850929940455, + likelihoods = HashMap.fromList [("", 0.0)], n = 8}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), ("intersect by `de`", Classifier{okData = - ClassData{prior = -0.18721154208814647, unseen = -4.61512051684126, + ClassData{prior = -0.17768117723745236, + unseen = -4.663439094112067, likelihoods = HashMap.fromList - [("Dillunseste|en un ", -3.912023005428146), + [("Dillunseste|en un ", -3.960813169597578), (" de year (value by adding three composing numbers together)", - -3.912023005428146), + -3.960813169597578), + ("el (non ordinal)Setembre", + -3.5553480614894135), ("el (non ordinal)intersect by `de`", - -3.912023005428146), - ("daymonth", -1.6607312068216509), - ("monthyear", -3.912023005428146), - ("el (non ordinal)Abril", -3.912023005428146), - ("Dimecresel (proximo|que viene)", -3.912023005428146), - ("Maigyear", -3.912023005428146), - ("Dimecreseste|en un ", -3.912023005428146), - ("dayyear", -2.5257286443082556), - ("quarteryear", -3.912023005428146), - ("dd-dd (interval)year", -3.506557897319982), - ("el