diff --git a/Duckling/Ranking/Classifiers/PT_XX.hs b/Duckling/Ranking/Classifiers/PT_XX.hs index dd06d1af..b7631aa7 100644 --- a/Duckling/Ranking/Classifiers/PT_XX.hs +++ b/Duckling/Ranking/Classifiers/PT_XX.hs @@ -103,11 +103,12 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("integer (numeric)", Classifier{okData = - ClassData{prior = -0.2212726433783981, unseen = -5.375278407684165, - likelihoods = HashMap.fromList [("", 0.0)], n = 214}, + ClassData{prior = -0.23894721648733527, + unseen = -5.429345628954441, + likelihoods = HashMap.fromList [("", 0.0)], n = 226}, koData = - ClassData{prior = -1.6169567448481277, unseen = -4.007333185232471, - likelihoods = HashMap.fromList [("", 0.0)], n = 53}}), + ClassData{prior = -1.5486083515863098, unseen = -4.143134726391533, + likelihoods = HashMap.fromList [("", 0.0)], n = 61}}), ("the day before yesterday", Classifier{okData = ClassData{prior = 0.0, unseen = -1.3862943611198906, @@ -117,8 +118,8 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("hh(:|.|h)mm (time-of-day)", Classifier{okData = - ClassData{prior = 0.0, unseen = -2.3978952727983707, - likelihoods = HashMap.fromList [("", 0.0)], n = 9}, + ClassData{prior = 0.0, unseen = -2.70805020110221, + likelihoods = HashMap.fromList [("", 0.0)], n = 13}, koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), @@ -216,116 +217,123 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("intersect by `da` or `de`", Classifier{okData = - ClassData{prior = -1.9564365620423008, unseen = -4.852030263919617, + ClassData{prior = -1.9209712007080613, unseen = -4.912654885736052, likelihoods = HashMap.fromList - [("daymonth", -3.0524276172305362), - ("dd-dd de (interval)year", -4.151039905898646), - ("dayday", -3.457892725338701), - ("Quart-feira (que vem)", -4.151039905898646), - ("dayyear", -2.07159836421881), - ("quarteryear", -4.151039905898646), - ("dd-dd (interval)year", -4.151039905898646), - (" trimestreyear", -4.151039905898646), + [("daymonth", -2.959364629383116), + ("dd-dd de (interval)year", -4.212127597878484), + ("dayday", -3.518980417318539), + ("Quart-feira (que vem)", -4.212127597878484), + ("dayyear", -2.132686056198648), + ("quarteryear", -4.212127597878484), + ("dd-dd (interval)year", -4.212127597878484), + (" trimestreyear", -4.212127597878484), (" (ordinal or number) de year", - -2.7647455447787554), - ("day of month (1st)Mar\231o", -3.457892725338701), + -2.8258332367585934), + ("day of month (1st)Mar\231o", -3.518980417318539), + ("dayminute", -4.212127597878484), ("two time tokens separated by \",\"2year", - -3.7455747977904816), - ("intersectyear", -4.151039905898646), + -3.8066624897703196), + ("intersectyear", -4.212127597878484), ("dia (non ordinal)Fevereiro", - -3.7455747977904816), + -3.518980417318539), (" (ordinal or number) year", - -4.151039905898646), - ("two time tokens separated by \",\"year", -3.7455747977904816), - ("Domingo passado", -4.151039905898646), - ("dayweek", -3.457892725338701), + -4.212127597878484), + ("two time tokens separated by \",\"year", -3.8066624897703196), + ("Domingo passado", -4.212127597878484), + ("dayweek", -3.518980417318539), (" (ordinal or number) de two time tokens separated by \",\"2", - -4.151039905898646), + -4.212127597878484), (" (ordinal or number) de intersect", - -4.151039905898646), - ("Quart-feiraproximo ", -4.151039905898646), + -4.212127597878484), + ("Quart-feiraproximo ", -4.212127597878484), + ("dia (non ordinal)intersect", + -4.212127597878484), (" (ordinal or number) de two time tokens separated by \",\"", - -4.151039905898646)], - n = 27}, + -4.212127597878484)], + n = 29}, koData = - ClassData{prior = -0.15240700022243142, - unseen = -5.996452088619021, + ClassData{prior = -0.15836831577146182, + unseen = -6.030685260261263, likelihoods = HashMap.fromList [("time-of-day (latent) - (interval)", - -4.895349138638459), - ("hourday", -3.160748083250353), - ("year (latent)Abril", -4.895349138638459), - ("year (latent)Julho", -3.3549040976913105), - ("time-of-day (latent)Fevereiro", -4.048051278251256), - ("monthday", -4.6076670661866785), - ("monthyear", -3.5960661545081987), - ("yearhour", -3.914519885626733), + -4.929666231562589), + ("hourday", -3.195065176174482), + ("year (latent)Abril", -4.929666231562589), + ("year (latent)Julho", -3.3892211906154395), + ("dayhour", -5.3351313396707525), + ("time-of-day (latent)Fevereiro", -3.948836978550862), + ("monthday", -4.641984159110808), + ("monthyear", -3.6303832474323277), + ("yearhour", -3.8310539428944788), ("entre e (interval)Julho", - -3.6913763343125234), - ("houryear", -3.3549040976913105), - ("year (latent)Maio", -4.202201958078514), + -3.7256934272366524), + ("houryear", -3.3892211906154395), + ("year (latent)Maio", -4.236519051002643), ("time-of-day (latent)two time tokens separated by \",\"2", - -4.895349138638459), - ("Setembroyear", -4.202201958078514), - ("time-of-day (latent)Setembro", -4.202201958078514), - ("time-of-day (latent)intersect", -3.2859112262043593), + -4.929666231562589), + ("Setembroyear", -4.236519051002643), + ("time-of-day (latent)Setembro", -4.236519051002643), + ("time-of-day (latent)intersect", -3.3202283191284883), ("year (latent) ", - -5.300814246746624), - ("\224s Julho", -4.6076670661866785), - ("year (latent)intersect", -4.6076670661866785), - ("Setembrointersect", -5.300814246746624), + -5.3351313396707525), + ("\224s Julho", -4.641984159110808), + ("year (latent)intersect", -4.236519051002643), + ("Setembrointersect", -5.3351313396707525), ("intersect by `da` or `de`two time tokens separated by \",\"", - -5.300814246746624), + -5.3351313396707525), ("year (latent)amanh\227 pela ", - -5.300814246746624), - ("hourmonth", -1.7598549227093099), + -5.3351313396707525), + ("hourmonth", -1.7797832781813394), ("time-of-day (latent)intersect by `da` or `de`", - -3.509054777518569), - ("monthmonth", -5.300814246746624), + -3.543371870442698), + ("monthmonth", -5.3351313396707525), ("Setembrotwo time tokens separated by \",\"2", - -5.300814246746624), - ("year (latent)Fevereiro", -4.048051278251256), - ("time-of-day (latent)Dezembro", -5.300814246746624), - ("dayyear", -5.300814246746624), - ("time-of-day (latent)Julho", -3.509054777518569), - ("time-of-day (latent)Abril", -4.895349138638459), - ("passado year", -5.300814246746624), - ("year (latent) da manha", -5.300814246746624), + -5.3351313396707525), + ("year (latent)Fevereiro", -3.948836978550862), + ("time-of-day (latent)Dezembro", -5.3351313396707525), + ("dayyear", -5.3351313396707525), + ("time-of-day (latent)Julho", -3.543371870442698), + ("time-of-day (latent)Abril", -4.929666231562589), + ("passado year", -5.3351313396707525), + ("year (latent) da manha", -5.3351313396707525), (" (ordinal or number) de year", - -5.300814246746624), - ("time-of-day (latent)Mar\231o", -4.202201958078514), - ("year (latent)Janeiro", -5.300814246746624), - ("year (latent)Dezembro", -5.300814246746624), + -5.3351313396707525), + ("time-of-day (latent)Mar\231o", -4.236519051002643), + ("year (latent)Janeiro", -5.3351313396707525), + ("year (latent)Dezembro", -5.3351313396707525), ("entre e (interval)Janeiro", - -5.300814246746624), - ("yearmonth", -2.1872989375362493), - ("two time tokens separated by \",\"2year", -4.895349138638459), + -5.3351313396707525), + ("yearmonth", -2.1996371237416033), + ("two time tokens separated by \",\"2year", -4.929666231562589), ("Setembrotwo time tokens separated by \",\"", - -5.300814246746624), - ("intersect by `da` or `de`intersect", -5.300814246746624), - ("intersectyear", -5.300814246746624), + -5.3351313396707525), + ("intersect by `da` or `de`intersect", -5.3351313396707525), + ("intersectyear", -5.3351313396707525), ("intersect by `da` or `de`two time tokens separated by \",\"2", - -5.300814246746624), - ("time-of-day (latent)Maio", -4.202201958078514), - ("antes das Julho", -4.048051278251256), - ("Maioyear", -4.6076670661866785), - ("year (latent)Setembro", -4.202201958078514), - ("\224s Janeiro", -5.300814246746624), - ("two time tokens separated by \",\"year", -4.895349138638459), + -5.3351313396707525), + ("time-of-day (latent)Maio", -4.236519051002643), + ("antes das Julho", -4.082368371175385), + ("Maioyear", -4.641984159110808), + ("year (latent)Setembro", -4.236519051002643), + ("\224s Janeiro", -5.3351313396707525), + ("two time tokens separated by \",\"year", -4.929666231562589), ("time-of-day (latent)two time tokens separated by \",\"", - -4.895349138638459), - ("intersect by `da` or `de`year", -3.7967368499703498), + -4.929666231562589), + ("intersect by `da` or `de`year", -3.8310539428944788), ("year (latent) - (interval)", - -4.895349138638459), - ("yearday", -5.300814246746624), - ("time-of-day (latent)Janeiro", -5.300814246746624), - ("Julhoyear", -5.300814246746624), + -4.929666231562589), + ("yearday", -5.3351313396707525), + ("time-of-day (latent)Janeiro", -5.3351313396707525), + ("dia (non ordinal)intersect", + -5.3351313396707525), + ("Julhoyear", -5.3351313396707525), ("de - (interval)Julho", - -5.300814246746624), - ("year (latent)Mar\231o", -4.202201958078514)], - n = 164}}), + -5.3351313396707525), + ("year (latent)Mar\231o", -4.236519051002643), + ("yearminute", -5.3351313396707525)], + n = 169}}), (" and half", Classifier{okData = ClassData{prior = 0.0, unseen = -3.1354942159291497, @@ -405,11 +413,11 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("integer (0..19)", Classifier{okData = - ClassData{prior = -1.7699577099400975e-2, - unseen = -4.060443010546419, - likelihoods = HashMap.fromList [("", 0.0)], n = 56}, + ClassData{prior = -1.6529301951210582e-2, + unseen = -4.127134385045092, + likelihoods = HashMap.fromList [("", 0.0)], n = 60}, koData = - ClassData{prior = -4.04305126783455, unseen = -1.0986122886681098, + ClassData{prior = -4.110873864173311, unseen = -1.0986122886681098, likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), ("desde dd-dd (interval)", Classifier{okData = @@ -425,44 +433,46 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), (" ", Classifier{okData = - ClassData{prior = -0.3437715391028245, unseen = -4.61512051684126, + ClassData{prior = -0.3639653772014116, unseen = -4.663439094112067, likelihoods = HashMap.fromList - [(" and quinzeafternoon", -2.995732273553991), - ("dayhour", -3.2188758248682006), - ("\224s morning", -3.2188758248682006), - ("hourhour", -1.8325814637483102), - (" and halfafternoon", -2.995732273553991), - ("minutehour", -1.5141277326297755), - ("time-of-day (latent)morning", -3.2188758248682006), - ("time-of-day (latent)evening", -3.506557897319982), - ("\224s afternoon", -2.3025850929940455), + [(" and quinzeafternoon", -3.044522437723423), + ("dayhour", -3.2676659890376327), + ("\224s morning", -3.2676659890376327), + ("intersectevening", -3.960813169597578), + ("hourhour", -1.7635885922613588), + (" and halfafternoon", -3.044522437723423), + ("minutehour", -1.5629178967992075), + ("time-of-day (latent)morning", -3.2676659890376327), + ("time-of-day (latent)evening", -3.5553480614894135), + ("\224s afternoon", -2.3513752571634776), (" (ordinal or number) de morning", - -3.912023005428146), + -3.960813169597578), ("dia de morning", - -3.912023005428146), - ("time-of-day (latent)afternoon", -3.506557897319982), + -3.960813169597578), + ("time-of-day (latent)afternoon", -3.5553480614894135), (" and afternoon", - -2.659260036932778), - ("intersectmorning", -3.912023005428146), - ("\224s evening", -3.506557897319982), - ("intersect by `da` or `de`morning", -3.912023005428146)], - n = 39}, + -2.70805020110221), + ("intersectmorning", -3.960813169597578), + ("\224s evening", -3.2676659890376327), + ("intersect by `da` or `de`morning", -3.960813169597578)], + n = 41}, koData = - ClassData{prior = -1.2347444629926898, unseen = -4.007333185232471, + ClassData{prior = -1.1871656860095547, unseen = -4.0943445622221, likelihoods = HashMap.fromList - [("yearhour", -1.5040773967762742), - ("monthhour", -2.890371757896165), - ("hourhour", -2.6026896854443837), - ("time-of-day (latent)morning", -3.295836866004329), - ("year (latent)afternoon", -2.1972245773362196), - ("Fevereiromorning", -3.295836866004329), - ("year (latent)evening", -2.890371757896165), - ("time-of-day (latent)afternoon", -2.890371757896165), - ("year (latent)morning", -2.379546134130174), - ("intersect by `da` or `de`morning", -3.295836866004329)], - n = 16}}), + [("yearhour", -1.5125880864441827), + ("monthhour", -2.9789251552376097), + ("hourhour", -2.468099531471619), + ("time-of-day (latent)morning", -3.3843902633457743), + ("year (latent)afternoon", -2.2857779746776643), + ("time-of-day (latent)evening", -3.3843902633457743), + ("Fevereiromorning", -3.3843902633457743), + ("year (latent)evening", -2.691243082785829), + ("time-of-day (latent)afternoon", -2.9789251552376097), + ("year (latent)morning", -2.468099531471619), + ("intersect by `da` or `de`morning", -3.3843902633457743)], + n = 18}}), ("de - (interval)", Classifier{okData = ClassData{prior = -infinity, unseen = -1.0986122886681098, @@ -520,17 +530,16 @@ classifiers n = 108}}), (" horas", Classifier{okData = - ClassData{prior = -0.7731898882334817, - unseen = -2.9444389791664407, + ClassData{prior = -0.5306282510621704, unseen = -3.295836866004329, likelihoods = HashMap.fromList - [("time-of-day (latent)", -1.5040773967762742), - ("hour", -0.9444616088408514), - ("depois das ", -1.791759469228055), - ("\224s ", -2.1972245773362196)], - n = 6}, + [("time-of-day (latent)", -1.466337068793427), + ("hour", -0.8602012652231115), + ("depois das ", -2.159484249353372), + ("\224s ", -1.8718021769015913)], + n = 10}, koData = - ClassData{prior = -0.6190392084062235, unseen = -3.044522437723423, + ClassData{prior = -0.8873031950009028, unseen = -3.044522437723423, likelihoods = HashMap.fromList [("time-of-day (latent)", -1.2039728043259361), @@ -540,147 +549,172 @@ classifiers n = 7}}), ("intersect", Classifier{okData = - ClassData{prior = -1.2729656758128873, unseen = -5.288267030694535, + ClassData{prior = -1.0287687153008453, unseen = -5.58724865840025, likelihoods = HashMap.fromList - [("dayhour", -2.5106150064982073), - ("nowquinze para as (as relative minutes)", - -4.590056548178043), - ("Quart-feiraamanh\227 pela ", -4.590056548178043), - ("now\224s ", -4.1845914400698785), - ("now and 3/4", -4.590056548178043), - ("Segunda-feirain the ", -4.590056548178043), - ("Quart-feira ", -4.590056548178043), - ("yearSexta-feira", -4.590056548178043), - ("dayday", -3.0859791514017694), - ("hourhour", -4.590056548178043), - ("dayyear", -2.392831970841824), - ("de Sexta-feira", -4.590056548178043), - ("quarteryear", -4.590056548178043), - ("minutehour", -3.337293579682675), - (" and quinzein the ", - -4.1845914400698785), - ("Sexta-feira (ordinal or number) de ", - -4.590056548178043), - ("intersect by `da` or `de`in the ", - -4.1845914400698785), - (" (ordinal or number) de year", - -4.590056548178043), - ("now and ", -4.590056548178043), - ("tomorrow horas", -4.1845914400698785), - ("dd-dd de (interval)de ", -4.590056548178043), - ("now para as (as relative minutes)", - -4.590056548178043), - ("\224s in the ", -3.896909367618098), - ("Sexta-feiraintersect", -4.590056548178043), - ("dayminute", -3.337293579682675), - (" trimestrede ", -4.590056548178043), - (" (ordinal or number) de de ", - -3.2037621870581527), - ("intersectSexta-feira", -4.590056548178043), - ("dd-dd (interval)de ", -4.590056548178043), - ("Sexta-feiraintersect by `da` or `de`", -4.590056548178043), - (" (ordinal or number) year", - -4.590056548178043), - ("intersect by `da` or `de`Sexta-feira", -4.590056548178043), - (" (ordinal or number) de in the ", - -4.1845914400698785), - ("dia de in the ", - -4.1845914400698785), - (" (ordinal or number) de ", - -4.590056548178043), - ("yearday", -4.1845914400698785), - (" (ordinal or number) de two time tokens separated by \",\"2", - -4.590056548178043), - (" (ordinal or number) de intersect", - -4.590056548178043), - ("two time tokens separated by \",\"de ", - -4.1845914400698785), - ("Quart-feira\224s ", -4.590056548178043), - ("Quart-feira da manha", -4.590056548178043), - (" (ordinal or number) de two time tokens separated by \",\"", - -4.590056548178043), - ("tomorrowdepois das ", -4.1845914400698785), - (" and in the ", - -4.1845914400698785), - ("two time tokens separated by \",\"2de ", - -4.1845914400698785), - ("intersectde ", -4.590056548178043)], - n = 56}, - koData = - ClassData{prior = -0.3285040669720361, unseen = -5.924255797414532, - likelihoods = - HashMap.fromList - [("hourday", -3.3566290621822787), - ("dayhour", -3.8421368779639797), - ("daymonth", -5.2284312390838705), - ("monthday", -4.1298189504157605), - ("monthyear", -3.282521090028557), + [("dayhour", -1.9199346626520526), + ("dia (non ordinal)\224s ", + -3.5040547671018634), + ("dia de \224s ", + -4.890349128221754), + ("tomorrowin the ", -3.9740583963475986), (" (ordinal or number) de \224s ", - -5.2284312390838705), - ("now\224s ", -4.822966130975706), - ("entre e (interval)Julho", - -3.61899332664977), - ("houryear", -3.2135282185416054), - ("Dezembro\224s ", -5.2284312390838705), - ("\224s Julho", -4.822966130975706), - ("Fevereiroin the ", -4.822966130975706), - ("de antes das ", -2.487591215158669), - ("monthhour", -3.9756682705885025), - ("Setembrointersect", -5.2284312390838705), - ("intersect by `da` or `de`two time tokens separated by \",\"", - -5.2284312390838705), - ("hourmonth", -3.3566290621822787), - ("Julhode ", -5.2284312390838705), - (" am|pmintersect by `da` or `de`", - -4.535284058523925), - ("Setembrotwo time tokens separated by \",\"2", - -5.2284312390838705), - ("dayyear", -5.2284312390838705), + -4.890349128221754), + ("nowquinze para as (as relative minutes)", + -4.890349128221754), + ("Quart-feiraamanh\227 pela ", -4.890349128221754), + ("now\224s ", -4.484884020113589), + ("now and 3/4", -4.890349128221754), + ("Segunda-feirain the ", -4.890349128221754), + ("Quart-feira ", -4.890349128221754), + ("yearSexta-feira", -4.890349128221754), + ("dayday", -3.3862717314454795), + ("hourhour", -4.890349128221754), + ("dayyear", -2.6931245508855346), + ("Quinta-feira\224s ", -4.197201947661808), + ("de Sexta-feira", -4.890349128221754), + ("quarteryear", -4.890349128221754), ("intersect by `da` or `de`\224s ", - -5.2284312390838705), - ("passado year", -5.2284312390838705), - ("de o de