support composite distances

Summary:
Add general support for composite distances and remove ad-hoc feet/inch-specific rules.
Test cases are added to the corpus.

This implementation resolves the ambiguous "m" unit to either Metric or Imperial units
based on context.

Reviewed By: patapizza

Differential Revision: D13911040

fbshipit-source-id: e75fd237158fc1c2f02709dfe4dd2f1907958b4c
This commit is contained in:
Karl Ostmo 2019-02-15 10:39:12 -08:00 committed by Facebook Github Bot
parent 6d8a320a40
commit a6b60c8921
7 changed files with 323 additions and 57 deletions

View File

@ -51,6 +51,52 @@ allExamples = concat
, examples (simple Metre 1.87)
[ "1.87 meters"
]
-- Composite values:
, examples (simple Inch 94)
[ "7 feet and 10 inches"
, "7 feet, 10 inches"
, "7 feet 10 inches"
]
, examples (simple Metre 2001)
[ "2 km and 1 meter"
, "2 kilometer, 1 metre"
, "2 kilometer 1 metre"
]
, examples (simple Inch 166)
[ "2 yards 7 ft 10 inches"
, "2 yds, 7 feet and 10 inches"
, "2 yards, 7 feet, 10 in"
]
, examples (simple Foot 13)
[ "2 yards and 7 feet"
, "2 yards, 7 feet"
, "2 yd 7'"
]
, examples (simple Centimetre 1000806)
[ "10 kms 8 metres 6 cm"
, "10 kms, 8 meters, 6 cm"
, "10 kms, 8 meters and 6 centimeters"
-- , "10 kms, 8 meters, and 6 cm" -- Oxford comma not supported
]
, examples (simple Metre 1.3048)
[ "1 meter and 1 foot"
]
, examples (simple Kilometre 2.609344)
[ "1 kilometer and 1 mile"
]
, examples (simple M 3)
-- The original, ambiguous "m" unit is preserved
[ "3m"
]
, examples (simple Centimetre 305)
-- The ambiguous "m" unit is inferred as "meteres"
[ "3m and 5cm"
]
, examples (simple Foot 5281)
-- The ambiguous "m" unit is inferred as "miles"
[ "1m and 1ft"
]
-- Ranges:
, examples (between Kilometre (3, 5))
[ "between 3 and 5 kilometers"
, "from 3km to 5km"

View File

@ -24,54 +24,23 @@ import Duckling.Distance.Types (DistanceData(..))
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Types
import qualified Duckling.Distance.Types as TDistance
import qualified Duckling.Distance.Types as TDist
import qualified Duckling.Numeral.Types as TNumeral
ruleDistanceFeetInch :: Rule
ruleDistanceFeetInch = Rule
{ name = "<distance|feet> <distance|inch>"
, pattern =
[ Predicate $ isDistanceOfUnit TDistance.Foot
, Predicate $ isDistanceOfUnit TDistance.Inch
]
, prod = \case
(Token Distance DistanceData {TDistance.value = Just feet}:
Token Distance DistanceData {TDistance.value = Just inches}:
_) -> Just . Token Distance . withUnit TDistance.Inch . distance $
feet * 12 + inches
_ -> Nothing
}
ruleDistanceFeetAndInch :: Rule
ruleDistanceFeetAndInch = Rule
{ name = "<distance|feet> and <distance|inch>"
, pattern =
[ Predicate $ isDistanceOfUnit TDistance.Foot
, regex "and"
, Predicate $ isDistanceOfUnit TDistance.Inch
]
, prod = \case
(Token Distance DistanceData {TDistance.value = Just feet}:
_:
Token Distance DistanceData {TDistance.value = Just inches}:
_) -> Just . Token Distance . withUnit TDistance.Inch . distance $
feet * 12 + inches
_ -> Nothing
}
distances :: [(Text, String, TDistance.Unit)]
distances :: [(Text, String, TDist.Unit)]
distances = [ -- Imperial
("miles", "mi(le(s)?)?", TDistance.Mile)
, ("yard", "y(ar)?ds?", TDistance.Yard)
, ("feet", "('|f(oo|ee)?ts?)", TDistance.Foot)
, ("inch", "(\"|''|in(ch(es)?)?)", TDistance.Inch)
("miles", "mi(le(s)?)?", TDist.Mile)
, ("yard", "y(ar)?ds?", TDist.Yard)
, ("feet", "('|f(oo|ee)?ts?)", TDist.Foot)
, ("inch", "(\"|''|in(ch(es)?)?)", TDist.Inch)
-- Metric
, ("km", "k(ilo)?m?(et(er|re))?s?", TDistance.Kilometre)
, ("meters", "met(er|re)s?", TDistance.Metre)
, ("centimeters", "cm|centimet(er|re)s?", TDistance.Centimetre)
, ("millimeters", "mm|millimet(er|re)s?", TDistance.Millimetre)
, ("km", "k(ilo)?m?(et(er|re))?s?", TDist.Kilometre)
, ("meters", "met(er|re)s?", TDist.Metre)
, ("centimeters", "cm|centimet(er|re)s?", TDist.Centimetre)
, ("millimeters", "mm|millimet(er|re)s?", TDist.Millimetre)
-- Ambiguous
, ("m (miles or meters)", "m", TDistance.M)
, ("m (miles or meters)", "m", TDist.M)
]
rulePrecision :: Rule
@ -89,7 +58,7 @@ rulePrecision = Rule
ruleDistances :: [Rule]
ruleDistances = map go distances
where
go :: (Text, String, TDistance.Unit) -> Rule
go :: (Text, String, TDist.Unit) -> Rule
go (name, regexPattern, u) = Rule
{ name = name
, pattern = [ dimension Distance, regex regexPattern ]
@ -111,7 +80,7 @@ ruleIntervalBetweenNumeral = Rule
(_:
Token Numeral NumeralData{TNumeral.value = from}:
_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u}:
_) | from < to ->
Just . Token Distance . withInterval (from, to) $ unitOnly u
_ -> Nothing
@ -128,9 +97,9 @@ ruleIntervalBetween = Rule
]
, prod = \case
(_:
Token Distance DistanceData{TDistance.value = Just from, TDistance.unit = Just u1}:
Token Distance DistanceData{TDist.value=Just from, TDist.unit=Just u1}:
_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u2}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u2}:
_) | from < to && u1 == u2 ->
Just . Token Distance . withInterval (from, to) $ unitOnly u1
_ -> Nothing
@ -147,7 +116,7 @@ ruleIntervalNumeralDash = Rule
, prod = \case
(Token Numeral NumeralData{TNumeral.value = from}:
_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u}:
_) | from < to ->
Just . Token Distance . withInterval (from, to) $ unitOnly u
_ -> Nothing
@ -162,9 +131,9 @@ ruleIntervalDash = Rule
, Predicate isSimpleDistance
]
, prod = \case
(Token Distance DistanceData{TDistance.value = Just from, TDistance.unit = Just u1}:
(Token Distance DistanceData{TDist.value=Just from, TDist.unit=Just u1}:
_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u2}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u2}:
_) | from < to && u1 == u2 ->
Just . Token Distance . withInterval (from, to) $ unitOnly u1
_ -> Nothing
@ -179,7 +148,7 @@ ruleIntervalMax = Rule
]
, prod = \case
(_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u}:
_) -> Just . Token Distance . withMax to $ unitOnly u
_ -> Nothing
}
@ -193,20 +162,53 @@ ruleIntervalMin = Rule
]
, prod = \case
(_:
Token Distance DistanceData{TDistance.value = Just to, TDistance.unit = Just u}:
Token Distance DistanceData{TDist.value=Just to, TDist.unit=Just u}:
_) -> Just . Token Distance . withMin to $ unitOnly u
_ -> Nothing
}
-- | NOTE: Oxford comma is not supported.
ruleCompositeDistanceCommasAnd :: Rule
ruleCompositeDistanceCommasAnd = Rule
{ name = "composite <distance> (with ,/and)"
, pattern =
[ Predicate isSimpleDistance
, regex ",|and"
, Predicate isSimpleDistance
]
, prod = \case
(Token Distance DistanceData{TDist.value=Just v1, TDist.unit=Just u1}:
_:
Token Distance DistanceData{TDist.value=Just v2, TDist.unit=Just u2}:
_) | u1 /= u2 && v1 > 0 && v2 > 0 -> Token Distance <$>
distanceSum v1 u1 v2 u2
_ -> Nothing
}
ruleCompositeDistance :: Rule
ruleCompositeDistance = Rule
{ name = "composite <distance>"
, pattern =
[ Predicate isSimpleDistance
, Predicate isSimpleDistance
]
, prod = \case
(Token Distance DistanceData{TDist.value=Just v1, TDist.unit=Just u1}:
Token Distance DistanceData{TDist.value=Just v2, TDist.unit=Just u2}:
_) | u1 /= u2 && v1 > 0 && v2 > 0 -> Token Distance <$>
distanceSum v1 u1 v2 u2
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleDistanceFeetInch
, ruleDistanceFeetAndInch
, ruleIntervalBetweenNumeral
[ ruleIntervalBetweenNumeral
, ruleIntervalBetween
, ruleIntervalMax
, ruleIntervalMin
, ruleIntervalNumeralDash
, ruleIntervalDash
, rulePrecision]
++ ruleDistances
, rulePrecision
, ruleCompositeDistanceCommasAnd
, ruleCompositeDistance
] ++ ruleDistances

View File

@ -10,6 +10,7 @@
module Duckling.Distance.Helpers
( distance
, distanceSum
, isDistanceOfUnit
, isSimpleDistance
, unitOnly
@ -21,11 +22,13 @@ module Duckling.Distance.Helpers
) where
import Prelude
import Data.Semigroup ((<>))
import Duckling.Dimensions.Types
import Duckling.Distance.Types (DistanceData(..))
import Duckling.Types
import qualified Duckling.Distance.Types as TDistance
import qualified Duckling.DistanceUnits.Types as DGTypes
-- -----------------------------------------------------------------
-- Patterns
@ -36,7 +39,8 @@ isSimpleDistance (Token Distance DistanceData {TDistance.value = Just _
isSimpleDistance _ = False
isDistanceOfUnit :: TDistance.Unit -> Predicate
isDistanceOfUnit unit (Token Distance DistanceData {TDistance.unit = Just u}) = unit == u
isDistanceOfUnit unit (Token Distance DistanceData {TDistance.unit = Just u}) =
unit == u
isDistanceOfUnit _ _ = False
-- -----------------------------------------------------------------
@ -48,6 +52,22 @@ distance x = DistanceData {TDistance.value = Just x
, TDistance.minValue = Nothing
, TDistance.maxValue = Nothing}
distanceSum ::
Double
-> TDistance.Unit
-> Double
-> TDistance.Unit
-> Maybe DistanceData
distanceSum v1 u1 v2 u2 = unwrapContext $ cd1 <> cd2
where
wrapContext v u = DGTypes.ContextualDistance v $ DGTypes.toSystemUnit u
cd1 = wrapContext v1 u1
cd2 = wrapContext v2 u2
unwrapContext DGTypes.Nonrelatable = Nothing
unwrapContext (DGTypes.ContextualDistance v u) =
Just $ withUnit (DGTypes.toRawUnit u) $ distance v
unitOnly :: TDistance.Unit -> DistanceData
unitOnly u = DistanceData {TDistance.unit = Just u
, TDistance.value = Nothing

View File

@ -0,0 +1,191 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-- | This module shadows names of some units previously defined in
-- 'Duckling.Distance.Types'.
-- Therefore, the units from that module must be used qualified by module.
module Duckling.DistanceUnits.Types
( ContextualDistance (..)
, toSystemUnit
, toRawUnit
) where
import Data.Semigroup
import Data.Tuple.Extra (both)
import Prelude
import qualified Duckling.Distance.Types as TDist
-- | Supports deferred resolution of ambiguous units.
-- Note that this sum type cannot be simply replaced by
-- "Maybe (ContextualDistance Double DeferrableUnit)"
-- (with "Nothing" representing "Nonrelatable").
-- See "NOTE A" below.
data ContextualDistance
= Nonrelatable
-- ^ If two different ambiguous units were to be composed, then
-- it would not be possible to decide which to resolve to. As of today,
-- this won't happen since for now "M" is the lone ambiguous unit.
| ContextualDistance Double DeferrableUnit
-- | This represents Units that have not yet been established as unrelatable.
-- The "Definite" constructor is purposely listed in front of "Ambiguous",
-- so that it will be preferred when taking choosing the "min" between
-- an "Ambiguous" and "Definite" unit.
data DeferrableUnit
= Definite SystemUnit
| Ambiguous AmbiguousUnit
deriving (Eq, Ord)
-- | These measurement-system-specific units exist to disambiguate
-- the \"M\" of the 'Unit' type and to maintain the measurement-system context
-- for resolving to the appropriate precision.
--
-- It's also just handy to have separate programmatically accessible lists
-- of units for each type of measurement system.
data SystemUnit
= Metric MetricUnit
| Imperial ImperialUnit
deriving (Eq, Ord)
data ImperialUnit
= Inch
| Foot
| Yard
| Mile
deriving (Eq, Ord)
data MetricUnit
= Millimetre
| Centimetre
| Metre
| Kilometre
deriving (Eq, Ord)
-- | Currently there is only one actually ambiguous unit, but this design allows
-- for expansion.
data AmbiguousUnit
= M -- ^ "Miles" or "Metres"
deriving (Eq, Ord)
-- | Represents a value with an unambiguous unit
data UnitValuePair = UnitValuePair SystemUnit Double
sumScaledUnits :: SystemUnit -> (UnitValuePair, UnitValuePair) -> Double
sumScaledUnits preferredUnit = uncurry (+) . both (scaleUnits preferredUnit)
-- | Determine the definite meaning of an "Ambiguous" unit using the context of
-- another "Definite" unit
reconcileAmbiguousWithDefinite ::
Double
-> AmbiguousUnit
-> Double
-> SystemUnit
-> ContextualDistance
reconcileAmbiguousWithDefinite av au dv du =
ContextualDistance combinedValue $ Definite preferredUnit
where
resolvedAmbiguousUnit = resolveUnit au du
preferredUnit = du `min` resolvedAmbiguousUnit
combinedValue = sumScaledUnits preferredUnit
(UnitValuePair resolvedAmbiguousUnit av, UnitValuePair du dv)
-- | When both Metric and Imperial units are given, resolve to Metric.
-- Otherwise, preserve the original measurement system and use the smaller unit.
-- For the purpose of this resolution, all Metric units are considered "smaller"
-- than Imperial units.
instance Semigroup ContextualDistance where
_ <> Nonrelatable = Nonrelatable
Nonrelatable <> _ = Nonrelatable
(ContextualDistance v1 u@(Ambiguous u1))
<> (ContextualDistance v2 (Ambiguous u2))
| u1 == u2 = ContextualDistance (v1 + v2) u
| otherwise = Nonrelatable
-- NOTE A: Needing to return "Nonrelatable" in this edge case is why
-- the two-member "ContextualDistance" sum type cannot be simply
-- represented as (Maybe (ContextualDistance Double DeferrableUnit)).
(ContextualDistance av (Ambiguous au))
<> (ContextualDistance dv (Definite du)) =
reconcileAmbiguousWithDefinite av au dv du
(ContextualDistance dv (Definite du))
<> (ContextualDistance av (Ambiguous au)) =
reconcileAmbiguousWithDefinite av au dv du
(ContextualDistance v1 (Definite u1))
<> (ContextualDistance v2 (Definite u2)) =
ContextualDistance v $ Definite u
where
v = sumScaledUnits u (UnitValuePair u1 v1, UnitValuePair u2 v2)
u = u1 `min` u2
resolveUnit :: AmbiguousUnit -> SystemUnit -> SystemUnit
resolveUnit M (Metric _) = Metric Metre
resolveUnit M (Imperial _) = Imperial Mile
-- | Disambiguation of original Unit type
toSystemUnit :: TDist.Unit -> DeferrableUnit
toSystemUnit TDist.M = Ambiguous M
toSystemUnit TDist.Millimetre = Definite $ Metric Millimetre
toSystemUnit TDist.Centimetre = Definite $ Metric Centimetre
toSystemUnit TDist.Metre = Definite $ Metric Metre
toSystemUnit TDist.Kilometre = Definite $ Metric Kilometre
toSystemUnit TDist.Inch = Definite $ Imperial Inch
toSystemUnit TDist.Foot = Definite $ Imperial Foot
toSystemUnit TDist.Yard = Definite $ Imperial Yard
toSystemUnit TDist.Mile = Definite $ Imperial Mile
-- | Reconversion to original Unit type
toRawUnit :: DeferrableUnit -> TDist.Unit
toRawUnit (Ambiguous M) = TDist.M
toRawUnit (Definite (Metric Millimetre)) = TDist.Millimetre
toRawUnit (Definite (Metric Centimetre)) = TDist.Centimetre
toRawUnit (Definite (Metric Metre)) = TDist.Metre
toRawUnit (Definite (Metric Kilometre)) = TDist.Kilometre
toRawUnit (Definite (Imperial Inch)) = TDist.Inch
toRawUnit (Definite (Imperial Foot)) = TDist.Foot
toRawUnit (Definite (Imperial Yard)) = TDist.Yard
toRawUnit (Definite (Imperial Mile)) = TDist.Mile
-- | Convert a distance to the given units.
-- This only works if the unit is unambiguous.
scaleUnits ::
SystemUnit -- ^ target unit
-> UnitValuePair -- ^ Original unit and value
-> Double
scaleUnits targetUnit (UnitValuePair startingUnit v)
| startingUnit == targetUnit = v
| otherwise = inSIUnits v startingUnit / inSIUnits 1 targetUnit
-- | This is used when distances
-- must be normalized across measurement systems.
-- The Metric metre is the Standard International unit of distance.
inSIUnits :: Double -> SystemUnit -> Double
inSIUnits val (Metric u) = inMetres u val
inSIUnits val (Imperial u) = inInches u val * metersPerInch
-- | This conversion factor is exact.
metersPerInch :: Double
metersPerInch = 0.0254
inMetres :: Fractional a => MetricUnit -> a -> a
inMetres Millimetre n = n / 1000
inMetres Centimetre n = n / 100
inMetres Metre n = n
inMetres Kilometre n = n * 1000
inInches :: Num a => ImperialUnit -> a -> a
inInches Inch n = n
inInches Foot n = 12 * n
inInches Yard n = 3 * inInches Foot n
inInches Mile n = 5280 * inInches Foot n

View File

@ -85,9 +85,12 @@ allExamples = concat
, examples (DurationData 27 Month)
[ "2 years and 3 months"
, "2 years, 3 months"
, "2 years 3 months"
]
, examples (DurationData 31719604 Second)
[ "1 year, 2 days, 3 hours and 4 seconds"
, "1 year 2 days 3 hours and 4 seconds"
-- Oxford comma not supported:
-- , "1 year, 2 days, 3 hours, and 4 seconds"
]
]

View File

@ -175,6 +175,7 @@ ruleDurationPrecision = Rule
_ -> Nothing
}
-- | NOTE: Oxford comma is not supported.
ruleCompositeDurationCommasAnd :: Rule
ruleCompositeDurationCommasAnd = Rule
{ name = "composite <duration> (with ,/and)"

View File

@ -125,6 +125,7 @@ library
, Duckling.Ranking.Classifiers.JA_XX
, Duckling.Ranking.Classifiers.KA_XX
, Duckling.Ranking.Classifiers.KM_XX
, Duckling.Ranking.Classifiers.KN_XX
, Duckling.Ranking.Classifiers.KO_XX
, Duckling.Ranking.Classifiers.LO_XX
, Duckling.Ranking.Classifiers.ML_XX
@ -313,6 +314,7 @@ library
, Duckling.Distance.Helpers
, Duckling.Distance.Rules
, Duckling.Distance.Types
, Duckling.DistanceUnits.Types
-- Duration
, Duckling.Duration.AR.Corpus
@ -863,6 +865,7 @@ test-suite duckling-test
, Duckling.Distance.FR.Tests
, Duckling.Distance.GA.Tests
, Duckling.Distance.HR.Tests
, Duckling.Distance.IT.Tests
, Duckling.Distance.KM.Tests
, Duckling.Distance.KO.Tests
, Duckling.Distance.MN.Tests