mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
Add custom isRangeValid implementation for ZH
Summary: Fixes #313
Reviewed By: stroxler
Differential Revision: D28364035
fbshipit-source-id: 7fe3dba75410d217747a0d7a6f7df611ac26ec70
This commit is contained in:
parent
4878820294
commit
8cb77a43c7
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
### Core
|
### Core
|
||||||
* Make `isRangeValid` behave differently based on lang
|
* Make `isRangeValid` behave differently based on lang
|
||||||
|
* Add custom `isRangeValid` implementation for ZH
|
||||||
|
|
||||||
### Rulesets
|
### Rulesets
|
||||||
* CA (Catalan)
|
* CA (Catalan)
|
||||||
|
@ -57,7 +57,6 @@ allExamples = concat
|
|||||||
, examples (simple Inch 4)
|
, examples (simple Inch 4)
|
||||||
[ "4 inch"
|
[ "4 inch"
|
||||||
, "4 inches"
|
, "4 inches"
|
||||||
, "4''"
|
|
||||||
, "4英寸"
|
, "4英寸"
|
||||||
, "4英吋"
|
, "4英吋"
|
||||||
, "四吋"
|
, "四吋"
|
||||||
|
@ -1357,11 +1357,11 @@ classifiers
|
|||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (0..10)",
|
("integer (0..10)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||||
("last n <cycle>",
|
("last n <cycle>",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||||
|
@ -1357,11 +1357,11 @@ classifiers
|
|||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (0..10)",
|
("integer (0..10)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||||
("last n <cycle>",
|
("last n <cycle>",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||||
|
@ -1357,11 +1357,11 @@ classifiers
|
|||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (0..10)",
|
("integer (0..10)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||||
("last n <cycle>",
|
("last n <cycle>",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||||
|
@ -1357,11 +1357,11 @@ classifiers
|
|||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (0..10)",
|
("integer (0..10)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||||
("last n <cycle>",
|
("last n <cycle>",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||||
|
@ -1347,11 +1347,11 @@ classifiers
|
|||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (0..10)",
|
("integer (0..10)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||||
("last n <cycle>",
|
("last n <cycle>",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||||
|
@ -13,6 +13,7 @@ module Duckling.Types.Document
|
|||||||
( Document -- abstract
|
( Document -- abstract
|
||||||
, fromText
|
, fromText
|
||||||
, (!)
|
, (!)
|
||||||
|
, (!?)
|
||||||
, length
|
, length
|
||||||
, byteStringFromPos
|
, byteStringFromPos
|
||||||
, isAdjacent
|
, isAdjacent
|
||||||
@ -114,12 +115,44 @@ fromText rawInput = Document{..}
|
|||||||
where
|
where
|
||||||
w = UText.ord c
|
w = UText.ord c
|
||||||
|
|
||||||
-- As regexes are matched without whitespace delimitator, we need to check
|
data CharClass
|
||||||
|
= Alpha
|
||||||
|
| Digit
|
||||||
|
| Self {-# unpack #-} !Char
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
-- As regexes are matched without whitespace delimiter, we need to check
|
||||||
-- the reasonability of the match to actually be a word.
|
-- the reasonability of the match to actually be a word.
|
||||||
isRangeValid :: Lang -> Document -> Int -> Int -> Bool
|
isRangeValid :: Lang -> Document -> Int -> Int -> Bool
|
||||||
isRangeValid = \case
|
isRangeValid = \case
|
||||||
|
ZH -> zhIsRangeValid
|
||||||
_ -> defaultIsRangeValid
|
_ -> defaultIsRangeValid
|
||||||
where
|
where
|
||||||
|
zhIsRangeValid :: Document -> Int -> Int -> Bool
|
||||||
|
zhIsRangeValid doc start end =
|
||||||
|
(start == 0 ||
|
||||||
|
isDifferent (doc !? (start - 1)) (doc !? start)) &&
|
||||||
|
(end == length doc ||
|
||||||
|
isDifferent (doc !? (end - 1)) (doc !? end))
|
||||||
|
|
||||||
|
-- start == 0 = isDifferent (doc !? (end - 1)) (doc !? end)
|
||||||
|
-- end == length doc = isDifferent (doc !? (start - 1)) (doc !? start)
|
||||||
|
-- otherwise = isDifferent (doc !? (start - 1)) (doc !? start)
|
||||||
|
-- && isDifferent (doc !? (end - 1)) (doc !? end)
|
||||||
|
where
|
||||||
|
charClass :: Char -> Maybe CharClass
|
||||||
|
charClass c
|
||||||
|
| Char.isLower c || Char.isUpper c = Just Alpha
|
||||||
|
| Char.isDigit c = Just Digit
|
||||||
|
| otherwise = Nothing
|
||||||
|
isDifferent :: Maybe Char -> Maybe Char -> Bool
|
||||||
|
isDifferent Nothing Nothing = False
|
||||||
|
isDifferent Nothing _ = True
|
||||||
|
isDifferent _ Nothing = True
|
||||||
|
isDifferent (Just c1) (Just c2) = case (charClass c1, charClass c2) of
|
||||||
|
(Nothing, Nothing) -> True
|
||||||
|
(cc1, cc2) -> cc1 /= cc2
|
||||||
|
|
||||||
defaultIsRangeValid :: Document -> Int -> Int -> Bool
|
defaultIsRangeValid :: Document -> Int -> Int -> Bool
|
||||||
defaultIsRangeValid doc start end =
|
defaultIsRangeValid doc start end =
|
||||||
(start == 0 ||
|
(start == 0 ||
|
||||||
@ -127,11 +160,11 @@ isRangeValid = \case
|
|||||||
(end == length doc ||
|
(end == length doc ||
|
||||||
isDifferent (doc ! (end - 1)) (doc ! end))
|
isDifferent (doc ! (end - 1)) (doc ! end))
|
||||||
where
|
where
|
||||||
charClass :: Char -> Char
|
charClass :: Char -> CharClass
|
||||||
charClass c
|
charClass c
|
||||||
| Char.isLower c || Char.isUpper c = 'c'
|
| Char.isLower c || Char.isUpper c = Alpha
|
||||||
| Char.isDigit c = 'd'
|
| Char.isDigit c = Digit
|
||||||
| otherwise = c
|
| otherwise = Self c
|
||||||
isDifferent :: Char -> Char -> Bool
|
isDifferent :: Char -> Char -> Bool
|
||||||
isDifferent a b = charClass a /= charClass b
|
isDifferent a b = charClass a /= charClass b
|
||||||
|
|
||||||
@ -146,6 +179,13 @@ isAdjacentSeparator c = elem c [' ', '\t']
|
|||||||
(!) :: Document -> Int -> Char
|
(!) :: Document -> Int -> Char
|
||||||
(!) Document { indexable = s } ix = s Array.! ix
|
(!) Document { indexable = s } ix = s Array.! ix
|
||||||
|
|
||||||
|
(!?) :: Document -> Int -> Maybe Char
|
||||||
|
(!?) Document { indexable = s } ix = do
|
||||||
|
let (lo, hi) = Array.bounds s
|
||||||
|
case ix >= lo && ix <= hi of
|
||||||
|
True -> Just $ s Array.! ix
|
||||||
|
False -> Nothing
|
||||||
|
|
||||||
length :: Document -> Int
|
length :: Document -> Int
|
||||||
length Document { indexable = s } = Array.rangeSize $ Array.bounds s
|
length Document { indexable = s } = Array.rangeSize $ Array.bounds s
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user