mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
Add custom isRangeValid implementation for ZH
Summary: Fixes #313. Reviewed By: stroxler. Differential Revision: D28364035. fbshipit-source-id: 7fe3dba75410d217747a0d7a6f7df611ac26ec70
This commit is contained in:
parent
4878820294
commit
8cb77a43c7
@ -4,6 +4,7 @@
|
||||
|
||||
### Core
|
||||
* Make `isRangeValid` behave differently based on lang
|
||||
* Add custom `isRangeValid` implementation for ZH
|
||||
|
||||
### Rulesets
|
||||
* CA (Catalan)
|
||||
|
@ -57,7 +57,6 @@ allExamples = concat
|
||||
, examples (simple Inch 4)
|
||||
[ "4 inch"
|
||||
, "4 inches"
|
||||
, "4''"
|
||||
, "4英寸"
|
||||
, "4英吋"
|
||||
, "四吋"
|
||||
|
@ -1357,11 +1357,11 @@ classifiers
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (0..10)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
||||
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||
koData =
|
||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
||||
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||
("last n <cycle>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||
|
@ -1357,11 +1357,11 @@ classifiers
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (0..10)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
||||
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||
koData =
|
||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
||||
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||
("last n <cycle>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||
|
@ -1357,11 +1357,11 @@ classifiers
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (0..10)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
||||
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||
koData =
|
||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
||||
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||
("last n <cycle>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||
|
@ -1357,11 +1357,11 @@ classifiers
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (0..10)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
||||
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||
koData =
|
||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
||||
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||
("last n <cycle>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||
|
@ -1347,11 +1347,11 @@ classifiers
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (0..10)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.5907987153721106, unseen = -5.407171771460119,
|
||||
ClassData{prior = -0.5957987257888164, unseen = -5.407171771460119,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 221},
|
||||
koData =
|
||||
ClassData{prior = -0.8071778665977783, unseen = -5.19295685089021,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 178}}),
|
||||
ClassData{prior = -0.8010045764163588, unseen = -5.204006687076795,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 180}}),
|
||||
("last n <cycle>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -4.605170185988091,
|
||||
|
@ -13,6 +13,7 @@ module Duckling.Types.Document
|
||||
( Document -- abstract
|
||||
, fromText
|
||||
, (!)
|
||||
, (!?)
|
||||
, length
|
||||
, byteStringFromPos
|
||||
, isAdjacent
|
||||
@ -114,12 +115,44 @@ fromText rawInput = Document{..}
|
||||
where
|
||||
w = UText.ord c
|
||||
|
||||
-- As regexes are matched without whitespace delimitator, we need to check
|
||||
-- | Coarse class of a character, used when deciding whether two
-- neighbouring characters fall on a word boundary.
--
-- Constructor order is significant for the derived 'Ord' instance.
data CharClass
  = Alpha
    -- ^ A letter, i.e. 'Char.isLower' or 'Char.isUpper' holds.
  | Digit
    -- ^ A digit, i.e. 'Char.isDigit' holds.
  | Self {-# UNPACK #-} !Char
    -- ^ Any other character, which forms a class of its own.
  deriving (Eq, Ord, Show)
|
||||
|
||||
-- As regexes are matched without whitespace delimiter, we need to check
|
||||
-- the reasonability of the match to actually be a word.
|
||||
isRangeValid :: Lang -> Document -> Int -> Int -> Bool
|
||||
isRangeValid = \case
|
||||
ZH -> zhIsRangeValid
|
||||
_ -> defaultIsRangeValid
|
||||
where
|
||||
-- | ZH variant of the range check.
--
-- The range is accepted when both of its edges are acceptable:
--
-- * the left edge is fine if @start == 0@, or if the characters at
--   @start - 1@ and @start@ are "different";
-- * the right edge is fine if @end == length doc@, or if the characters at
--   @end - 1@ and @end@ are "different".
--
-- Lookups go through '!?', so an out-of-range position yields 'Nothing'
-- instead of an indexing error.
zhIsRangeValid :: Document -> Int -> Int -> Bool
zhIsRangeValid doc start end = leftEdgeOk && rightEdgeOk
  where
    leftEdgeOk :: Bool
    leftEdgeOk = start == 0 || isDifferent (doc !? (start - 1)) (doc !? start)

    rightEdgeOk :: Bool
    rightEdgeOk = end == length doc || isDifferent (doc !? (end - 1)) (doc !? end)

    -- Letters and digits get a class; every other character gets none.
    charClass :: Char -> Maybe CharClass
    charClass c
      | Char.isLower c || Char.isUpper c = Just Alpha
      | Char.isDigit c = Just Digit
      | otherwise = Nothing

    -- Two present characters differ unless both are letters or both are
    -- digits. In particular, two class-less characters always count as
    -- different (presumably so that adjacent Chinese characters, which are
    -- not whitespace-delimited, can each begin or end a match -- confirm
    -- against the ZH rules).
    -- A present character next to a missing one is different; two missing
    -- characters are not.
    isDifferent :: Maybe Char -> Maybe Char -> Bool
    isDifferent (Just c1) (Just c2) =
      case (charClass c1, charClass c2) of
        (Just k1, Just k2) -> k1 /= k2
        _ -> True
    isDifferent Nothing Nothing = False
    isDifferent _ _ = True
|
||||
|
||||
defaultIsRangeValid :: Document -> Int -> Int -> Bool
|
||||
defaultIsRangeValid doc start end =
|
||||
(start == 0 ||
|
||||
@ -127,11 +160,11 @@ isRangeValid = \case
|
||||
(end == length doc ||
|
||||
isDifferent (doc ! (end - 1)) (doc ! end))
|
||||
where
|
||||
charClass :: Char -> Char
|
||||
charClass :: Char -> CharClass
|
||||
charClass c
|
||||
| Char.isLower c || Char.isUpper c = 'c'
|
||||
| Char.isDigit c = 'd'
|
||||
| otherwise = c
|
||||
| Char.isLower c || Char.isUpper c = Alpha
|
||||
| Char.isDigit c = Digit
|
||||
| otherwise = Self c
|
||||
isDifferent :: Char -> Char -> Bool
|
||||
isDifferent a b = charClass a /= charClass b
|
||||
|
||||
@ -146,6 +179,13 @@ isAdjacentSeparator c = elem c [' ', '\t']
|
||||
-- | Character at position @ix@ of the document's index array.
--
-- No bounds check is performed here; the lookup is delegated directly to
-- the array's own indexing operator. Use '!?' when the position may be
-- out of range.
(!) :: Document -> Int -> Char
(!) doc ix = indexable doc Array.! ix
|
||||
|
||||
-- | Bounds-checked variant of '!'.
--
-- Returns @Just@ the character at @ix@ when @ix@ lies within the bounds of
-- the document's index array (inclusive on both ends), and 'Nothing'
-- otherwise.
(!?) :: Document -> Int -> Maybe Char
(!?) Document { indexable = chars } ix
  | lo <= ix && ix <= hi = Just (chars Array.! ix)
  | otherwise = Nothing
  where
    (lo, hi) = Array.bounds chars
|
||||
|
||||
-- | Number of positions covered by the bounds of the document's index
-- array.
length :: Document -> Int
length = Array.rangeSize . Array.bounds . indexable
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user