mirror of
https://github.com/facebook/duckling.git
synced 2024-12-25 13:11:38 +03:00
cc016bb178
Summary: * Simplified `Url` to only keep track of what we need (we can change back later) * Normalize domain: remove subdomains like `www`, `www2` and lower case * Return the full domain in the JSON value field * Updated offensive url example Reviewed By: JonCoens Differential Revision: D4705403 fbshipit-source-id: e5d11ee
65 lines
1.7 KiB
Haskell
65 lines
1.7 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Url.Rules
|
|
( rules ) where
|
|
|
|
import Prelude
|
|
import Data.String
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
import Duckling.Url.Helpers
|
|
|
|
ruleURL :: Rule
|
|
ruleURL = Rule
|
|
{ name = "url"
|
|
, pattern =
|
|
[ regex "((([a-zA-Z]+)://)?(w{2,3}[0-9]*\\.)?(([\\w_-]+\\.)+[a-z]{2,4})(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (m:_:_protocol:_:domain:_:_:_port:_path:_query:_)):
|
|
_) -> Just . Token Url $ url m domain
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleLocalhost :: Rule
|
|
ruleLocalhost = Rule
|
|
{ name = "localhost"
|
|
, pattern =
|
|
[ regex "((([a-zA-Z]+)://)?localhost(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (m:_:_protocol:_:_port:_path:_query:_)):_) ->
|
|
Just . Token Url $ url m "localhost"
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleLocalURL :: Rule
|
|
ruleLocalURL = Rule
|
|
{ name = "local url"
|
|
, pattern =
|
|
[ regex "(([a-zA-Z]+)://([\\w_-]+)(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (m:_protocol:domain:_:_port:_path:_query:_)):
|
|
_) -> Just . Token Url $ url m domain
|
|
_ -> Nothing
|
|
}
|
|
|
|
rules :: [Rule]
|
|
rules =
|
|
[ ruleURL
|
|
, ruleLocalhost
|
|
, ruleLocalURL
|
|
]
|