duckling/Duckling/Url/Rules.hs
Julien Odent cc016bb178 Refactoring + return domain
Summary:
* Simplified `Url` to only keep track of what we need (we can change back later)
* Normalize domain: remove subdomains like `www`, `www2` and lower case
* Return the full domain in the JSON value field
* Updated offensive url example

Reviewed By: JonCoens

Differential Revision: D4705403

fbshipit-source-id: e5d11ee
2017-03-14 13:49:20 -07:00

65 lines
1.7 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Url.Rules
( rules ) where
import Prelude
import Data.String
import Duckling.Dimensions.Types
import Duckling.Regex.Types
import Duckling.Types
import Duckling.Url.Helpers
ruleURL :: Rule
ruleURL = Rule
{ name = "url"
, pattern =
[ regex "((([a-zA-Z]+)://)?(w{2,3}[0-9]*\\.)?(([\\w_-]+\\.)+[a-z]{2,4})(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m:_:_protocol:_:domain:_:_:_port:_path:_query:_)):
_) -> Just . Token Url $ url m domain
_ -> Nothing
}
ruleLocalhost :: Rule
ruleLocalhost = Rule
{ name = "localhost"
, pattern =
[ regex "((([a-zA-Z]+)://)?localhost(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m:_:_protocol:_:_port:_path:_query:_)):_) ->
Just . Token Url $ url m "localhost"
_ -> Nothing
}
ruleLocalURL :: Rule
ruleLocalURL = Rule
{ name = "local url"
, pattern =
[ regex "(([a-zA-Z]+)://([\\w_-]+)(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m:_protocol:domain:_:_port:_path:_query:_)):
_) -> Just . Token Url $ url m domain
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleURL
, ruleLocalhost
, ruleLocalURL
]