mirror of
https://github.com/facebook/duckling.git
synced 2024-10-27 14:26:54 +03:00
cc016bb178
Summary: * Simplified `Url` to only keep track of what we need (we can change back later) * Normalize domain: remove subdomains like `www`, `www2` and lower case * Return the full domain in the JSON value field * Updated offensive url example Reviewed By: JonCoens Differential Revision: D4705403 fbshipit-source-id: e5d11ee
74 lines
2.1 KiB
Haskell
74 lines
2.1 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Url.Corpus
|
|
( corpus
|
|
, negativeCorpus
|
|
) where
|
|
|
|
import Data.String
|
|
import Prelude
|
|
|
|
import Duckling.Testing.Types
|
|
import Duckling.Url.Types
|
|
|
|
corpus :: Corpus
|
|
corpus = (testContext, allExamples)
|
|
|
|
negativeCorpus :: NegativeCorpus
|
|
negativeCorpus = (testContext, examples)
|
|
where
|
|
examples =
|
|
[ "foo"
|
|
, "MYHOST"
|
|
, "hey:42"
|
|
, "25"
|
|
]
|
|
|
|
allExamples :: [Example]
|
|
allExamples = concat
|
|
[ examples (UrlData "http://www.bla.com" "bla.com")
|
|
[ "http://www.bla.com"
|
|
]
|
|
, examples (UrlData "www.bla.com:8080/path" "bla.com")
|
|
[ "www.bla.com:8080/path"
|
|
]
|
|
, examples (UrlData "https://myserver?foo=bar" "myserver")
|
|
[ "https://myserver?foo=bar"
|
|
]
|
|
, examples (UrlData "cnn.com/info" "cnn.com")
|
|
[ "cnn.com/info"
|
|
]
|
|
, examples (UrlData "bla.com/path/path?ext=%23&foo=bla" "bla.com")
|
|
[ "bla.com/path/path?ext=%23&foo=bla"
|
|
]
|
|
, examples (UrlData "localhost" "localhost")
|
|
[ "localhost"
|
|
]
|
|
, examples (UrlData "localhost:8000" "localhost")
|
|
[ "localhost:8000"
|
|
]
|
|
, examples (UrlData "http://kimchi" "kimchi")
|
|
[ "http://kimchi"
|
|
]
|
|
, examples (UrlData "https://500px.com:443/about" "500px.com")
|
|
[ "https://500px.com:443/about"
|
|
]
|
|
, examples (UrlData "www2.foo-bar.net?foo=bar" "foo-bar.net")
|
|
[ "www2.foo-bar.net?foo=bar"
|
|
]
|
|
, examples (UrlData "https://api.wit.ai/message?q=hi" "api.wit.ai")
|
|
[ "https://api.wit.ai/message?q=hi"
|
|
]
|
|
, examples (UrlData "aMaZon.co.uk/?page=home" "amazon.co.uk")
|
|
[ "aMaZon.co.uk/?page=home"
|
|
]
|
|
]
|