Url: handle fragments

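Summary: Extend the URL regex to also match an optional trailing fragment (`#...`), and add corpus examples for URLs with fragments. See https://github.com/facebook/duckling/issues/155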

Reviewed By: JonCoens

Differential Revision: D6967145

fbshipit-source-id: 44e573b57c714a62d93d74063f597d31feb8c6f5
This commit is contained in:
Julien Odent 2018-02-12 12:47:14 -08:00 committed by Facebook Github Bot
parent 1f7290880c
commit c7fb533a67
2 changed files with 10 additions and 1 deletion

View File

@ -70,4 +70,13 @@ allExamples = concat
, examples (UrlData "aMaZon.co.uk/?page=home" "amazon.co.uk")
[ "aMaZon.co.uk/?page=home"
]
, examples (UrlData "https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Syntax" "en.wikipedia.org")
[ "https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Syntax"
]
, examples (UrlData "http://example.com/data.csv#cell=4,1-6,2" "example.com")
[ "http://example.com/data.csv#cell=4,1-6,2"
]
, examples (UrlData "http://example.com/bar.webm#t=40,80&xywh=160,120,320,240" "example.com")
[ "http://example.com/bar.webm#t=40,80&xywh=160,120,320,240"
]
]

View File

@ -24,7 +24,7 @@ ruleURL :: Rule
ruleURL = Rule
{ name = "url"
, pattern =
[ regex "((([a-zA-Z]+)://)?(w{2,3}[0-9]*\\.)?(([\\w_-]+\\.)+[a-z]{2,4})(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?)"
[ regex "((([a-zA-Z]+)://)?(w{2,3}[0-9]*\\.)?(([\\w_-]+\\.)+[a-z]{2,4})(:(\\d+))?(/[^?\\s#]*)?(\\?[^\\s#]+)?(#[\\-,*=&a-z0-9]+)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m:_:_protocol:_:domain:_:_:_port:_path:_query:_)):