Fix invalid URLs, NFC norm., support IDNs

Fixes #138
This commit is contained in:
makeworld 2020-12-19 19:41:25 -05:00
parent bd7bb5f9f0
commit 8aec339283
5 changed files with 43 additions and 9 deletions

View File

@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Subscriptions** to feeds and page changes (#61)
- Opening local files with `file://` URIs (#103, #117)
- `show_link` option added in config to optionally see the URL (#133)
- Support for Unicode in domain names (IDNs)
- Unnecessarily encoded characters in URLs will be decoded (#138)
- URLs are NFC-normalized before any processing (#138)
### Changed
- Updated [go-gemini](https://github.com/makeworld-the-better-one/go-gemini) to v0.10.0
- Updated [go-gemini](https://github.com/makeworld-the-better-one/go-gemini) to v0.11.0
- Supports CN-only wildcard certs
- Time out when header takes too long
- Preformatted text is now light yellow by default
@ -20,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Single quotes are used in the default config for commands and paths so that Windows paths with backslashes will be parsed correctly
- Downloading now uses proxies when appropriate
- User-entered URLs with invalid characters will be percent-encoded (#138)
## [1.6.0] - 2020-11-04

View File

@ -5,8 +5,10 @@ import (
"net/url"
"strings"
"github.com/makeworld-the-better-one/go-gemini"
"github.com/spf13/viper"
"gitlab.com/tslocum/cview"
"golang.org/x/text/unicode/norm"
)
// This file contains funcs that are small, self-contained utilities.
@ -73,15 +75,23 @@ func resolveRelLink(t *tab, prev, next string) (string, error) {
// Example: gemini://gus.guru:1965/ and //gus.guru/.
// This function will take both output the same URL each time.
//
// It will also percent-encode invalid characters, and decode chars
// that don't need to be encoded. It will also apply Unicode NFC
// normalization.
//
// The string passed must already be confirmed to be a URL.
// Detection of a search string vs. a URL must happen elsewhere.
//
// It only works with absolute URLs.
func normalizeURL(u string) string {
parsed, err := url.Parse(u)
u = norm.NFC.String(u)
tmp, err := gemini.GetPunycodeURL(u)
if err != nil {
return u
}
u = tmp
parsed, _ := url.Parse(u)
if parsed.Scheme == "" {
// Always add scheme
@ -102,6 +112,17 @@ func normalizeURL(u string) string {
// gemini://example.com -> gemini://example.com/
if parsed.Path == "" {
parsed.Path = "/"
} else {
// Decode and re-encode path
// This removes needless encoding, like that of ASCII chars
// And encodes anything that wasn't but should've been
parsed.RawPath = strings.ReplaceAll(url.PathEscape(parsed.Path), "%2F", "/")
}
// Do the same to the query string
un, err := gemini.QueryUnescape(parsed.RawQuery)
if err == nil {
parsed.RawQuery = gemini.QueryEscape(un)
}
return parsed.String()

View File

@ -21,6 +21,11 @@ var normalizeURLTests = []struct {
{"mailto:example@example.com", "mailto:example@example.com"},
{"magnet:?xt=urn:btih:test", "magnet:?xt=urn:btih:test"},
{"https://example.com", "https://example.com"},
// Fixing URL tests
{"gemini://gemini.circumlunar.space/%64%6f%63%73/%66%61%71%2e%67%6d%69", "gemini://gemini.circumlunar.space/docs/faq.gmi"},
{"gemini://example.com/蛸", "gemini://example.com/%E8%9B%B8"},
{"gemini://gemini.circumlunar.space/%64%6f%63%73/;;.'%66%61%71蛸%2e%67%6d%69", "gemini://gemini.circumlunar.space/docs/%3B%3B.%27faq%E8%9B%B8.gmi"},
{"gemini://example.com/?%2Ch%64ello蛸", "gemini://example.com/?%2Chdello%E8%9B%B8"},
}
func TestNormalizeURL(t *testing.T) {

5
go.mod
View File

@ -7,7 +7,7 @@ require (
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/gdamore/tcell v1.3.1-0.20200608133353-cb1e5d6fa606
github.com/google/go-cmp v0.5.0 // indirect
github.com/makeworld-the-better-one/go-gemini v0.10.0
github.com/makeworld-the-better-one/go-gemini v0.11.0
github.com/makeworld-the-better-one/go-isemoji v1.1.0
github.com/makeworld-the-better-one/progressbar/v3 v3.3.5-0.20200710151429-125743e22b4f
github.com/mitchellh/go-homedir v1.1.0
@ -22,8 +22,7 @@ require (
github.com/spf13/viper v1.7.0
github.com/stretchr/testify v1.6.1
gitlab.com/tslocum/cview v1.4.8-0.20200713214710-cc7796c4ca44
golang.org/x/sys v0.0.0-20200817155316-9781c653f443 // indirect
golang.org/x/text v0.3.3
golang.org/x/text v0.3.5-0.20201208001344-75a595aef632
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/ini.v1 v1.57.0 // indirect
)

13
go.sum
View File

@ -133,8 +133,8 @@ github.com/lucasb-eyer/go-colorful v1.0.3 h1:QIbQXiugsb+q10B+MI+7DI1oQLdmnep86tW
github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/makeworld-the-better-one/go-gemini v0.10.0 h1:MZYuGD5RcjXD5k+gZLKOs1djKaPDaQrdY0OhdIh637c=
github.com/makeworld-the-better-one/go-gemini v0.10.0/go.mod h1:P7/FbZ+IEIbA/d+A0Y3w2GNgD8SA2AcNv7aDGJbaWG4=
github.com/makeworld-the-better-one/go-gemini v0.11.0 h1:MNGiULJFvcqls9oCy40tE897hDeKvNmEK9i5kRucgQk=
github.com/makeworld-the-better-one/go-gemini v0.11.0/go.mod h1:F+3x+R1xeYK90jMtBq+U+8Sh64r2dHleDZ/en3YgSmg=
github.com/makeworld-the-better-one/go-isemoji v1.1.0 h1:wZBHOKB5zAIgaU2vaWnXFDDhatebB8TySrNVxjVV84g=
github.com/makeworld-the-better-one/go-isemoji v1.1.0/go.mod h1:FBjkPl9rr0G4vlZCc+Mr+QcnOfGCTbGWYW8/1sp06I0=
github.com/makeworld-the-better-one/gofeed v1.1.1-0.20201123002655-c0c6354134fe h1:i3b9Qy5z23DcXRnrsMYcM5s9Ng5VIidM1xZd+szuTsY=
@ -286,6 +286,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201216054612-986b41b23924 h1:QsnDpLLOKwHBBDa8nDws4DYNc/ryVW2vCpxCs09d4PY=
golang.org/x/net v0.0.0-20201216054612-986b41b23924/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -311,13 +313,16 @@ golang.org/x/sys v0.0.0-20190626150813-e07cf5db2756/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200610111108-226ff32320da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200817155316-9781c653f443 h1:X18bCaipMcoJGm27Nv7zr4XYPKGUy92GtqboKC2Hxaw=
golang.org/x/sys v0.0.0-20200817155316-9781c653f443/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5-0.20201208001344-75a595aef632 h1:clKlpQ6BheG1zIRhU2SPRAXpLgol/tqWVEeRkjpsaDI=
golang.org/x/text v0.3.5-0.20201208001344-75a595aef632/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=