amfora/renderer/page.go

153 lines
4.2 KiB
Go
Raw Normal View History

2020-06-20 07:09:01 +03:00
package renderer
import (
"bytes"
2020-06-20 07:09:01 +03:00
"errors"
"io"
2020-06-20 07:09:01 +03:00
"mime"
"os"
2020-06-20 07:09:01 +03:00
"strings"
2020-12-20 23:54:47 +03:00
"time"
2020-06-20 07:09:01 +03:00
"github.com/makeworld-the-better-one/amfora/structs"
"github.com/makeworld-the-better-one/go-gemini"
"github.com/spf13/viper"
2020-06-20 07:09:01 +03:00
"golang.org/x/text/encoding/ianaindex"
)
// Sentinel errors returned by MakePage.
var (
	// ErrTooLarge means the body held more bytes than the configured
	// "a-general.page_max_size" limit.
	ErrTooLarge = errors.New("page content would be too large")
	// ErrTimedOut means reading the body failed with a timeout error.
	ErrTimedOut = errors.New("page download timed out")
	// ErrCantDisplay means CanDisplay rejected the response.
	ErrCantDisplay = errors.New("invalid content for a page")
	// ErrBadEncoding means no decoder could be found for the declared charset.
	ErrBadEncoding = errors.New("unsupported encoding")
	// ErrBadMediatype means the mediatype passed CanDisplay but no rendering
	// branch handles it, which indicates a bug in this package.
	ErrBadMediatype = errors.New("displayable mediatype is not handled in the code, implementation error")
)
2020-06-20 07:09:01 +03:00
// isUTF8 reports whether charset names an encoding that is compatible with
// UTF-8 and therefore doesn't need to be decoded.
//
// The comparison is case-insensitive. An empty charset counts as UTF-8, as do
// "utf-8" and "us-ascii".
func isUTF8(charset string) bool {
	switch strings.ToLower(charset) {
	case "", "utf-8", "us-ascii":
		return true
	default:
		return false
	}
}
2021-02-05 02:06:56 +03:00
// decodeMeta parses a response META string into a media type and its
// parameters by delegating to mime.ParseMediaType.
//
// An empty META is special-cased: according to the spec it is equal to
// "text/gemini; charset=utf-8", so that media type and charset are returned
// without parsing anything.
func decodeMeta(meta string) (string, map[string]string, error) {
	if meta != "" {
		return mime.ParseMediaType(meta)
	}
	return "text/gemini", map[string]string{"charset": "utf-8"}, nil
}
2020-06-20 07:09:01 +03:00
// CanDisplay returns true if the response is supported by Amfora
// for displaying on the screen.
// It also doubles as a function to detect whether something can be stored in a Page struct.
func CanDisplay(res *gemini.Response) bool {
if gemini.SimplifyStatus(res.Status) != 20 {
// No content
return false
}
2021-02-05 02:06:56 +03:00
mediatype, params, err := decodeMeta(res.Meta)
2020-06-20 07:09:01 +03:00
if err != nil {
return false
}
if !strings.HasPrefix(mediatype, "text/") {
2020-06-20 07:09:01 +03:00
// Amfora doesn't support other filetypes
return false
}
if isUTF8(params["charset"]) {
return true
}
enc, err := ianaindex.MIME.Encoding(params["charset"]) // Lowercasing is done inside
// Encoding sometimes returns nil, see #3 on this repo and golang/go#19421
return err == nil && enc != nil
}
// MakePage creates a formatted, rendered Page from the given network response and params.
// You must set the Page.Width value yourself.
func MakePage(url string, res *gemini.Response, width int, proxied bool) (*structs.Page, error) {
2020-06-20 07:09:01 +03:00
if !CanDisplay(res) {
return nil, ErrCantDisplay
2020-06-20 07:09:01 +03:00
}
buf := new(bytes.Buffer)
2020-07-26 19:12:54 +03:00
_, err := io.CopyN(buf, res.Body, viper.GetInt64("a-general.page_max_size")+1)
if err == nil {
2020-07-26 19:12:54 +03:00
// Content was larger than max size
return nil, ErrTooLarge
} else if err != io.EOF {
2020-11-24 05:26:44 +03:00
if os.IsTimeout(err) {
// I would use
// errors.Is(err, os.ErrDeadlineExceeded)
// but that isn't supported before Go 1.15.
return nil, ErrTimedOut
}
// Some other error
2020-06-20 07:09:01 +03:00
return nil, err
}
// Otherwise, the error is EOF, which is what we want.
2020-06-20 07:09:01 +03:00
2021-02-05 02:06:56 +03:00
mediatype, params, _ := decodeMeta(res.Meta)
2020-06-20 07:09:01 +03:00
// Convert content first
var utfText string
if isUTF8(params["charset"]) {
2020-08-05 20:31:59 +03:00
utfText = buf.String()
2020-06-20 07:09:01 +03:00
} else {
encoding, err := ianaindex.MIME.Encoding(params["charset"])
if encoding == nil || err != nil {
// Some encoding doesn't exist and wasn't caught in CanDisplay()
return nil, ErrBadEncoding
2020-06-20 07:09:01 +03:00
}
2020-08-05 20:31:59 +03:00
utfText, err = encoding.NewDecoder().String(buf.String())
2020-06-20 07:09:01 +03:00
if err != nil {
return nil, err
}
}
if mediatype == "text/gemini" {
rendered, links := RenderGemini(utfText, width, proxied)
2020-06-20 07:09:01 +03:00
return &structs.Page{
Mediatype: structs.TextGemini,
RawMediatype: mediatype,
URL: url,
Raw: utfText,
Content: rendered,
Links: links,
2020-12-20 23:54:47 +03:00
MadeAt: time.Now(),
2020-06-20 07:09:01 +03:00
}, nil
} else if strings.HasPrefix(mediatype, "text/") {
2020-07-28 23:58:32 +03:00
if mediatype == "text/x-ansi" || strings.HasSuffix(url, ".ans") || strings.HasSuffix(url, ".ansi") {
2020-07-11 00:45:14 +03:00
// ANSI
return &structs.Page{
Mediatype: structs.TextAnsi,
RawMediatype: mediatype,
URL: url,
Raw: utfText,
Content: RenderANSI(utfText),
Links: []string{},
2020-12-20 23:54:47 +03:00
MadeAt: time.Now(),
2020-07-11 00:45:14 +03:00
}, nil
}
// Treated as plaintext
return &structs.Page{
Mediatype: structs.TextPlain,
RawMediatype: mediatype,
URL: url,
Raw: utfText,
Content: RenderPlainText(utfText),
Links: []string{},
2020-12-20 23:54:47 +03:00
MadeAt: time.Now(),
}, nil
2020-06-20 07:09:01 +03:00
}
2020-08-28 00:57:19 +03:00
return nil, ErrBadMediatype
2020-06-20 07:09:01 +03:00
}