2024-01-25 07:01:24 +03:00
|
|
|
|
// Package downloader provides a mechanism for getting files from
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// HTTP/S URLs, making use of a mostly RFC-compliant cache.
|
|
|
|
|
//
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// The entrypoint is downloader.New.
|
|
|
|
|
//
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// Acknowledgement: This package is a heavily customized fork
|
|
|
|
|
// of https://github.com/gregjones/httpcache, via bitcomplete/download.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
package downloader
|
2024-01-15 04:45:34 +03:00
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
|
|
|
|
"context"
|
|
|
|
|
"errors"
|
|
|
|
|
"io"
|
|
|
|
|
"net/http"
|
|
|
|
|
"net/url"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"sync"
|
|
|
|
|
"time"
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
"github.com/neilotoole/streamcache"
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/ioz/checksum"
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/ioz/httpz"
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg"
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lgm"
|
2024-01-27 01:18:38 +03:00
|
|
|
|
"github.com/neilotoole/sq/libsq/core/options"
|
2024-01-15 04:45:34 +03:00
|
|
|
|
"github.com/neilotoole/sq/libsq/core/progress"
|
|
|
|
|
)
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// OptContinueOnError is the options.Bool for "download.refresh.continue-on-error":
// if a cached download is stale and the refresh attempt fails, sq continues
// with the stale cached copy instead of returning an error (see the long help
// text below for details).
var OptContinueOnError = options.NewBool(
	"download.refresh.continue-on-error",
	"",
	false,
	0,
	true,
	"Continue with stale download if refresh fails",
	`Continue with stale download if refresh fails. This option applies if a download
is in the cache, but is considered stale, and a refresh attempt fails. If set to
true, the refresh error is logged, and the stale download is returned. This is a
sort of "Airplane Mode" for downloads: when true, sq continues with the cached
download when the network is unavailable. If false, an error is returned instead.`,
	options.TagSource,
)
|
|
|
|
|
|
|
|
|
|
// OptCache is the options.Bool for "download.cache": when enabled, downloaded
// remote files are cached on disk; when disabled, the file is re-downloaded
// on each command. Downloader.Get consults this option via the context.
var OptCache = options.NewBool(
	"download.cache",
	"",
	false,
	0,
	true,
	"Cache downloads",
	`Cache downloaded remote files. When false, the download cache is not used and
the file is re-downloaded on each command.`,
	options.TagSource,
)
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// State is an enumeration of caching states based on the cache-control
// values of the request and the response.
//
//   - Uncached indicates the item is not cached.
//   - Fresh indicates that the cached item can be returned.
//   - Stale indicates that the cached item needs validating before it is returned.
//   - Transparent indicates the cached item should not be used to fulfil the request.
//
// Because this is only a private cache, 'public' and 'private' in cache-control aren't
// significant. Similarly, smax-age isn't used.
type State int

const (
	// Uncached indicates that the item is not cached.
	Uncached State = iota

	// Stale indicates that the cached item needs validating before it is returned.
	Stale

	// Fresh indicates the cached item can be returned.
	Fresh

	// Transparent indicates the cached item should not be used to fulfil the request.
	Transparent
)

// stateText holds the display text for each State value.
var stateText = map[State]string{
	Uncached:    "uncached",
	Stale:       "stale",
	Fresh:       "fresh",
	Transparent: "transparent",
}

// String returns a string representation of State.
func (s State) String() string {
	if text, ok := stateText[s]; ok {
		return text
	}
	return "unknown"
}
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// XFromCache is the header added to responses that are returned from the cache.
// NOTE(review): the header value is "X-From-Stream", not "X-From-Cache" —
// presumably a nod to the streamcache-backed implementation; confirm before
// relying on the header name elsewhere.
const XFromCache = "X-From-Stream"
|
2024-01-15 04:45:34 +03:00
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
// Downloader encapsulates downloading a file from a URL, using a local
// disk cache if possible. Downloader.Get makes use of the Handler callback
// mechanism to facilitate early consumption of a download stream while the
// download is still in flight.
type Downloader struct {
	// c is the HTTP client used to make requests.
	c *http.Client

	// cache implements the on-disk cache. If nil, caching is disabled.
	// It will be created in dlDir.
	cache *cache

	// dlStream is the streamcache.Stream that is passed to Handler.Uncached for
	// an active download. This field is reset to nil on each call to Downloader.Get.
	dlStream *streamcache.Stream

	// name is a user-friendly name, such as a source handle like @data.
	name string

	// url is the URL of the download. It is parsed in downloader.New,
	// thus is guaranteed to be valid.
	url string

	// dlDir is the directory in which the download cache is stored.
	dlDir string

	// mu guards concurrent access to the Downloader.
	mu sync.Mutex

	// continueOnError, if true, indicates that the downloader
	// should serve the cached file if a refresh attempt fails.
	continueOnError bool

	// markCachedResponses, if true, indicates that responses returned from the
	// cache will be given an extra header, XFromCache ("X-From-Stream").
	markCachedResponses bool
}
|
|
|
|
|
|
2024-01-28 03:01:45 +03:00
|
|
|
|
// New returns a new Downloader for url that caches downloads in dlDir.
|
|
|
|
|
// Arg name is a user-friendly label, such as a source handle like @data.
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// The name may show up in logs, or progress indicators etc.
|
2024-01-27 01:18:38 +03:00
|
|
|
|
func New(name string, c *http.Client, dlURL, dlDir string) (*Downloader, error) {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
_, err := url.ParseRequestURI(dlURL)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, errz.Wrap(err, "invalid download URL")
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if dlDir, err = filepath.Abs(dlDir); err != nil {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
return nil, errz.Err(err)
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
dl := &Downloader{
|
2024-01-15 04:45:34 +03:00
|
|
|
|
name: name,
|
|
|
|
|
c: c,
|
|
|
|
|
url: dlURL,
|
|
|
|
|
markCachedResponses: true,
|
2024-01-27 01:18:38 +03:00
|
|
|
|
continueOnError: true,
|
|
|
|
|
dlDir: dlDir,
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dl, nil
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-28 03:01:45 +03:00
|
|
|
|
// Get attempts to get the remote file, invoking Handler as appropriate. Exactly
|
|
|
|
|
// one of the Handler methods will be invoked, one time.
|
|
|
|
|
//
|
|
|
|
|
// - If Handler.Uncached is invoked, a download stream has begun. Get will
|
|
|
|
|
// then block until the download is completed. The download resp.Body is
|
|
|
|
|
// written to cache, and on success, the filepath to the newly updated
|
|
|
|
|
// cache file is returned.
|
|
|
|
|
// If an error occurs during cache write, the error is logged, and Get
|
|
|
|
|
// returns the filepath of the previously cached download, if permitted
|
|
|
|
|
// by policy. If not permitted or not existing, empty string is returned.
|
|
|
|
|
// - If Handler.Cached is invoked, Get returns immediately afterwards with
|
|
|
|
|
// the filepath of the cached download (the same value provided to
|
|
|
|
|
// Handler.Cached).
|
|
|
|
|
// - If Handler.Error is invoked, there was an unrecoverable problem (e.g. a
|
|
|
|
|
// transport error, and there's no previous cache) and the download is
|
|
|
|
|
// unavailable. That error should be propagated up the stack. Get will
|
|
|
|
|
// return empty string.
|
2024-01-27 01:18:38 +03:00
|
|
|
|
//
|
|
|
|
|
// Get consults the context for options. In particular, it makes
|
|
|
|
|
// use of OptCache and OptContinueOnError.
|
2024-01-28 03:01:45 +03:00
|
|
|
|
func (dl *Downloader) Get(ctx context.Context, h Handler) (cacheFile string) {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
o := options.FromContext(ctx)
|
|
|
|
|
dl.continueOnError = OptContinueOnError.Get(o)
|
|
|
|
|
if OptCache.Get(o) {
|
|
|
|
|
dl.cache = &cache{dir: dl.dlDir}
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
req := dl.mustRequest(ctx)
|
|
|
|
|
lg.FromContext(ctx).Debug("Get download", lga.URL, dl.url)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
cacheFile = dl.get(req, h)
|
|
|
|
|
return cacheFile
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
2024-01-28 03:01:45 +03:00
|
|
|
|
// get contains the main logic for getting the download.
|
|
|
|
|
// It invokes Handler as appropriate, and on success returns the
|
|
|
|
|
// filepath of the valid cached download ›file.
|
|
|
|
|
func (dl *Downloader) get(req *http.Request, h Handler) (cacheFile string) { //nolint:gocognit,funlen,cyclop
|
2024-01-15 04:45:34 +03:00
|
|
|
|
ctx := req.Context()
|
|
|
|
|
log := lg.FromContext(ctx)
|
2024-01-25 07:01:24 +03:00
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
dl.dlStream = nil
|
|
|
|
|
|
|
|
|
|
var fpBody string
|
|
|
|
|
if dl.cache != nil {
|
|
|
|
|
_, fpBody, _ = dl.cache.paths(req)
|
|
|
|
|
}
|
2024-01-15 04:45:34 +03:00
|
|
|
|
|
|
|
|
|
state := dl.state(req)
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if state == Fresh && fpBody != "" {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// The cached response is fresh, so we can return it.
|
|
|
|
|
h.Cached(fpBody)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fpBody
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cacheable := dl.isCacheable(req)
|
|
|
|
|
var err error
|
|
|
|
|
var cachedResp *http.Response
|
|
|
|
|
if cacheable {
|
|
|
|
|
cachedResp, err = dl.cache.get(req.Context(), req) //nolint:bodyclose
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var resp *http.Response
|
|
|
|
|
if cacheable && cachedResp != nil && err == nil { //nolint:nestif
|
|
|
|
|
if dl.markCachedResponses {
|
|
|
|
|
cachedResp.Header.Set(XFromCache, "1")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if varyMatches(cachedResp, req) {
|
|
|
|
|
// Can only use cached value if the new request doesn't Vary significantly
|
|
|
|
|
freshness := getFreshness(cachedResp.Header, req.Header)
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if freshness == Fresh && fpBody != "" {
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, cachedResp.Body)
|
2024-01-15 04:45:34 +03:00
|
|
|
|
h.Cached(fpBody)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fpBody
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if freshness == Stale {
|
|
|
|
|
var req2 *http.Request
|
|
|
|
|
// Add validators if caller hasn't already done so
|
|
|
|
|
etag := cachedResp.Header.Get("etag")
|
|
|
|
|
if etag != "" && req.Header.Get("etag") == "" {
|
|
|
|
|
req2 = cloneRequest(req)
|
|
|
|
|
req2.Header.Set("if-none-match", etag)
|
|
|
|
|
}
|
|
|
|
|
lastModified := cachedResp.Header.Get("last-modified")
|
|
|
|
|
if lastModified != "" && req.Header.Get("last-modified") == "" {
|
|
|
|
|
if req2 == nil {
|
|
|
|
|
req2 = cloneRequest(req)
|
|
|
|
|
}
|
|
|
|
|
req2.Header.Set("if-modified-since", lastModified)
|
|
|
|
|
}
|
|
|
|
|
if req2 != nil {
|
|
|
|
|
req = req2
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
resp, err = dl.do(req) //nolint:bodyclose
|
|
|
|
|
switch {
|
|
|
|
|
case err == nil && req.Method == http.MethodGet && resp.StatusCode == http.StatusNotModified:
|
|
|
|
|
// Replace the 304 response with the one from cache, but update with some new headers
|
|
|
|
|
endToEndHeaders := getEndToEndHeaders(resp.Header)
|
|
|
|
|
for _, header := range endToEndHeaders {
|
|
|
|
|
cachedResp.Header[header] = resp.Header[header]
|
|
|
|
|
}
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
|
|
|
|
resp = cachedResp
|
|
|
|
|
|
2024-01-28 03:01:45 +03:00
|
|
|
|
case fpBody != "" && (err != nil || resp.StatusCode >= 500) &&
|
2024-01-27 01:18:38 +03:00
|
|
|
|
req.Method == http.MethodGet && canStaleOnError(cachedResp.Header, req.Header):
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// In case of transport failure and stale-if-error activated, returns cached content
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// when available.
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, cachedResp.Body)
|
2024-01-15 04:45:34 +03:00
|
|
|
|
log.Warn("Returning cached response due to transport failure", lga.Err, err)
|
|
|
|
|
h.Cached(fpBody)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fpBody
|
2024-01-15 04:45:34 +03:00
|
|
|
|
|
|
|
|
|
default:
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if err != nil && resp != nil && resp.StatusCode != http.StatusOK {
|
|
|
|
|
log.Warn("Unexpected HTTP status from server; will serve from cache if possible",
|
|
|
|
|
lga.Err, err, lga.Status, resp.StatusCode)
|
|
|
|
|
|
|
|
|
|
if fp := dl.cacheFileOnError(req, err); fp != "" {
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
|
|
|
|
h.Cached(fp)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fp
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
2024-01-27 01:18:38 +03:00
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
if err != nil {
|
2024-01-27 01:18:38 +03:00
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
|
|
|
|
if fp := dl.cacheFileOnError(req, err); fp != "" {
|
|
|
|
|
h.Cached(fp)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fp
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
|
|
|
|
h.Error(err)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return ""
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
|
|
|
|
err = errz.Errorf("download: unexpected HTTP status: %s", httpz.StatusText(resp.StatusCode))
|
|
|
|
|
if fp := dl.cacheFileOnError(req, err); fp != "" {
|
|
|
|
|
h.Cached(fp)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fp
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
h.Error(err)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return ""
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
reqCacheControl := parseCacheControl(req.Header)
|
|
|
|
|
if _, ok := reqCacheControl["only-if-cached"]; ok {
|
2024-01-25 07:01:24 +03:00
|
|
|
|
resp = newGatewayTimeoutResponse(req) //nolint:bodyclose
|
2024-01-15 04:45:34 +03:00
|
|
|
|
} else {
|
|
|
|
|
resp, err = dl.do(req) //nolint:bodyclose
|
|
|
|
|
if err != nil {
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if fp := dl.cacheFileOnError(req, err); fp != "" {
|
|
|
|
|
h.Cached(fp)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fp
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
2024-01-15 04:45:34 +03:00
|
|
|
|
h.Error(err)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return ""
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if cacheable && canStore(parseCacheControl(req.Header), parseCacheControl(resp.Header)) {
|
|
|
|
|
for _, varyKey := range headerAllCommaSepValues(resp.Header, "vary") {
|
|
|
|
|
varyKey = http.CanonicalHeaderKey(varyKey)
|
|
|
|
|
fakeHeader := "X-Varied-" + varyKey
|
|
|
|
|
reqValue := req.Header.Get(varyKey)
|
|
|
|
|
if reqValue != "" {
|
|
|
|
|
resp.Header.Set(fakeHeader, reqValue)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp == cachedResp {
|
2024-01-27 01:18:38 +03:00
|
|
|
|
err = dl.cache.write(ctx, resp, true)
|
2024-01-15 04:45:34 +03:00
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if err != nil {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
log.Error("Failed to update cache header", lga.Dir, dl.cache.dir, lga.Err, err)
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if fp := dl.cacheFileOnError(req, err); fp != "" {
|
|
|
|
|
h.Cached(fp)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fp
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
2024-01-15 04:45:34 +03:00
|
|
|
|
h.Error(err)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return ""
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if fpBody != "" {
|
|
|
|
|
h.Cached(fpBody)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fpBody
|
2024-01-27 01:18:38 +03:00
|
|
|
|
}
|
|
|
|
|
} else if cachedResp != nil && cachedResp.Body != nil {
|
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, cachedResp.Body)
|
|
|
|
|
}
|
2024-01-15 04:45:34 +03:00
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
dl.dlStream = streamcache.New(resp.Body)
|
|
|
|
|
resp.Body = dl.dlStream.NewReader(ctx)
|
|
|
|
|
h.Uncached(dl.dlStream)
|
|
|
|
|
|
|
|
|
|
if err = dl.cache.write(req.Context(), resp, false); err != nil {
|
|
|
|
|
// We don't explicitly call Handler.Error: it would be "illegal" to do so
|
|
|
|
|
// anyway, because the Handler docs state that at most one Handler callback
|
|
|
|
|
// func is ever invoked.
|
|
|
|
|
//
|
2024-01-28 03:01:45 +03:00
|
|
|
|
// The cache write could fail for one of two reasons:
|
|
|
|
|
//
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// - The download didn't complete successfully: that is, there was an error
|
|
|
|
|
// reading from resp.Body. In this case, that same error will be propagated
|
|
|
|
|
// to the Handler via the streamcache.Stream that was provided to Handler.Uncached.
|
|
|
|
|
// - The download completed, but there was a problem writing out the cache
|
|
|
|
|
// files (header, body, checksum). This is likely a very rare occurrence.
|
|
|
|
|
// In that case, any previous cache files are left untouched by cache.write,
|
|
|
|
|
// and all we do is log the error. If the cache is inconsistent, it will
|
|
|
|
|
// repair itself on next invocation, so it's not a big deal.
|
|
|
|
|
log.Warn("Failed to write download cache", lga.Dir, dl.cache.dir, lga.Err, err)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
|
|
|
|
return ""
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
2024-01-27 01:18:38 +03:00
|
|
|
|
lg.WarnIfCloseError(log, lgm.CloseHTTPResponseBody, resp.Body)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return fpBody
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
// It's not cacheable, so we can just wrap resp.Body in a streamcache
|
|
|
|
|
// and return it.
|
2024-01-27 01:18:38 +03:00
|
|
|
|
dl.dlStream = streamcache.New(resp.Body)
|
2024-01-25 07:01:24 +03:00
|
|
|
|
resp.Body = nil // Unnecessary, but just to be explicit.
|
2024-01-27 01:18:38 +03:00
|
|
|
|
h.Uncached(dl.dlStream)
|
2024-01-28 03:01:45 +03:00
|
|
|
|
return ""
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// do executes the request via dl.c, logging the response and wiring up
// progress reporting. On error, a nil response is returned.
func (dl *Downloader) do(req *http.Request) (*http.Response, error) {
	ctx := req.Context()
	log := lg.FromContext(ctx)
	// Show a progress waiter while we wait for the server to respond.
	bar := progress.FromContext(ctx).NewWaiter(dl.name+": start download", true)
	start := time.Now()
	resp, err := dl.c.Do(req)
	logResp(log, req, resp, time.Since(start), err)
	bar.Stop()
	if err != nil {
		// Download timeout errors are typically wrapped in an url.Error, resulting
		// in a message like:
		//
		//  Get "https://example.com": http response header not received within 1ms timeout
		//
		// We want to trim off that `GET "URL"` prefix, but we only do that if
		// there's a wrapped error beneath (which should be the case).
		if errz.Has[*url.Error](err) && errors.Is(err, context.DeadlineExceeded) {
			if e := errors.Unwrap(err); e != nil {
				err = e
			}
		}
		// Note: on the error path, the returned response is always nil.
		return nil, err
	}

	if resp.Body != nil && resp.Body != http.NoBody {
		// Wrap the body so that reads render download progress.
		r := progress.NewReader(req.Context(), dl.name+": download", resp.ContentLength, resp.Body)
		// NOTE(review): if progress.NewReader ever returns a reader that is
		// not an io.ReadCloser, this assignment silently sets resp.Body to
		// nil — confirm NewReader's contract.
		resp.Body, _ = r.(io.ReadCloser)
	}
	return resp, nil
}
|
|
|
|
|
|
|
|
|
|
// mustRequest creates a new request from dl.url. The url has already been
|
|
|
|
|
// parsed in download.New, so it's safe to ignore the error.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) mustRequest(ctx context.Context) *http.Request {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, dl.url, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
lg.FromContext(ctx).Error("Failed to create request", lga.URL, dl.url, lga.Err, err)
|
|
|
|
|
panic(err)
|
|
|
|
|
}
|
|
|
|
|
return req
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Clear deletes the cache.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) Clear(ctx context.Context) error {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
if dl.cache == nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dl.cache.clear(ctx)
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
// State returns the Downloader's cache state.
|
|
|
|
|
func (dl *Downloader) State(ctx context.Context) State {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
return dl.state(dl.mustRequest(ctx))
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) state(req *http.Request) State {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
if !dl.isCacheable(req) {
|
|
|
|
|
return Uncached
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ctx := req.Context()
|
|
|
|
|
log := lg.FromContext(ctx)
|
|
|
|
|
|
|
|
|
|
if !dl.cache.exists(req) {
|
|
|
|
|
return Uncached
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fpHeader, _, _ := dl.cache.paths(req)
|
|
|
|
|
f, err := os.Open(fpHeader)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Error(msgCloseCacheHeaderFile, lga.File, fpHeader, lga.Err, err)
|
|
|
|
|
return Uncached
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
defer lg.WarnIfCloseError(log, msgCloseCacheHeaderFile, f)
|
|
|
|
|
|
|
|
|
|
cachedResp, err := httpz.ReadResponseHeader(bufio.NewReader(f), nil) //nolint:bodyclose
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Error("Failed to read cached response header", lga.Err, err)
|
|
|
|
|
return Uncached
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return getFreshness(cachedResp.Header, req.Header)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Filesize returns the size of the downloaded file. This should
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// only be invoked after the download has completed or is cached,
|
|
|
|
|
// as it may block until the download completes.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) Filesize(ctx context.Context) (int64, error) {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if dl.dlStream != nil {
|
|
|
|
|
// There's an active download, so we can get the filesize
|
|
|
|
|
// when the download completes.
|
|
|
|
|
size, err := dl.dlStream.Total(ctx)
|
|
|
|
|
return int64(size), err
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
if dl.cache == nil {
|
2024-01-27 01:18:38 +03:00
|
|
|
|
return 0, errz.New("download file size not available")
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req := dl.mustRequest(ctx)
|
|
|
|
|
if !dl.cache.exists(req) {
|
|
|
|
|
// It's not in the cache.
|
2024-01-27 01:18:38 +03:00
|
|
|
|
return 0, errz.New("download file size not available")
|
2024-01-15 04:45:34 +03:00
|
|
|
|
}
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// It's in the cache.
|
2024-01-15 04:45:34 +03:00
|
|
|
|
_, fp, _ := dl.cache.paths(req)
|
|
|
|
|
fi, err := os.Stat(fp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return 0, errz.Wrapf(err, "unable to stat cached download file: %s", fp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return fi.Size(), nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// CacheFile returns the path to the cached file, if it exists and has
|
|
|
|
|
// been fully downloaded.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) CacheFile(ctx context.Context) (fp string, err error) {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
|
|
|
|
|
if dl.cache == nil {
|
|
|
|
|
return "", errz.Errorf("cache doesn't exist for: %s", dl.url)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req := dl.mustRequest(ctx)
|
|
|
|
|
if !dl.cache.exists(req) {
|
|
|
|
|
return "", errz.Errorf("no cache for: %s", dl.url)
|
|
|
|
|
}
|
|
|
|
|
_, fp, _ = dl.cache.paths(req)
|
|
|
|
|
return fp, nil
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-27 01:18:38 +03:00
|
|
|
|
// cacheFileOnError returns the path to the cached file, if it exists,
|
|
|
|
|
// and is allowed to be returned on a refresh error. If not, empty
|
|
|
|
|
// string is returned.
|
|
|
|
|
func (dl *Downloader) cacheFileOnError(req *http.Request, err error) (fp string) {
|
|
|
|
|
if req == nil {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if dl.cache == nil {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !dl.continueOnError {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !dl.cache.exists(req) {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_, fp, _ = dl.cache.paths(req)
|
|
|
|
|
lg.FromContext(req.Context()).Warn("Returning possibly stale cached response due to download refresh error",
|
|
|
|
|
lga.Err, err, lga.Path, fp)
|
|
|
|
|
return fp
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
|
// Checksum returns the checksum of the cached download, if available.
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) Checksum(ctx context.Context) (sum checksum.Checksum, ok bool) {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
dl.mu.Lock()
|
|
|
|
|
defer dl.mu.Unlock()
|
|
|
|
|
|
|
|
|
|
if dl.cache == nil {
|
|
|
|
|
return "", false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req := dl.mustRequest(ctx)
|
|
|
|
|
return dl.cache.cachedChecksum(req)
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-25 07:01:24 +03:00
|
|
|
|
func (dl *Downloader) isCacheable(req *http.Request) bool {
|
2024-01-27 01:18:38 +03:00
|
|
|
|
if dl.cache == nil {
|
2024-01-15 04:45:34 +03:00
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return (req.Method == http.MethodGet || req.Method == http.MethodHead) && req.Header.Get("range") == ""
|
|
|
|
|
}
|