package source import ( "context" "io" "net/http" "path/filepath" "time" "github.com/neilotoole/sq/libsq/core/errz" "github.com/neilotoole/sq/libsq/core/ioz" "github.com/neilotoole/sq/libsq/core/ioz/checksum" "github.com/neilotoole/sq/libsq/core/ioz/download" "github.com/neilotoole/sq/libsq/core/ioz/httpz" "github.com/neilotoole/sq/libsq/core/lg" "github.com/neilotoole/sq/libsq/core/lg/lga" "github.com/neilotoole/sq/libsq/core/options" ) var OptHTTPRequestTimeout = options.NewDuration( "http.request.timeout", "", 0, time.Second*10, "HTTP/S request initial response timeout duration", `How long to wait for initial response from a HTTP/S endpoint before timeout occurs. Reading the body of the response, such as a large HTTP file download, is not affected by this option. Example: 500ms or 3s. Contrast with http.response.timeout.`, options.TagSource, ) var OptHTTPResponseTimeout = options.NewDuration( "http.response.timeout", "", 0, 0, "HTTP/S request completion timeout duration", `How long to wait for the entire HTTP transaction to complete. This includes reading the body of the response, such as a large HTTP file download. Typically this is set to 0, indicating no timeout. Contrast with http.request.timeout.`, options.TagSource, ) var OptHTTPSInsecureSkipVerify = options.NewBool( "https.insecure-skip-verify", "", false, 0, false, "Skip HTTPS TLS verification", "Skip HTTPS TLS verification. Useful when downloading against self-signed certs.", options.TagSource, ) // downloadFor returns the download.Download for src, creating // and caching it if necessary. func (fs *Files) downloadFor(ctx context.Context, src *Source) (*download.Download, error) { dl, ok := fs.downloads[src.Handle] if ok { return dl, nil } dlDir, err := fs.downloadDirFor(src) if err != nil { return nil, err } if err = ioz.RequireDir(dlDir); err != nil { return nil, err } c := fs.httpClientFor(ctx, src) if dl, err = download.New(src.Handle, c, src.Location, dlDir); err != nil { return nil, err } fs.downloads[src.Handle] = dl return dl, nil } func (fs *Files) httpClientFor(ctx context.Context, src *Source) *http.Client { o := options.Merge(options.FromContext(ctx), src.Options) return httpz.NewClient(httpz.DefaultUserAgent, httpz.OptRequestTimeout(OptHTTPRequestTimeout.Get(o)), httpz.OptResponseTimeout(OptHTTPResponseTimeout.Get(o)), httpz.OptInsecureSkipVerify(OptHTTPSInsecureSkipVerify.Get(o)), ) } // downloadDirFor gets the download cache dir for src. It is not // guaranteed that the returned dir exists or is accessible. func (fs *Files) downloadDirFor(src *Source) (string, error) { cacheDir, err := fs.CacheDirFor(src) if err != nil { return "", err } fp := filepath.Join(cacheDir, "download", checksum.Sum([]byte(src.Location))) return fp, nil } // openRemoteFile adds a remote file to fs's cache, returning a reader. // If the remote file is already cached, the path to that cached download // file is returned in cachedDownload; otherwise cachedDownload is empty. // If checkFresh is false and the file is already fully downloaded, its // freshness is not checked against the remote server. func (fs *Files) openRemoteFile(ctx context.Context, src *Source, checkFresh bool) (cachedDownload string, rdr io.ReadCloser, err error, ) { loc := src.Location if getLocType(loc) != locTypeRemoteFile { return "", nil, errz.Errorf("not a remote file: %s", loc) } dl, err := fs.downloadFor(ctx, src) if err != nil { return "", nil, err } if !checkFresh && fs.fscache.Exists(loc) { // If the download has completed, dl.CacheFile will return the // path to the cached file. cachedDownload, err = dl.CacheFile(ctx) if err != nil { return "", nil, err } // The file is already cached, and we're not checking freshness. // So, we can just return the cached reader. rdr, _, err = fs.fscache.Get(loc) if err != nil { return "", nil, errz.Err(err) } return cachedDownload, rdr, nil } errCh := make(chan error, 1) rdrCh := make(chan io.ReadCloser, 1) h := download.Handler{ Cached: func(fp string) { if !fs.fscache.Exists(fp) { if hErr := fs.fscache.MapFile(fp); hErr != nil { errCh <- errz.Wrapf(hErr, "failed to map file into fscache: %s", fp) return } } r, _, hErr := fs.fscache.Get(fp) if hErr != nil { errCh <- errz.Err(hErr) return } cachedDownload = fp rdrCh <- r }, Uncached: func() (dest ioz.WriteErrorCloser) { r, w, wErrFn, hErr := fs.fscache.GetWithErr(loc) if hErr != nil { errCh <- errz.Err(hErr) return nil } wec := ioz.NewFuncWriteErrorCloser(w, func(err error) { lg.FromContext(ctx).Error("Error writing to fscache", lga.Src, src, lga.Err, err) wErrFn(err) }) rdrCh <- r return wec }, Error: func(hErr error) { errCh <- hErr }, } fs.fillerWgs.Add(1) go func() { defer fs.fillerWgs.Done() dl.Get(ctx, h) }() select { case <-ctx.Done(): return "", nil, errz.Err(ctx.Err()) case err = <-errCh: return "", nil, err case rdr = <-rdrCh: return cachedDownload, rdr, nil } }