// Package files contains functionality for dealing with files,
// including remote files (e.g. HTTP). The files.Files type
// is the central API for interacting with files.
package files

import (
	"context"
	"io"
	"log/slog"
	"net/http"
	"os"
	"sync"

	"github.com/neilotoole/streamcache"

	"github.com/neilotoole/sq/libsq/core/cleanup"
	"github.com/neilotoole/sq/libsq/core/errz"
	"github.com/neilotoole/sq/libsq/core/ioz"
	"github.com/neilotoole/sq/libsq/core/ioz/httpz"
	"github.com/neilotoole/sq/libsq/core/ioz/lockfile"
	"github.com/neilotoole/sq/libsq/core/lg"
	"github.com/neilotoole/sq/libsq/core/lg/lga"
	"github.com/neilotoole/sq/libsq/core/lg/lgm"
	"github.com/neilotoole/sq/libsq/core/options"
	"github.com/neilotoole/sq/libsq/files/internal/downloader"
	"github.com/neilotoole/sq/libsq/source"
	"github.com/neilotoole/sq/libsq/source/location"
)

// Files is the centralized API for interacting with files. It provides
// a uniform mechanism for reading files, whether from local disk, stdin,
// or remote HTTP.
//
// Instances are created via New, and must be released via Files.Close.
type Files struct {
	// log is the logger obtained from the context passed to New. It is
	// used where a method logs via fs rather than via its own ctx (e.g.
	// Ping uses it to warn if closing the HTTP response body fails).
	log         *slog.Logger
	// clnup holds the cleanup funcs that are executed by Files.Close.
	// For example, CreateTemp registers removal of its temp file here
	// when invoked with clean=true.
	clnup       *cleanup.Cleanup
	// optRegistry is the options registry supplied to New. It is never
	// nil: New substitutes an empty registry if none is supplied.
	optRegistry *options.Registry

	// streams is a map of source handles to streamcache.Stream
	// instances: this is used to cache non-regular-file streams, such
	// as stdin, or in-progress downloads. This streamcache mechanism
	// permits multiple readers to access the stream. For example:
	//
	//  $ cat FILE | sq .data
	//
	// In this scenario FILE is provided on os.Stdin, but sq needs
	// to read the stdin stream several times: first, to detect the type
	// of data on stdin (via Files.DetectStdinType), and then to actually
	// ingest the data.
	streams map[string]*streamcache.Stream

	// downloaders is a cache map of source handles to the downloader for
	// that source: we only ever want to have one downloader per source.
	// See Files.downloaderFor.
	downloaders map[string]*downloader.Downloader

	// downloadedFiles is a map of source handles to filepath of
	// already downloaded files. Consulting this map allows Files
	// to directly serve the downloaded file from disk instead of
	// using downloader.Downloader (which typically makes an HTTP
	// call to check the freshness of an already downloaded file).
	downloadedFiles map[string]string

	// cfgLockFn is the lock func for sq's config.
	cfgLockFn lockfile.LockFunc

	// cacheDir is the cache dir path supplied to New.
	// NOTE(review): presumably this is the dir examined by the cache
	// sweep that Close triggers via doCacheSweep; confirm against that
	// method.
	cacheDir string
	// tempDir is the dir in which CreateTemp creates its files. The
	// entire dir is removed by Files.Close.
	tempDir  string

	// detectFns is the set of functions that can detect
	// the type of a file.
	detectFns []TypeDetectFunc

	// mu guards access to Files' internals.
	mu sync.Mutex
}

// New constructs a Files instance that creates its temporary files
// in tmpDir, and is configured with cacheDir as its cache dir. The
// instance's logger is taken from ctx. If optReg is nil, an empty
// options.Registry is used in its place. Arg cfgLock is retained as
// the lock func for sq's config.
//
// The returned error is currently always nil. The caller must invoke
// Files.Close when done with the instance.
func New(ctx context.Context, optReg *options.Registry, cfgLock lockfile.LockFunc,
	tmpDir, cacheDir string,
) (*Files, error) {
	log := lg.FromContext(ctx)
	log.Debug("Creating new Files instance", "tmp_dir", tmpDir, "cache_dir", cacheDir)

	// Guarantee that Files always has a usable registry.
	registry := optReg
	if registry == nil {
		registry = &options.Registry{}
	}

	return &Files{
		log:             log,
		clnup:           cleanup.New(),
		optRegistry:     registry,
		streams:         map[string]*streamcache.Stream{},
		downloaders:     map[string]*downloader.Downloader{},
		downloadedFiles: map[string]string{},
		cfgLockFn:       cfgLock,
		cacheDir:        cacheDir,
		tempDir:         tmpDir,
	}, nil
}

// Filesize returns the size in bytes of the file at src.Location.
//
//   - Local file: the size is obtained via os.Stat.
//   - Stdin: the size is the total of the cached stdin stream; this
//     blocks until the stream's total is known. It is an error if
//     stdin has not been added to the cache.
//   - HTTP: if the file is already downloaded, the size of the
//     downloaded file is returned. Otherwise, if a download stream is
//     active, this blocks until the stream's total is known. Failing
//     both, the size of the downloader's cache file is returned.
//
// An error is returned if src is not a document/file source.
func (fs *Files) Filesize(ctx context.Context, src *source.Source) (size int64, err error) {
	switch location.TypeOf(src.Location) {
	case location.TypeSQL:
		// Should be impossible.
		return 0, errz.Errorf("invalid to get size of SQL source: %s", src.Handle)

	case location.TypeFile:
		info, statErr := os.Stat(src.Location)
		if statErr != nil {
			return 0, errz.Err(statErr)
		}
		return info.Size(), nil

	case location.TypeStdin:
		// The lock is only needed for the map lookup: it must not
		// be held while blocking on the stream.
		fs.mu.Lock()
		stdin, found := fs.streams[source.StdinHandle]
		fs.mu.Unlock()

		if !found {
			// This is a programming error; probably should panic here.
			return 0, errz.Errorf("stdin not present in cache")
		}

		n, totalErr := stdin.Total(ctx)
		if totalErr != nil {
			return 0, totalErr
		}
		return int64(n), nil

	case location.TypeHTTP:
		// Remote file: handled below the switch.

	default:
		// Should be impossible.
		return 0, errz.Errorf("unknown source location type: %s", src)
	}

	fs.mu.Lock()
	cachedFile, downloaded := fs.downloadedFiles[src.Handle]
	activeStream, streaming := fs.streams[src.Handle]

	// An already-downloaded file takes precedence over an active
	// download stream.
	if !downloaded && streaming {
		// Release the lock before blocking until the download completes.
		fs.mu.Unlock()

		n, totalErr := activeStream.Total(ctx)
		if totalErr != nil {
			return 0, totalErr
		}
		return int64(n), nil
	}

	// From here on, the lock is held until the function returns.
	defer fs.mu.Unlock()

	if downloaded {
		return ioz.Filesize(cachedFile)
	}

	// Neither downloaded nor streaming: turn to the downloader.
	dl, dlErr := fs.downloaderFor(ctx, src)
	if dlErr != nil {
		return 0, dlErr
	}

	if cachedFile, err = dl.CacheFile(ctx); err != nil {
		return 0, err
	}

	return ioz.Filesize(cachedFile)
}

// AddStdin registers f as the stdin stream in fs's stream cache, keyed
// by source.StdinHandle. The stdin data is subsequently accessible via
// Files.NewReader(src), where src.Handle is source.StdinHandle; the
// type of the data can be detected via DetectStdinType.
//
// An error is returned if stdin has already been added.
func (fs *Files) AddStdin(ctx context.Context, f *os.File) error {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	if _, exists := fs.streams[source.StdinHandle]; exists {
		return errz.Errorf("%s already added to reader cache", source.StdinHandle)
	}

	fs.streams[source.StdinHandle] = streamcache.New(f)

	log := lg.FromContext(ctx).With(lga.Handle, source.StdinHandle, lga.File, f.Name())
	log.Debug("Added stdin to reader cache")
	return nil
}

// filepath returns the path on disk for src. For a local file source,
// that path is src.Location itself. For a remote (http) source, it is
// the path of the cached download file, as reported by fs.downloadPaths;
// it's not guaranteed that that file exists. An error is returned for
// sources that have no file path: SQL sources, stdin, and sources whose
// location type is unknown.
func (fs *Files) filepath(src *source.Source) (string, error) {
	switch location.TypeOf(src.Location) {
	case location.TypeFile:
		return src.Location, nil
	case location.TypeSQL:
		return "", errz.Errorf("cannot get filepath of SQL source: %s", src.Handle)
	case location.TypeStdin:
		return "", errz.Errorf("cannot get filepath of stdin source: %s", src.Handle)
	case location.TypeHTTP:
		// Remote file: resolved below the switch.
	default:
		return "", errz.Errorf("unknown source location type for %s: %s", src.Handle, location.Redact(src.Location))
	}

	_, downloadFile, err := fs.downloadPaths(src)
	if err != nil {
		return "", err
	}
	return downloadFile, nil
}

// NewReader returns a new io.ReadCloser for src.Location. It acquires
// fs's lock and delegates to fs.newReader.
//
// Arg ingesting is a performance hint, indicating that the reader is
// being used to ingest data (as opposed to, say, sampling the data for
// type detection). Having invoked NewReader for a src with ingesting=true,
// it's an error to invoke NewReader again for that same src.
//
// If src.Handle is StdinHandle, AddStdin must first have been invoked.
//
// The caller must close the reader.
func (fs *Files) NewReader(ctx context.Context, src *source.Source, ingesting bool) (io.ReadCloser, error) {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	rdr, err := fs.newReader(ctx, src, ingesting)
	return rdr, err
}

// newReader returns a new io.ReadCloser for src.Location. The caller
// must hold fs.mu.
//
// The reader is obtained as follows:
//
//   - Local file: the file is opened directly.
//   - Stdin: a reader is created on the cached stdin stream. It is an
//     error if AddStdin has not yet been invoked.
//   - Remote file: a reader is created on the in-progress download
//     stream if there is one; otherwise the already-downloaded file is
//     opened if there is one; otherwise fs.maybeStartDownload is
//     consulted, yielding either a file to open or a stream to read.
//
// If finalRdr is true, and src is using a streamcache.Stream, that cache
// is sealed after the reader is created: newReader must not be called
// again for src in the lifetime of this Files instance.
//
// On error, the returned io.ReadCloser is always a true nil interface
// value.
func (fs *Files) newReader(ctx context.Context, src *source.Source, finalRdr bool) (io.ReadCloser, error) {
	log := lg.FromContext(ctx).With(lga.Src, src)
	// Depth 2 attributes the log entry to the caller of NewReader, so
	// this call must remain directly in newReader's body.
	lg.Depth(log, slog.LevelDebug, 2, "Invoked Files.NewReader", "final_reader", finalRdr)

	// openFile opens the file at fp for reading. It exists to avoid the
	// typed-nil interface trap: directly returning the (*os.File, error)
	// result of os.Open as (io.ReadCloser, error) yields, on failure, a
	// non-nil io.ReadCloser wrapping a nil *os.File. Here, the literal nil
	// is returned on failure.
	openFile := func(fp string) (io.ReadCloser, error) {
		f, openErr := os.Open(fp)
		if openErr != nil {
			return nil, errz.Err(openErr)
		}
		return f, nil
	}

	loc := src.Location
	switch location.TypeOf(loc) {
	case location.TypeUnknown:
		return nil, errz.Errorf("unknown source location type: %s", loc)
	case location.TypeSQL:
		return nil, errz.Errorf("invalid to read SQL source: %s", loc)
	case location.TypeFile:
		return openFile(loc)
	case location.TypeStdin:
		stdinStream, ok := fs.streams[source.StdinHandle]
		if !ok {
			// This is a programming error: AddStdin should have been invoked first.
			// Probably should panic here.
			return nil, errz.New("@stdin not cached: has AddStdin been invoked yet?")
		}
		// The reader must be created before sealing: no new readers
		// can be created on a sealed stream.
		r := stdinStream.NewReader(ctx)
		if finalRdr {
			lg.FromContext(ctx).Debug("Sealing source stream")
			stdinStream.Seal()
		}
		return r, nil
	default:
		// It's a remote file.
	}

	// Is there a download in progress?
	if dlStream, ok := fs.streams[src.Handle]; ok {
		r := dlStream.NewReader(ctx)
		if finalRdr {
			log.Debug("Sealing download source stream")
			dlStream.Seal()
		}
		return r, nil
	}

	// Is the file already downloaded?
	if fp, ok := fs.downloadedFiles[src.Handle]; ok {
		return openFile(fp)
	}

	// One of dlFile, dlStream, or err is guaranteed to be non-nil.
	dlFile, dlStream, err := fs.maybeStartDownload(ctx, src, false)
	switch {
	case err != nil:
		return nil, err
	case dlFile != "":
		return openFile(dlFile)
	case dlStream != nil:
		r := dlStream.NewReader(ctx)
		if finalRdr {
			log.Debug("Sealing download source stream")
			dlStream.Seal()
		}
		return r, nil
	default:
		// Should be impossible.
		panic("Files.maybeStartDownload returned all nils")
	}
}

// Ping implements a ping mechanism for document sources (local or
// remote files). The mechanism depends on the source's location type:
//
//   - Stdin: always succeeds.
//   - Local file: succeeds if the file can be stat'ed.
//   - HTTP: a HEAD request is issued to src.Location, using the HTTP
//     client returned by fs.httpClientFor. The ping succeeds only if
//     the response status is 200 OK.
//
// An error is returned for any other location type. Note that fs's
// lock is held for the duration of the ping.
func (fs *Files) Ping(ctx context.Context, src *source.Source) error {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	switch location.TypeOf(src.Location) {
	case location.TypeStdin:
		// Stdin is always available.
		return nil

	case location.TypeFile:
		_, statErr := os.Stat(src.Location)
		if statErr == nil {
			return nil
		}
		return errz.Wrapf(statErr, "ping: failed to stat file source %s: %s", src.Handle, src.Location)

	case location.TypeHTTP:
		// Remote file: handled below the switch.

	default:
		// Shouldn't happen
		return errz.Errorf("ping: %s is not a document source", src.Handle)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodHead, src.Location, nil)
	if err != nil {
		return errz.Wrapf(err, "ping: %s", src.Handle)
	}

	resp, err := fs.httpClientFor(ctx, src).Do(req) //nolint:bodyclose
	if err != nil {
		return errz.Wrapf(err, "ping: %s", src.Handle)
	}

	// A HEAD response carries no payload, but net/http guarantees that
	// resp.Body is non-nil when err is nil, so the body is closed anyway.
	lg.WarnIfCloseError(fs.log, lgm.CloseHTTPResponseBody, resp.Body)

	if got := resp.StatusCode; got != http.StatusOK {
		return errz.Errorf("ping: %s: expected {%s} but got {%s}",
			src.Handle, httpz.StatusText(http.StatusOK), httpz.StatusText(got))
	}
	return nil
}

// Close releases the resources held by fs. In order, it:
//
//  1. Closes the underlying source of each cached stream that is not
//     already done, if that source implements io.Closer.
//  2. Runs the registered cleanup funcs.
//  3. Removes fs's temp dir and its contents.
//  4. Invokes fs.doCacheSweep.
//
// The errors from steps 1-3 are combined into the returned error.
func (fs *Files) Close() error {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	var errs error
	for _, strm := range fs.streams {
		select {
		case <-strm.Done():
			// The stream is already closed: nothing to do.
			continue
		default:
		}

		closer, closeable := strm.Source().(io.Closer)
		if !closeable {
			continue
		}
		errs = errz.Append(errs, closer.Close())
	}

	errs = errz.Append(errs, fs.clnup.Run())

	rmErr := errz.Wrap(os.RemoveAll(fs.tempDir), "remove files temp dir")
	errs = errz.Append(errs, rmErr)

	fs.doCacheSweep()

	return errs
}

// CreateTemp creates a new temporary file in fs's temp dir, with the
// given filename pattern, as per the os.CreateTemp docs. If arg clean
// is true, removal of the file is added to the cleanup sequence that
// is invoked by fs.Close. It is the caller's responsibility to close
// the returned file.
func (fs *Files) CreateTemp(pattern string, clean bool) (*os.File, error) {
	tmpFile, err := os.CreateTemp(fs.tempDir, pattern)
	if err != nil {
		return nil, errz.Err(err)
	}

	if !clean {
		return tmpFile, nil
	}

	// Capture the name now, rather than holding onto the file handle
	// in the cleanup func.
	path := tmpFile.Name()
	fs.clnup.AddE(func() error {
		return errz.Err(os.Remove(path))
	})
	return tmpFile, nil
}

// NewReaderFunc is a func that, given a context, returns an io.ReadCloser
// or an error. The caller is responsible for closing the returned
// io.ReadCloser.
type NewReaderFunc func(ctx context.Context) (io.ReadCloser, error)