git-bug/cache/repo_cache.go

279 lines
6.8 KiB
Go
Raw Normal View History

package cache
import (
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
"sync"
"github.com/MichaelMure/git-bug/bug"
"github.com/MichaelMure/git-bug/entity"
2018-11-21 20:56:12 +03:00
"github.com/MichaelMure/git-bug/identity"
"github.com/MichaelMure/git-bug/repository"
"github.com/MichaelMure/git-bug/util/process"
)
2019-02-18 16:11:37 +03:00
// formatVersion identifies the revision of the cache format.
// NOTE(review): presumably compared with a version stored in the on-disk
// cache files to detect an outdated cache — confirm against the loaders.
//
// 1: original format
// 2: added cache for identities with a reference in the bug cache
// 3: no more legacy identity
const formatVersion = 3
2020-08-25 16:26:23 +03:00
// defaultMaxLoadedBugs is the default upper bound on the number of bugs kept
// loaded in memory. Once that bound is exceeded, eviction is performed.
const defaultMaxLoadedBugs = 1000
2020-07-26 10:52:29 +03:00
// Compile-time checks that *RepoCache satisfies the repository interfaces it
// is expected to implement.
var (
	_ repository.RepoCommon  = (*RepoCache)(nil)
	_ repository.RepoConfig  = (*RepoCache)(nil)
	_ repository.RepoKeyring = (*RepoCache)(nil)
)
2019-01-19 18:01:06 +03:00
// RepoCache is a cache for a Repository. This cache has multiple functions:
//
// 1. After being loaded, a Bug is kept in memory in the cache, allowing for fast
// access later.
2019-04-18 03:48:27 +03:00
// 2. The cache maintain in memory and on disk a pre-digested excerpt for each bug,
2019-01-19 18:01:06 +03:00
// allowing for fast querying the whole set of bugs without having to load
// them individually.
// 3. The cache guarantee that a single instance of a Bug is loaded at once, avoiding
// loss of data that we could have with multiple copies in the same process.
// 4. The same way, the cache maintain in memory a single copy of the loaded identities.
//
// The cache also protect the on-disk data by locking the git repository for its
// own usage, by writing a lock file. Of course, normal git operations are not
// affected, only git-bug related one.
type RepoCache struct {
2018-09-11 20:28:32 +03:00
// the underlying repo
repo repository.ClockedRepo
2019-02-18 16:11:37 +03:00
2020-02-12 23:03:20 +03:00
// the name of the repository, as defined in the MultiRepoCache
name string
2020-08-25 16:26:23 +03:00
// maximum number of loaded bugs
maxLoadedBugs int
muBug sync.RWMutex
2018-09-11 20:28:32 +03:00
// excerpt of bugs data for all bugs
bugExcerpts map[entity.Id]*BugExcerpt
2018-09-11 20:28:32 +03:00
// bug loaded in memory
bugs map[entity.Id]*BugCache
2020-08-25 16:26:23 +03:00
// loadedBugs is an LRU cache that records which bugs the cache has loaded in
loadedBugs *LRUIdCache
2020-07-26 10:52:29 +03:00
muIdentity sync.RWMutex
2019-02-18 16:11:37 +03:00
// excerpt of identities data for all identities
identitiesExcerpts map[entity.Id]*IdentityExcerpt
2019-01-17 05:09:08 +03:00
// identities loaded in memory
identities map[entity.Id]*IdentityCache
2019-02-18 16:11:37 +03:00
// the user identity's id, if known
userIdentityId entity.Id
}
func NewRepoCache(r repository.ClockedRepo) (*RepoCache, error) {
2020-02-12 23:03:20 +03:00
return NewNamedRepoCache(r, "")
}
func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error) {
c := &RepoCache{
2020-08-25 16:26:23 +03:00
repo: r,
name: name,
maxLoadedBugs: defaultMaxLoadedBugs,
bugs: make(map[entity.Id]*BugCache),
loadedBugs: NewLRUIdCache(),
identities: make(map[entity.Id]*IdentityCache),
}
err := c.lock()
if err != nil {
return &RepoCache{}, err
}
err = c.load()
if err == nil {
return c, nil
}
2020-06-26 01:58:38 +03:00
// Cache is either missing, broken or outdated. Rebuilding.
err = c.buildCache()
if err != nil {
return nil, err
}
return c, c.write()
}
2020-08-25 16:26:23 +03:00
// setCacheSize changes the maximum number of loaded bugs to size, then calls
// evictIfNeeded so that the new limit takes effect right away.
func (c *RepoCache) setCacheSize(size int) {
	c.maxLoadedBugs = size
	c.evictIfNeeded()
}
// load tries to read all the cache files from the disk: the bug cache first,
// then the identity cache. The first error encountered is returned.
func (c *RepoCache) load() error {
	if err := c.loadBugCache(); err != nil {
		return err
	}

	return c.loadIdentityCache()
}
// write serializes all the cache files on disk: the bug cache first, then the
// identity cache. The first error encountered is returned.
func (c *RepoCache) write() error {
	if err := c.writeBugCache(); err != nil {
		return err
	}

	return c.writeIdentityCache()
}
func (c *RepoCache) lock() error {
err := repoIsAvailable(c.repo)
if err != nil {
return err
}
2020-12-05 05:08:54 +03:00
f, err := c.repo.LocalStorage().Create(lockfile)
if err != nil {
return err
}
pid := fmt.Sprintf("%d", os.Getpid())
2020-12-05 05:08:54 +03:00
_, err = f.Write([]byte(pid))
if err != nil {
return err
}
return f.Close()
}
func (c *RepoCache) Close() error {
c.muBug.Lock()
defer c.muBug.Unlock()
c.muIdentity.Lock()
defer c.muIdentity.Unlock()
c.identities = make(map[entity.Id]*IdentityCache)
c.identitiesExcerpts = nil
2020-08-26 04:04:40 +03:00
c.bugs = make(map[entity.Id]*BugCache)
c.bugExcerpts = nil
2019-03-25 23:58:27 +03:00
2020-12-08 16:42:13 +03:00
err := c.repo.Close()
if err != nil {
return err
}
2020-12-05 05:08:54 +03:00
return c.repo.LocalStorage().Remove(lockfile)
}
func (c *RepoCache) buildCache() error {
2020-12-05 05:08:54 +03:00
// TODO: make that parallel
c.muBug.Lock()
defer c.muBug.Unlock()
c.muIdentity.Lock()
defer c.muIdentity.Unlock()
2019-02-19 01:16:47 +03:00
_, _ = fmt.Fprintf(os.Stderr, "Building identity cache... ")
c.identitiesExcerpts = make(map[entity.Id]*IdentityExcerpt)
2019-02-19 01:16:47 +03:00
allIdentities := identity.ReadAllLocal(c.repo)
2019-02-19 01:16:47 +03:00
for i := range allIdentities {
if i.Err != nil {
return i.Err
}
c.identitiesExcerpts[i.Identity.Id()] = NewIdentityExcerpt(i.Identity)
}
_, _ = fmt.Fprintln(os.Stderr, "Done.")
_, _ = fmt.Fprintf(os.Stderr, "Building bug cache... ")
c.bugExcerpts = make(map[entity.Id]*BugExcerpt)
allBugs := bug.ReadAllLocal(c.repo)
2020-12-08 16:42:13 +03:00
// wipe the index just to be sure
err := c.repo.ClearBleveIndex("bug")
if err != nil {
2020-12-08 16:42:13 +03:00
return err
}
2020-08-25 05:43:42 +03:00
for b := range allBugs {
if b.Err != nil {
return b.Err
}
snap := b.Bug.Compile()
2019-02-18 16:11:37 +03:00
c.bugExcerpts[b.Bug.Id()] = NewBugExcerpt(b.Bug, &snap)
if err := c.addBugToSearchIndex(&snap); err != nil {
return err
}
}
_, _ = fmt.Fprintln(os.Stderr, "Done.")
return nil
}
// repoIsAvailable check is the given repository is locked by a Cache.
// Note: this is a smart function that will cleanup the lock file if the
// corresponding process is not there anymore.
// If no error is returned, the repo is free to edit.
2020-12-05 05:08:54 +03:00
func repoIsAvailable(repo repository.RepoStorage) error {
// Todo: this leave way for a racey access to the repo between the test
// if the file exist and the actual write. It's probably not a problem in
// practice because using a repository will be done from user interaction
// or in a context where a single instance of git-bug is already guaranteed
// (say, a server with the web UI running). But still, that might be nice to
// have a mutex or something to guard that.
// Todo: this will fail if somehow the filesystem is shared with another
// computer. Should add a configuration that prevent the cleaning of the
// lock file
2020-12-05 05:08:54 +03:00
f, err := repo.LocalStorage().Open(lockfile)
if err != nil && !os.IsNotExist(err) {
return err
}
if err == nil {
// lock file already exist
buf, err := ioutil.ReadAll(io.LimitReader(f, 10))
if err != nil {
return err
}
if len(buf) == 10 {
2018-09-10 13:47:05 +03:00
return fmt.Errorf("the lock file should be < 10 bytes")
}
pid, err := strconv.Atoi(string(buf))
if err != nil {
return err
}
if process.IsRunning(pid) {
2018-09-10 13:47:05 +03:00
return fmt.Errorf("the repository you want to access is already locked by the process pid %d", pid)
}
// The lock file is just laying there after a crash, clean it
fmt.Println("A lock file is present but the corresponding process is not, removing it.")
err = f.Close()
if err != nil {
return err
}
2020-12-05 05:08:54 +03:00
err = repo.LocalStorage().Remove(lockfile)
if err != nil {
return err
}
}
return nil
}