git-bug/bridge/github/import.go

598 lines
15 KiB
Go
Raw Normal View History

2018-09-24 20:22:32 +03:00
package github
import (
"context"
"fmt"
2019-04-27 02:15:02 +03:00
"time"
2018-09-24 20:22:32 +03:00
"github.com/shurcooL/githubv4"
2018-09-24 20:22:32 +03:00
"github.com/MichaelMure/git-bug/bridge/core"
"github.com/MichaelMure/git-bug/bridge/core/auth"
2018-09-25 20:10:38 +03:00
"github.com/MichaelMure/git-bug/bug"
2018-09-24 20:22:32 +03:00
"github.com/MichaelMure/git-bug/cache"
"github.com/MichaelMure/git-bug/entity"
"github.com/MichaelMure/git-bug/util/text"
2018-09-24 20:22:32 +03:00
)
2021-03-05 22:06:21 +03:00
// EMPTY_TITLE_PLACEHOLDER is the title used in place of a Github title made of a
// space followed by a zero width space (U+200B), which looks empty.
const EMPTY_TITLE_PLACEHOLDER = "<empty string>"
2018-09-25 20:10:38 +03:00
// githubImporter implement the Importer interface
type githubImporter struct {
conf core.Configuration
2019-05-10 03:05:00 +03:00
// mediator to access the Github API
mediator *importMediator
2019-05-10 03:05:00 +03:00
// send only channel
out chan<- core.ImportResult
}
// Init records the bridge configuration on the importer. The context and the
// repository cache are not used, and the returned error is always nil.
func (gi *githubImporter) Init(_ context.Context, _ *cache.RepoCache, conf core.Configuration) error {
	gi.conf = conf
	return nil
}
// ImportAll iterate over all the configured repository issues and ensure the creation of the
// missing issues / timeline items / edits / label events ...
func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) {
creds, err := auth.List(repo,
auth.WithTarget(target),
auth.WithKind(auth.KindToken),
auth.WithMeta(auth.MetaKeyLogin, gi.conf[confKeyDefaultLogin]),
)
if err != nil {
return nil, err
}
if len(creds) <= 0 {
return nil, ErrMissingIdentityToken
}
client := buildClient(creds[0].(*auth.Token))
gi.mediator = NewImportMediator(ctx, client, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
out := make(chan core.ImportResult)
gi.out = out
go func() {
defer close(gi.out)
// Loop over all matching issues
for event := range gi.mediator.Issues {
var issue issue
var issueEdits <-chan userContentEditEvent
var timelineItems <-chan timelineEvent
switch e := event.(type) {
case messageEvent:
fmt.Println(e.msg)
continue
case issueData:
issue = e.issue
issueEdits = e.issueEdits
timelineItems = e.timelineItems
default:
panic(fmt.Sprint("Unknown event type"))
}
// create issue
b, err := gi.ensureIssue(ctx, repo, &issue, issueEdits)
if err != nil {
err := fmt.Errorf("issue creation: %v", err)
out <- core.NewImportError(err, "")
return
}
// loop over timeline items
for event := range timelineItems {
var item timelineItem
var edits <-chan userContentEditEvent
switch e := event.(type) {
case messageEvent:
fmt.Println(e.msg)
continue
case timelineData:
item = e.timelineItem
edits = e.userContentEdits
default:
panic(fmt.Sprint("Unknown event type"))
}
err := gi.ensureTimelineItem(ctx, repo, b, &item, edits)
if err != nil {
err = fmt.Errorf("timeline item creation: %v", err)
out <- core.NewImportError(err, "")
return
}
}
2019-04-27 02:15:02 +03:00
if !b.NeedCommit() {
out <- core.NewImportNothing(b.Id(), "no imported operation")
} else if err := b.Commit(); err != nil {
// commit bug state
err = fmt.Errorf("bug commit: %v", err)
out <- core.NewImportError(err, "")
return
}
}
if err := gi.mediator.Error(); err != nil {
gi.out <- core.NewImportError(err, "")
2019-04-27 02:15:02 +03:00
}
}()
return out, nil
2018-09-25 20:10:38 +03:00
}
// getNextUserContentEdit consumes events from the input channel until it finds a
// userContentEditData, which it returns along with true. Message events met on the
// way are printed and skipped. When the channel is closed before any edit shows up,
// it returns (nil, false). Any other event type triggers a panic.
func getNextUserContentEdit(in <-chan userContentEditEvent) (*userContentEditData, bool) {
	for event := range in {
		switch e := event.(type) {
		case messageEvent:
			fmt.Println(e.msg)
		case userContentEditData:
			return &e, true
		default:
			panic("Unknown event type")
		}
	}
	// channel closed: nothing left to read
	return nil, false
}
func (gi *githubImporter) ensureIssue(ctx context.Context, repo *cache.RepoCache, issue *issue, issueEditEvents <-chan userContentEditEvent) (*cache.BugCache, error) {
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, issue.Author)
if err != nil {
return nil, err
}
// resolve bug
2020-03-28 19:08:27 +03:00
b, err := repo.ResolveBugMatcher(func(excerpt *cache.BugExcerpt) bool {
return excerpt.CreateMetadata[core.MetaKeyOrigin] == target &&
excerpt.CreateMetadata[metaKeyGithubId] == parseId(issue.Id)
})
if err != nil && err != bug.ErrBugNotExist {
return nil, err
}
// get first issue edit
// if it exists, then it holds the bug creation
firstEdit, hasEdit := getNextUserContentEdit(issueEditEvents)
2021-03-05 22:06:21 +03:00
// At Github there exist issues with seemingly empty titles. An example is
// https://github.com/NixOS/nixpkgs/issues/72730 .
// The title provided by the GraphQL API actually consists of a space followed by a
// zero width space (U+200B). This title would cause the NewBugRaw() function to
// return an error: empty title.
title := string(issue.Title)
2021-03-05 22:06:21 +03:00
if title == " \u200b" { // U+200B == zero width space
title = EMPTY_TITLE_PLACEHOLDER
2019-05-10 13:02:44 +03:00
}
if err == bug.ErrBugNotExist {
var textInput string
if hasEdit {
// use the first issue edit: it represents the bug creation itself
textInput = string(*firstEdit.Diff)
} else {
// if there are no issue edits then the issue struct holds the bug creation
textInput = string(issue.Body)
}
cleanText, err := text.Cleanup(textInput)
if err != nil {
return nil, err
}
// create bug
b, _, err = repo.NewBugRaw(
author,
issue.CreatedAt.Unix(),
title, // TODO: this is the *current* title, not the original one
cleanText,
nil,
map[string]string{
core.MetaKeyOrigin: target,
metaKeyGithubId: parseId(issue.Id),
metaKeyGithubUrl: issue.Url.String(),
})
if err != nil {
return nil, err
}
// importing a new bug
gi.out <- core.NewImportBug(b.Id())
}
if b == nil {
return nil, fmt.Errorf("finding or creating issue")
}
// process remaining issue edits, if they exist
for {
edit, hasEdit := getNextUserContentEdit(issueEditEvents)
if !hasEdit {
break
}
// other edits will be added as CommentEdit operations
target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(issue.Id))
if err == cache.ErrNoMatchingOp {
// original comment is missing somehow, issuing a warning
gi.out <- core.NewImportWarning(fmt.Errorf("comment ID %s to edit is missing", parseId(issue.Id)), b.Id())
continue
}
if err != nil {
return nil, err
}
err = gi.ensureCommentEdit(ctx, repo, b, target, &edit.userContentEdit)
if err != nil {
return nil, err
}
}
return b, nil
2019-04-27 02:15:02 +03:00
}
func (gi *githubImporter) ensureTimelineItem(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, item *timelineItem, commentEdits <-chan userContentEditEvent) error {
2018-09-25 20:10:38 +03:00
switch item.Typename {
case "IssueComment":
err := gi.ensureComment(ctx, repo, b, &item.IssueComment, commentEdits)
2019-05-10 03:05:00 +03:00
if err != nil {
return fmt.Errorf("timeline comment creation: %v", err)
}
return nil
2018-09-25 20:10:38 +03:00
case "LabeledEvent":
id := parseId(item.LabeledEvent.Id)
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
if err == nil {
return nil
}
if err != cache.ErrNoMatchingOp {
return err
}
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, item.LabeledEvent.Actor)
2019-01-19 18:01:06 +03:00
if err != nil {
return err
}
op, err := b.ForceChangeLabelsRaw(
2019-01-19 18:01:06 +03:00
author,
2018-09-25 20:10:38 +03:00
item.LabeledEvent.CreatedAt.Unix(),
[]string{
string(item.LabeledEvent.Label.Name),
},
nil,
map[string]string{metaKeyGithubId: id},
2018-09-25 20:10:38 +03:00
)
if err != nil {
return err
}
2019-05-04 14:19:56 +03:00
gi.out <- core.NewImportLabelChange(op.Id())
return nil
2018-09-25 20:10:38 +03:00
case "UnlabeledEvent":
id := parseId(item.UnlabeledEvent.Id)
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
if err == nil {
return nil
}
if err != cache.ErrNoMatchingOp {
return err
}
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, item.UnlabeledEvent.Actor)
2019-01-19 18:01:06 +03:00
if err != nil {
return err
}
op, err := b.ForceChangeLabelsRaw(
2019-01-19 18:01:06 +03:00
author,
2018-09-25 20:10:38 +03:00
item.UnlabeledEvent.CreatedAt.Unix(),
nil,
[]string{
string(item.UnlabeledEvent.Label.Name),
},
map[string]string{metaKeyGithubId: id},
2018-09-25 20:10:38 +03:00
)
if err != nil {
return err
}
gi.out <- core.NewImportLabelChange(op.Id())
return nil
2018-09-25 20:10:38 +03:00
case "ClosedEvent":
id := parseId(item.ClosedEvent.Id)
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
if err != cache.ErrNoMatchingOp {
return err
}
if err == nil {
return nil
}
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, item.ClosedEvent.Actor)
2019-01-19 18:01:06 +03:00
if err != nil {
return err
}
op, err := b.CloseRaw(
2019-01-19 18:01:06 +03:00
author,
2018-09-25 20:10:38 +03:00
item.ClosedEvent.CreatedAt.Unix(),
map[string]string{metaKeyGithubId: id},
2018-09-25 20:10:38 +03:00
)
if err != nil {
return err
}
gi.out <- core.NewImportStatusChange(op.Id())
return nil
2018-09-25 20:10:38 +03:00
case "ReopenedEvent":
id := parseId(item.ReopenedEvent.Id)
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
if err != cache.ErrNoMatchingOp {
return err
}
if err == nil {
return nil
}
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, item.ReopenedEvent.Actor)
2019-01-19 18:01:06 +03:00
if err != nil {
return err
}
op, err := b.OpenRaw(
2019-01-19 18:01:06 +03:00
author,
2018-09-25 20:10:38 +03:00
item.ReopenedEvent.CreatedAt.Unix(),
map[string]string{metaKeyGithubId: id},
2018-09-25 20:10:38 +03:00
)
if err != nil {
return err
}
gi.out <- core.NewImportStatusChange(op.Id())
return nil
2018-09-25 20:10:38 +03:00
case "RenamedTitleEvent":
id := parseId(item.RenamedTitleEvent.Id)
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
if err != cache.ErrNoMatchingOp {
return err
}
if err == nil {
return nil
}
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, item.RenamedTitleEvent.Actor)
2019-01-19 18:01:06 +03:00
if err != nil {
return err
}
2021-03-05 22:06:21 +03:00
// At Github there exist issues with seemingly empty titles. An example is
// https://github.com/NixOS/nixpkgs/issues/72730 .
// The title provided by the GraphQL API actually consists of a space followed
// by a zero width space (U+200B). This title would cause the NewBugRaw()
// function to return an error: empty title.
title := string(item.RenamedTitleEvent.CurrentTitle)
if title == " \u200b" { // U+200B == zero width space
title = EMPTY_TITLE_PLACEHOLDER
}
op, err := b.SetTitleRaw(
2019-01-19 18:01:06 +03:00
author,
2018-09-25 20:10:38 +03:00
item.RenamedTitleEvent.CreatedAt.Unix(),
2021-03-05 22:06:21 +03:00
title,
map[string]string{metaKeyGithubId: id},
2018-09-25 20:10:38 +03:00
)
if err != nil {
return err
}
gi.out <- core.NewImportTitleEdition(op.Id())
return nil
2018-09-25 20:10:38 +03:00
}
return nil
}
func (gi *githubImporter) ensureComment(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, comment *issueComment, commentEditEvents <-chan userContentEditEvent) error {
2021-03-05 22:06:21 +03:00
author, err := gi.ensurePerson(ctx, repo, comment.Author)
if err != nil {
return err
}
targetOpID, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(comment.Id))
if err != nil && err != cache.ErrNoMatchingOp {
// real error
return err
}
firstEdit, hasEdit := getNextUserContentEdit(commentEditEvents)
if err == cache.ErrNoMatchingOp {
var textInput string
if hasEdit {
// use the first comment edit: it represents the comment creation itself
textInput = string(*firstEdit.Diff)
} else {
// if there are not comment edits, then the comment struct holds the comment creation
textInput = string(comment.Body)
}
cleanText, err := text.Cleanup(textInput)
if err != nil {
return err
}
// add comment operation
op, err := b.AddCommentRaw(
author,
comment.CreatedAt.Unix(),
cleanText,
nil,
map[string]string{
metaKeyGithubId: parseId(comment.Id),
metaKeyGithubUrl: comment.Url.String(),
},
)
if err != nil {
return err
}
gi.out <- core.NewImportComment(op.Id())
targetOpID = op.Id()
}
if targetOpID == "" {
return fmt.Errorf("finding or creating issue comment")
}
// process remaining comment edits, if they exist
for {
edit, hasEdit := getNextUserContentEdit(commentEditEvents)
if !hasEdit {
break
}
// ensure editor identity
2021-03-05 22:06:21 +03:00
_, err := gi.ensurePerson(ctx, repo, edit.Editor)
if err != nil {
return err
}
err = gi.ensureCommentEdit(ctx, repo, b, targetOpID, &edit.userContentEdit)
if err != nil {
return err
}
}
return nil
}
func (gi *githubImporter) ensureCommentEdit(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, target entity.Id, edit *userContentEdit) error {
_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
if err == nil {
return nil
}
if err != cache.ErrNoMatchingOp {
// real error
return err
}
2021-03-05 22:06:21 +03:00
editor, err := gi.ensurePerson(ctx, repo, edit.Editor)
2019-02-24 15:06:03 +03:00
if err != nil {
return err
2019-01-19 18:01:06 +03:00
}
switch {
case edit.DeletedAt != nil:
// comment deletion, not supported yet
return nil
case edit.DeletedAt == nil:
cleanText, err := text.Cleanup(string(*edit.Diff))
if err != nil {
return err
}
// comment edition
op, err := b.EditCommentRaw(
2019-01-19 18:01:06 +03:00
editor,
edit.CreatedAt.Unix(),
target,
cleanText,
map[string]string{
metaKeyGithubId: parseId(edit.Id),
},
)
if err != nil {
return err
}
gi.out <- core.NewImportCommentEdition(op.Id())
return nil
}
return nil
}
2019-02-24 14:58:04 +03:00
// ensurePerson create a bug.Person from the Github data
2021-03-05 22:06:21 +03:00
func (gi *githubImporter) ensurePerson(ctx context.Context, repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
2019-01-19 18:01:06 +03:00
// When a user has been deleted, Github return a null actor, while displaying a profile named "ghost"
// in it's UI. So we need a special case to get it.
if actor == nil {
2021-03-05 22:06:21 +03:00
return gi.getGhost(ctx, repo)
2019-01-19 18:01:06 +03:00
}
// Look first in the cache
i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, string(actor.Login))
2019-01-19 18:01:06 +03:00
if err == nil {
return i, nil
}
if entity.IsErrMultipleMatch(err) {
2019-01-19 18:01:06 +03:00
return nil, err
}
2019-01-19 18:01:06 +03:00
2019-05-10 03:05:00 +03:00
// importing a new identity
2018-10-07 19:27:23 +03:00
var name string
var email string
switch actor.Typename {
case "User":
if actor.User.Name != nil {
name = string(*(actor.User.Name))
}
email = string(actor.User.Email)
case "Organization":
if actor.Organization.Name != nil {
name = string(*(actor.Organization.Name))
}
if actor.Organization.Email != nil {
email = string(*(actor.Organization.Email))
}
case "Bot":
}
2020-02-09 00:04:25 +03:00
// Name is not necessarily set, fallback to login as a name is required in the identity
if name == "" {
name = string(actor.Login)
}
i, err = repo.NewIdentityRaw(
2019-01-19 18:01:06 +03:00
name,
email,
string(actor.Login),
2019-01-19 18:01:06 +03:00
string(actor.AvatarUrl),
map[string]string{
metaKeyGithubLogin: string(actor.Login),
2019-01-19 18:01:06 +03:00
},
)
if err != nil {
return nil, err
}
gi.out <- core.NewImportIdentity(i.Id())
return i, nil
2018-09-25 20:10:38 +03:00
}
2021-03-05 22:06:21 +03:00
func (gi *githubImporter) getGhost(ctx context.Context, repo *cache.RepoCache) (*cache.IdentityCache, error) {
loginName := "ghost"
2019-01-19 18:01:06 +03:00
// Look first in the cache
i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, loginName)
2019-01-19 18:01:06 +03:00
if err == nil {
return i, nil
}
if entity.IsErrMultipleMatch(err) {
2019-01-19 18:01:06 +03:00
return nil, err
}
2021-03-05 22:06:21 +03:00
user, err := gi.mediator.User(ctx, loginName)
userName := ""
if user.Name != nil {
userName = string(*user.Name)
}
2019-01-19 18:01:06 +03:00
return repo.NewIdentityRaw(
userName,
"",
string(user.Login),
string(user.AvatarUrl),
2019-01-19 18:01:06 +03:00
map[string]string{
metaKeyGithubLogin: string(user.Login),
2019-01-19 18:01:06 +03:00
},
2019-01-17 05:09:08 +03:00
)
}
2021-03-05 22:06:21 +03:00
// parseId converts the unusable githubv4.ID (an interface{}) into a string
2018-09-25 20:10:38 +03:00
func parseId(id githubv4.ID) string {
return fmt.Sprintf("%v", id)
}