2021-02-27 02:42:37 +03:00
|
|
|
package github
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/shurcooL/githubv4"
|
|
|
|
)
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
const (
	// These values influence how fast the github graphql rate limit is exhausted.

	// NumIssues is the page size sent as the "issueFirst" query variable.
	NumIssues = 40
	// NumIssueEdits is the page size sent as the "issueEditLast" query variable.
	NumIssueEdits = 100
	// NumTimelineItems is the page size sent as the "timelineFirst" query variable.
	NumTimelineItems = 100
	// NumCommentEdits is the page size sent as the "commentEditLast" query variable.
	NumCommentEdits = 100

	// ChanCapacity is the buffer size of the channel that carries import events.
	ChanCapacity = 128
)
|
|
|
|
|
2021-03-08 09:53:09 +03:00
|
|
|
// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
//
// A background goroutine (see start) pages through the API and queues the
// results on importEvents; consumers read them with NextImportEvent and check
// Error once the stream is exhausted.
type importMediator struct {
	// Github graphql client
	gh *rateLimitHandlerClient

	// name of the repository owner on Github
	owner string

	// name of the Github repository
	project string

	// since specifies which issues to import. Issues that have been updated at or after the
	// given date should be imported.
	since time.Time

	// importEvents holds events representing issues, comments, edits, ...
	// In this channel issues are immediately followed by their issue edits and comments are
	// immediately followed by their comment edits.
	// The channel is also handed to every GraphQL query made through gh.
	importEvents chan ImportEvent

	// Sticky error, set when a paginated GraphQL query fails and returned by Error.
	err error
}
|
2021-03-05 22:06:21 +03:00
|
|
|
|
2021-09-14 23:22:28 +03:00
|
|
|
func NewImportMediator(ctx context.Context, client *rateLimitHandlerClient, owner, project string, since time.Time) *importMediator {
|
2021-02-27 02:42:37 +03:00
|
|
|
mm := importMediator{
|
2021-09-14 23:22:28 +03:00
|
|
|
gh: client,
|
2021-03-22 21:26:59 +03:00
|
|
|
owner: owner,
|
|
|
|
project: project,
|
|
|
|
since: since,
|
2021-04-09 14:07:45 +03:00
|
|
|
importEvents: make(chan ImportEvent, ChanCapacity),
|
2021-03-22 21:26:59 +03:00
|
|
|
err: nil,
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
|
2022-06-05 16:01:08 +03:00
|
|
|
go mm.start(ctx)
|
2021-03-08 09:53:09 +03:00
|
|
|
|
2022-06-05 16:01:08 +03:00
|
|
|
return &mm
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
|
|
|
|
2022-06-05 16:01:08 +03:00
|
|
|
func (mm *importMediator) start(ctx context.Context) {
|
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
|
|
mm.fillImportEvents(ctx)
|
|
|
|
// Make sure we cancel everything when we are done, instead of relying on the parent context
|
|
|
|
// This should unblock pending send to the channel if the capacity was reached and avoid a panic/race when closing.
|
|
|
|
cancel()
|
|
|
|
close(mm.importEvents)
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
|
|
|
|
2022-06-05 16:01:08 +03:00
|
|
|
// NextImportEvent returns the next ImportEvent, or nil if done.
|
|
|
|
func (mm *importMediator) NextImportEvent() ImportEvent {
|
|
|
|
return <-mm.importEvents
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
|
|
|
|
2021-02-27 02:42:37 +03:00
|
|
|
// Error returns the sticky error recorded when one of the paginated GraphQL
// queries failed, or nil if no such error has been recorded.
func (mm *importMediator) Error() error {
	return mm.err
}
|
|
|
|
|
|
|
|
func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
|
|
|
|
query := userQuery{}
|
|
|
|
vars := varmap{"login": githubv4.String(loginName)}
|
2022-06-05 16:01:08 +03:00
|
|
|
if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
|
2021-02-27 02:42:37 +03:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &query.User, nil
|
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) fillImportEvents(ctx context.Context) {
|
2021-03-05 22:06:21 +03:00
|
|
|
initialCursor := githubv4.String("")
|
2021-03-22 21:26:59 +03:00
|
|
|
issues, hasIssues := mm.queryIssue(ctx, initialCursor)
|
2021-03-05 22:06:21 +03:00
|
|
|
for hasIssues {
|
|
|
|
for _, node := range issues.Nodes {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2021-03-22 21:26:59 +03:00
|
|
|
case mm.importEvents <- IssueEvent{node.issue}:
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
// issue edit events follow the issue event
|
|
|
|
mm.fillIssueEditEvents(ctx, &node)
|
|
|
|
// last come the timeline events
|
|
|
|
mm.fillTimelineEvents(ctx, &node)
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
2021-03-05 22:06:21 +03:00
|
|
|
if !issues.PageInfo.HasNextPage {
|
|
|
|
break
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
2021-03-22 21:26:59 +03:00
|
|
|
issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
|
2021-02-27 02:42:37 +03:00
|
|
|
edits := &issueNode.UserContentEdits
|
|
|
|
hasEdits := true
|
|
|
|
for hasEdits {
|
|
|
|
for edit := range reverse(edits.Nodes) {
|
|
|
|
if edit.Diff == nil || string(*edit.Diff) == "" {
|
2021-03-05 22:06:21 +03:00
|
|
|
// issueEdit.Diff == nil happen if the event is older than early
|
|
|
|
// 2018, Github doesn't have the data before that. Best we can do is
|
|
|
|
// to ignore the event.
|
2021-02-27 02:42:37 +03:00
|
|
|
continue
|
|
|
|
}
|
2021-03-05 22:06:21 +03:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2021-03-22 21:26:59 +03:00
|
|
|
case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
if !edits.PageInfo.HasPreviousPage {
|
|
|
|
break
|
|
|
|
}
|
2021-03-22 21:26:59 +03:00
|
|
|
edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
|
|
|
|
vars := newIssueEditVars()
|
|
|
|
vars["gqlNodeId"] = nid
|
|
|
|
if cursor == "" {
|
|
|
|
vars["issueEditBefore"] = (*githubv4.String)(nil)
|
|
|
|
} else {
|
|
|
|
vars["issueEditBefore"] = cursor
|
|
|
|
}
|
|
|
|
query := issueEditQuery{}
|
2022-06-05 16:01:08 +03:00
|
|
|
if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
|
2021-03-22 21:26:59 +03:00
|
|
|
mm.err = err
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
connection := &query.Node.Issue.UserContentEdits
|
|
|
|
if len(connection.Nodes) <= 0 {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
return connection, true
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
|
2021-02-27 02:42:37 +03:00
|
|
|
items := &issueNode.TimelineItems
|
|
|
|
hasItems := true
|
|
|
|
for hasItems {
|
|
|
|
for _, item := range items.Nodes {
|
2021-03-22 21:26:59 +03:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
|
|
|
|
}
|
2021-03-05 22:06:21 +03:00
|
|
|
if item.Typename == "IssueComment" {
|
2021-03-08 09:53:09 +03:00
|
|
|
// Issue comments are different than other timeline items in that
|
|
|
|
// they may have associated user content edits.
|
2021-03-22 21:26:59 +03:00
|
|
|
// Right after the comment we send the comment edits.
|
|
|
|
mm.fillCommentEdits(ctx, &item)
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
if !items.PageInfo.HasNextPage {
|
|
|
|
break
|
|
|
|
}
|
2021-03-22 21:26:59 +03:00
|
|
|
items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
|
|
|
|
vars := newTimelineVars()
|
|
|
|
vars["gqlNodeId"] = nid
|
|
|
|
if cursor == "" {
|
|
|
|
vars["timelineAfter"] = (*githubv4.String)(nil)
|
|
|
|
} else {
|
|
|
|
vars["timelineAfter"] = cursor
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
2021-03-22 21:26:59 +03:00
|
|
|
query := timelineQuery{}
|
2022-06-05 16:01:08 +03:00
|
|
|
if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
|
2021-03-22 21:26:59 +03:00
|
|
|
mm.err = err
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
connection := &query.Node.Issue.TimelineItems
|
|
|
|
if len(connection.Nodes) <= 0 {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
return connection, true
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
|
2021-03-05 22:06:21 +03:00
|
|
|
// Here we are only concerned with timeline items of type issueComment.
|
2021-02-27 02:42:37 +03:00
|
|
|
if item.Typename != "IssueComment" {
|
|
|
|
return
|
|
|
|
}
|
2021-03-17 21:29:39 +03:00
|
|
|
// First: setup message handling while submitting GraphQL queries.
|
2021-02-27 02:42:37 +03:00
|
|
|
comment := &item.IssueComment
|
|
|
|
edits := &comment.UserContentEdits
|
|
|
|
hasEdits := true
|
|
|
|
for hasEdits {
|
|
|
|
for edit := range reverse(edits.Nodes) {
|
|
|
|
if edit.Diff == nil || string(*edit.Diff) == "" {
|
2021-03-05 22:06:21 +03:00
|
|
|
// issueEdit.Diff == nil happen if the event is older than early
|
|
|
|
// 2018, Github doesn't have the data before that. Best we can do is
|
|
|
|
// to ignore the event.
|
2021-02-27 02:42:37 +03:00
|
|
|
continue
|
|
|
|
}
|
2021-03-05 22:06:21 +03:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2021-03-22 21:26:59 +03:00
|
|
|
case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
|
2021-03-05 22:06:21 +03:00
|
|
|
}
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
if !edits.PageInfo.HasPreviousPage {
|
|
|
|
break
|
|
|
|
}
|
2021-03-22 21:26:59 +03:00
|
|
|
edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
|
2021-03-05 22:06:21 +03:00
|
|
|
vars := newCommentEditVars()
|
|
|
|
vars["gqlNodeId"] = nid
|
2021-02-27 02:42:37 +03:00
|
|
|
if cursor == "" {
|
|
|
|
vars["commentEditBefore"] = (*githubv4.String)(nil)
|
|
|
|
} else {
|
|
|
|
vars["commentEditBefore"] = cursor
|
|
|
|
}
|
|
|
|
query := commentEditQuery{}
|
2022-06-05 16:01:08 +03:00
|
|
|
if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
|
2021-03-22 21:26:59 +03:00
|
|
|
mm.err = err
|
2021-02-27 02:42:37 +03:00
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
connection := &query.Node.IssueComment.UserContentEdits
|
|
|
|
if len(connection.Nodes) <= 0 {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
return connection, true
|
|
|
|
}
|
|
|
|
|
2021-03-22 21:26:59 +03:00
|
|
|
func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
|
2021-03-05 22:06:21 +03:00
|
|
|
vars := newIssueVars(mm.owner, mm.project, mm.since)
|
2021-02-27 02:42:37 +03:00
|
|
|
if cursor == "" {
|
|
|
|
vars["issueAfter"] = (*githubv4.String)(nil)
|
|
|
|
} else {
|
2021-04-09 14:07:45 +03:00
|
|
|
vars["issueAfter"] = cursor
|
2021-02-27 02:42:37 +03:00
|
|
|
}
|
|
|
|
query := issueQuery{}
|
2022-06-05 16:01:08 +03:00
|
|
|
if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
|
2021-03-22 21:26:59 +03:00
|
|
|
mm.err = err
|
2021-02-27 02:42:37 +03:00
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
connection := &query.Repository.Issues
|
|
|
|
if len(connection.Nodes) <= 0 {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
return connection, true
|
|
|
|
}
|
|
|
|
|
|
|
|
func reverse(eds []userContentEdit) chan userContentEdit {
|
|
|
|
ret := make(chan userContentEdit)
|
|
|
|
go func() {
|
|
|
|
for i := range eds {
|
|
|
|
ret <- eds[len(eds)-1-i]
|
|
|
|
}
|
|
|
|
close(ret)
|
|
|
|
}()
|
|
|
|
return ret
|
|
|
|
}
|
2022-06-05 16:01:08 +03:00
|
|
|
|
|
|
|
// varmap is a container for the variables of a Github GraphQL query: page
// sizes, pagination cursors and the identifiers of the queried objects. It is
// passed as the variables argument of queryImport.
type varmap map[string]interface{}
|
|
|
|
|
|
|
|
func newIssueVars(owner, project string, since time.Time) varmap {
|
|
|
|
return varmap{
|
|
|
|
"owner": githubv4.String(owner),
|
|
|
|
"name": githubv4.String(project),
|
|
|
|
"issueSince": githubv4.DateTime{Time: since},
|
|
|
|
"issueFirst": githubv4.Int(NumIssues),
|
|
|
|
"issueEditLast": githubv4.Int(NumIssueEdits),
|
|
|
|
"issueEditBefore": (*githubv4.String)(nil),
|
|
|
|
"timelineFirst": githubv4.Int(NumTimelineItems),
|
|
|
|
"timelineAfter": (*githubv4.String)(nil),
|
|
|
|
"commentEditLast": githubv4.Int(NumCommentEdits),
|
|
|
|
"commentEditBefore": (*githubv4.String)(nil),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newIssueEditVars() varmap {
|
|
|
|
return varmap{
|
|
|
|
"issueEditLast": githubv4.Int(NumIssueEdits),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newTimelineVars() varmap {
|
|
|
|
return varmap{
|
|
|
|
"timelineFirst": githubv4.Int(NumTimelineItems),
|
|
|
|
"commentEditLast": githubv4.Int(NumCommentEdits),
|
|
|
|
"commentEditBefore": (*githubv4.String)(nil),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newCommentEditVars() varmap {
|
|
|
|
return varmap{
|
|
|
|
"commentEditLast": githubv4.Int(NumCommentEdits),
|
|
|
|
}
|
|
|
|
}
|