Files
awesome-docker/internal/checker/github.go
2026-02-28 00:46:59 +01:00

175 lines
4.0 KiB
Markdown

package checker
import (
"context"
"fmt"
"net/url"
"strings"
"time"
"github.com/shurcooL/githubv4"
"golang.org/x/oauth2"
)
// RepoInfo holds metadata about a GitHub repository.
//
// Values are produced by GitHubChecker.CheckRepo: Owner, Name and URL are
// derived from the arguments of that call, the remaining fields are copied
// from the GraphQL response.
type RepoInfo struct {
// Owner is the repository owner exactly as passed to CheckRepo.
Owner string
// Name is the repository name exactly as passed to CheckRepo.
Name string
// URL is the canonical "https://github.com/<Owner>/<Name>" address.
URL string
// IsArchived mirrors the repository's isArchived field.
IsArchived bool
// IsDisabled mirrors the repository's isDisabled field.
IsDisabled bool
// IsPrivate mirrors the repository's isPrivate field.
IsPrivate bool
// PushedAt mirrors the repository's pushedAt timestamp.
PushedAt time.Time
// Stars is the repository's stargazerCount.
Stars int
// Forks is the repository's forkCount.
Forks int
// HasLicense is true when the response carried a non-null licenseInfo.
HasLicense bool
}
// ExtractGitHubRepo extracts the owner and repository name from a GitHub URL.
//
// The URL must point at github.com or www.github.com (host compared
// case-insensitively) and its path must consist of exactly two non-empty
// segments, "/<owner>/<name>". Leading and trailing slashes are ignored and a
// trailing ".git" on the name is stripped.
//
// ok is false for URLs that fail to parse, for other hosts, for deeper paths
// (issues, wiki, tree, ...), and for GitHub's own two-segment pages such as
// /topics/<x>, /orgs/<x> or /sponsors/<x>, which are not repositories.
func ExtractGitHubRepo(rawURL string) (owner, name string, ok bool) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return "", "", false
	}

	host := strings.ToLower(u.Hostname())
	if host != "github.com" && host != "www.github.com" {
		return "", "", false
	}

	parts := strings.Split(strings.Trim(u.Path, "/"), "/")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", false
	}

	// Skip known non-repository top-level routes. These names are reserved by
	// GitHub, so a two-segment path below them is a site page, never a repo.
	// The comparison is case-insensitive because GitHub account names are.
	switch strings.ToLower(parts[0]) {
	case "apps", "features", "topics",
		"collections", "marketplace", "orgs", "settings",
		"sponsors", "trending", "users":
		return "", "", false
	}

	name = strings.TrimSuffix(parts[1], ".git")
	if name == "" {
		// The second segment was exactly ".git".
		return "", "", false
	}
	return parts[0], name, true
}
// isHTTPURL reports whether raw parses as a URL whose scheme is http or
// https. Unparseable input and every other scheme yield false.
func isHTTPURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	switch parsed.Scheme {
	case "http", "https":
		return true
	default:
		return false
	}
}
// isGitHubAuthError reports whether err looks like an authentication or
// authorization failure. The check is a case-insensitive substring match on
// the error text; a nil error is never an auth error.
func isGitHubAuthError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, marker := range []string{
		"401 unauthorized",
		"bad credentials",
		"resource not accessible by integration",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// PartitionLinks splits urls into GitHub repository links and other HTTP(S)
// links, preserving input order within each group. A link accepted by
// ExtractGitHubRepo goes to github; any remaining link with an http or https
// scheme goes to external; everything else is dropped.
func PartitionLinks(urls []string) (github, external []string) {
	for _, link := range urls {
		_, _, isRepo := ExtractGitHubRepo(link)
		switch {
		case isRepo:
			github = append(github, link)
		case isHTTPURL(link):
			external = append(external, link)
		}
	}
	return github, external
}
// GitHubChecker uses the GitHub GraphQL API.
// Construct it with NewGitHubChecker so that client is set.
type GitHubChecker struct {
// client is the GraphQL client every query is sent through.
client *githubv4.Client
}
// NewGitHubChecker returns a GitHubChecker whose GraphQL client sends
// requests through an OAuth2 HTTP client carrying token as a static access
// token.
func NewGitHubChecker(token string) *GitHubChecker {
	tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	checker := new(GitHubChecker)
	checker.client = githubv4.NewClient(oauth2.NewClient(context.Background(), tokenSource))
	return checker
}
// CheckRepo fetches metadata for the repository owner/name with a single
// GraphQL query and returns it as a RepoInfo.
//
// On failure it returns a zero RepoInfo and the query error wrapped with the
// "owner/name" it was issued for.
func (gc *GitHubChecker) CheckRepo(ctx context.Context, owner, name string) (RepoInfo, error) {
	// githubv4 derives the GraphQL selection from this struct's shape, so the
	// field names and the tag must stay exactly as they are.
	var q struct {
		Repository struct {
			IsArchived     bool
			IsDisabled     bool
			IsPrivate      bool
			PushedAt       time.Time
			StargazerCount int
			ForkCount      int
			LicenseInfo    *struct {
				Name string
			}
		} `graphql:"repository(owner: $owner, name: $name)"`
	}

	variables := make(map[string]interface{}, 2)
	variables["owner"] = githubv4.String(owner)
	variables["name"] = githubv4.String(name)

	err := gc.client.Query(ctx, &q, variables)
	if err != nil {
		return RepoInfo{}, fmt.Errorf("github query %s/%s: %w", owner, name, err)
	}

	repo := &q.Repository

	var info RepoInfo
	info.Owner = owner
	info.Name = name
	info.URL = "https://github.com/" + owner + "/" + name
	info.IsArchived = repo.IsArchived
	info.IsDisabled = repo.IsDisabled
	info.IsPrivate = repo.IsPrivate
	info.PushedAt = repo.PushedAt
	info.Stars = repo.StargazerCount
	info.Forks = repo.ForkCount
	// A null licenseInfo in the response leaves the pointer nil.
	info.HasLicense = repo.LicenseInfo != nil
	return info, nil
}
// CheckRepos queries the repositories behind urls one after another and
// returns the successfully fetched RepoInfo values together with every error
// encountered.
//
// URLs that ExtractGitHubRepo does not recognise as repositories are skipped
// without an error. After every batchSize issued queries (default 50 when
// batchSize <= 0) the loop pauses for one second before the next query as a
// simple rate limit; skipped URLs do not count towards a batch, failed
// queries do.
//
// Processing stops early in two cases: when a query fails with what
// isGitHubAuthError considers an authentication error (every further query
// would fail the same way), and when ctx is cancelled or expires, in which
// case the wrapped context error is appended as the last element of the
// returned errors.
func (gc *GitHubChecker) CheckRepos(ctx context.Context, urls []string, batchSize int) ([]RepoInfo, []error) {
	if batchSize <= 0 {
		batchSize = 50
	}

	var results []RepoInfo
	var errs []error

	queried := 0 // number of API queries issued so far
	for _, rawURL := range urls {
		owner, name, ok := ExtractGitHubRepo(rawURL)
		if !ok {
			continue
		}

		// Stop instead of issuing queries that are bound to fail.
		if err := ctx.Err(); err != nil {
			errs = append(errs, fmt.Errorf("github check interrupted: %w", err))
			return results, errs
		}

		// A full batch has been sent: pause before starting the next one.
		// Pausing here rather than after the query avoids a pointless wait
		// once the last repository has been processed.
		if queried > 0 && queried%batchSize == 0 {
			timer := time.NewTimer(1 * time.Second)
			select {
			case <-ctx.Done():
				timer.Stop()
				errs = append(errs, fmt.Errorf("github check interrupted: %w", ctx.Err()))
				return results, errs
			case <-timer.C:
			}
		}

		queried++
		info, err := gc.CheckRepo(ctx, owner, name)
		if err != nil {
			errs = append(errs, err)
			if isGitHubAuthError(err) {
				break
			}
			continue
		}
		results = append(results, info)
	}
	return results, errs
}