feat: add HTTP link checker and GitHub GraphQL repo checker
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
3
go.mod
3
go.mod
@@ -6,6 +6,9 @@ require github.com/spf13/cobra v1.10.2
|
||||
|
||||
require (
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/shurcooL/githubv4 v0.0.0-20260209031235-2402fdf4a9ed // indirect
|
||||
github.com/shurcooL/graphql v0.0.0-20240915155400-7ee5256398cf // indirect
|
||||
github.com/spf13/pflag v1.0.9 // indirect
|
||||
golang.org/x/oauth2 v0.35.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
6
go.sum
6
go.sum
@@ -2,11 +2,17 @@ github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6N
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shurcooL/githubv4 v0.0.0-20260209031235-2402fdf4a9ed h1:KT7hI8vYXgU0s2qaMkrfq9tCA1w/iEPgfredVP+4Tzw=
|
||||
github.com/shurcooL/githubv4 v0.0.0-20260209031235-2402fdf4a9ed/go.mod h1:zqMwyHmnN/eDOZOdiTohqIUKUrTFX62PNlu7IJdu0q8=
|
||||
github.com/shurcooL/graphql v0.0.0-20240915155400-7ee5256398cf h1:o1uxfymjZ7jZ4MsgCErcwWGtVKSiNAXtS59Lhs6uI/g=
|
||||
github.com/shurcooL/graphql v0.0.0-20240915155400-7ee5256398cf/go.mod h1:9dIRpgIY7hVhoqfe0/FcYp0bpInZaT7dc3BYOprrIUE=
|
||||
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
|
||||
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
|
||||
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
|
||||
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
||||
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
138
internal/checker/github.go
Normal file
138
internal/checker/github.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/shurcooL/githubv4"
|
||||
"golang.org/x/oauth2"
|
||||
)
|
||||
|
||||
// RepoInfo is the snapshot of repository metadata that a GitHub check
// produces for a single repository.
type RepoInfo struct {
	// Owner and Name identify the repository; URL is its github.com address.
	Owner, Name, URL string

	// Status flags of the repository.
	IsArchived, IsDisabled, IsPrivate bool

	// PushedAt carries the repository's pushedAt timestamp.
	PushedAt time.Time

	// Stars and Forks are the stargazer and fork counts.
	Stars, Forks int

	// HasLicense is true when license information was present for the
	// repository.
	HasLicense bool
}
|
||||
|
||||
// ExtractGitHubRepo extracts the owner and repository name from a GitHub
// repository URL of the form https://github.com/<owner>/<name>.
//
// Trailing slashes, a query string (?tab=readme) and a fragment (#usage) are
// ignored, so a link that points at a section of a repository page still
// resolves to the repository itself instead of yielding a name such as
// "repo#usage".
//
// ok is false when url does not start with https://github.com/, when its path
// does not consist of exactly two non-empty segments (profiles, issues, wiki
// pages, ...), or when the first segment is one of GitHub's own site sections
// rather than an account name.
func ExtractGitHubRepo(url string) (owner, name string, ok bool) {
	const prefix = "https://github.com/"
	if !strings.HasPrefix(url, prefix) {
		return "", "", false
	}
	path := url[len(prefix):]

	// The query string and fragment are not part of the repository path; left
	// in place they would end up inside the repository name.
	if i := strings.IndexAny(path, "?#"); i >= 0 {
		path = path[:i]
	}
	path = strings.TrimRight(path, "/")

	parts := strings.Split(path, "/")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", false
	}

	// Site sections that have the <owner>/<name> shape but are not repositories.
	switch parts[0] {
	case "apps", "collections", "features", "marketplace", "orgs", "sponsors", "topics":
		return "", "", false
	}
	return parts[0], parts[1], true
}
|
||||
|
||||
// PartitionLinks separates URLs into GitHub repos and external links.
|
||||
func PartitionLinks(urls []string) (github, external []string) {
|
||||
for _, url := range urls {
|
||||
if _, _, ok := ExtractGitHubRepo(url); ok {
|
||||
github = append(github, url)
|
||||
} else {
|
||||
external = append(external, url)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GitHubChecker uses the GitHub GraphQL API.
|
||||
type GitHubChecker struct {
|
||||
client *githubv4.Client
|
||||
}
|
||||
|
||||
// NewGitHubChecker creates a checker with the given OAuth token.
|
||||
func NewGitHubChecker(token string) *GitHubChecker {
|
||||
src := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
|
||||
httpClient := oauth2.NewClient(context.Background(), src)
|
||||
return &GitHubChecker{client: githubv4.NewClient(httpClient)}
|
||||
}
|
||||
|
||||
// CheckRepo queries a single GitHub repository.
|
||||
func (gc *GitHubChecker) CheckRepo(ctx context.Context, owner, name string) (RepoInfo, error) {
|
||||
var query struct {
|
||||
Repository struct {
|
||||
IsArchived bool
|
||||
IsDisabled bool
|
||||
IsPrivate bool
|
||||
PushedAt time.Time
|
||||
StargazerCount int
|
||||
ForkCount int
|
||||
LicenseInfo *struct {
|
||||
Name string
|
||||
}
|
||||
} `graphql:"repository(owner: $owner, name: $name)"`
|
||||
}
|
||||
|
||||
vars := map[string]interface{}{
|
||||
"owner": githubv4.String(owner),
|
||||
"name": githubv4.String(name),
|
||||
}
|
||||
|
||||
if err := gc.client.Query(ctx, &query, vars); err != nil {
|
||||
return RepoInfo{}, fmt.Errorf("github query %s/%s: %w", owner, name, err)
|
||||
}
|
||||
|
||||
r := query.Repository
|
||||
return RepoInfo{
|
||||
Owner: owner,
|
||||
Name: name,
|
||||
URL: fmt.Sprintf("https://github.com/%s/%s", owner, name),
|
||||
IsArchived: r.IsArchived,
|
||||
IsDisabled: r.IsDisabled,
|
||||
IsPrivate: r.IsPrivate,
|
||||
PushedAt: r.PushedAt,
|
||||
Stars: r.StargazerCount,
|
||||
Forks: r.ForkCount,
|
||||
HasLicense: r.LicenseInfo != nil,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CheckRepos queries multiple repos in sequence with rate limiting.
|
||||
func (gc *GitHubChecker) CheckRepos(ctx context.Context, urls []string, batchSize int) ([]RepoInfo, []error) {
|
||||
if batchSize <= 0 {
|
||||
batchSize = 50
|
||||
}
|
||||
|
||||
var results []RepoInfo
|
||||
var errs []error
|
||||
|
||||
for i, url := range urls {
|
||||
owner, name, ok := ExtractGitHubRepo(url)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
info, err := gc.CheckRepo(ctx, owner, name)
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
continue
|
||||
}
|
||||
results = append(results, info)
|
||||
|
||||
if (i+1)%batchSize == 0 {
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
return results, errs
|
||||
}
|
||||
52
internal/checker/github_test.go
Normal file
52
internal/checker/github_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractGitHubRepo(t *testing.T) {
|
||||
tests := []struct {
|
||||
url string
|
||||
owner string
|
||||
name string
|
||||
ok bool
|
||||
}{
|
||||
{"https://github.com/docker/compose", "docker", "compose", true},
|
||||
{"https://github.com/moby/moby", "moby", "moby", true},
|
||||
{"https://github.com/user/repo/", "user", "repo", true},
|
||||
{"https://github.com/user/repo/issues", "", "", false},
|
||||
{"https://github.com/user/repo/wiki", "", "", false},
|
||||
{"https://github.com/apps/dependabot", "", "", false},
|
||||
{"https://example.com/not-github", "", "", false},
|
||||
{"https://github.com/user", "", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
owner, name, ok := ExtractGitHubRepo(tt.url)
|
||||
if ok != tt.ok {
|
||||
t.Errorf("ExtractGitHubRepo(%q): ok = %v, want %v", tt.url, ok, tt.ok)
|
||||
continue
|
||||
}
|
||||
if ok {
|
||||
if owner != tt.owner || name != tt.name {
|
||||
t.Errorf("ExtractGitHubRepo(%q) = (%q, %q), want (%q, %q)", tt.url, owner, name, tt.owner, tt.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPartitionLinks(t *testing.T) {
|
||||
urls := []string{
|
||||
"https://github.com/docker/compose",
|
||||
"https://example.com/tool",
|
||||
"https://github.com/moby/moby",
|
||||
"https://github.com/user/repo/issues",
|
||||
}
|
||||
gh, ext := PartitionLinks(urls)
|
||||
if len(gh) != 2 {
|
||||
t.Errorf("github links = %d, want 2", len(gh))
|
||||
}
|
||||
if len(ext) != 2 {
|
||||
t.Errorf("external links = %d, want 2", len(ext))
|
||||
}
|
||||
}
|
||||
111
internal/checker/http.go
Normal file
111
internal/checker/http.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/veggiemonk/awesome-docker/internal/cache"
|
||||
)
|
||||
|
||||
// Defaults used by the HTTP link checker.
const (
	// defaultTimeout is the time budget for checking one link.
	defaultTimeout = 30 * time.Second

	// defaultConcurrency is the number of parallel checks used when the
	// caller does not ask for a positive value.
	defaultConcurrency = 10

	// userAgent is sent as the User-Agent header of every request.
	userAgent = "awesome-docker-checker/1.0"
)
|
||||
|
||||
// LinkResult describes the outcome of checking one URL.
type LinkResult struct {
	// URL is the address that was checked, exactly as it was passed in.
	URL string
	// OK is true when the link is considered healthy.
	OK bool
	// StatusCode is the HTTP status of the final response; it stays 0 when no
	// response was received.
	StatusCode int
	// Redirected is true when the request ended up at a URL other than URL.
	Redirected bool
	// RedirectURL is the address the redirects led to; set only together with
	// Redirected.
	RedirectURL string
	// Error holds the failure message when the check did not produce a
	// response.
	Error string
}
|
||||
|
||||
// CheckLink checks a single URL. Uses HEAD first, falls back to GET.
|
||||
func CheckLink(url string, client *http.Client) LinkResult {
|
||||
result := LinkResult{URL: url}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Try HEAD first
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
return result
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Track redirects
|
||||
var finalURL string
|
||||
origCheckRedirect := client.CheckRedirect
|
||||
client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
finalURL = req.URL.String()
|
||||
if len(via) >= 10 {
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
return nil
|
||||
}
|
||||
defer func() { client.CheckRedirect = origCheckRedirect }()
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
// Fallback to GET
|
||||
req, err2 := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err2 != nil {
|
||||
result.Error = err.Error()
|
||||
return result
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
resp, err = client.Do(req)
|
||||
if err != nil {
|
||||
result.Error = err.Error()
|
||||
return result
|
||||
}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
result.StatusCode = resp.StatusCode
|
||||
result.OK = resp.StatusCode >= 200 && resp.StatusCode < 400
|
||||
|
||||
if finalURL != "" && finalURL != url {
|
||||
result.Redirected = true
|
||||
result.RedirectURL = finalURL
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// CheckLinks checks multiple URLs concurrently.
|
||||
func CheckLinks(urls []string, concurrency int, exclude *cache.ExcludeList) []LinkResult {
|
||||
if concurrency <= 0 {
|
||||
concurrency = defaultConcurrency
|
||||
}
|
||||
|
||||
results := make([]LinkResult, len(urls))
|
||||
sem := make(chan struct{}, concurrency)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i, url := range urls {
|
||||
if exclude != nil && exclude.IsExcluded(url) {
|
||||
results[i] = LinkResult{URL: url, OK: true}
|
||||
continue
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func(idx int, u string) {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
client := &http.Client{Timeout: defaultTimeout}
|
||||
results[idx] = CheckLink(u, client)
|
||||
}(i, url)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return results
|
||||
}
|
||||
80
internal/checker/http_test.go
Normal file
80
internal/checker/http_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCheckLinkOK(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
result := CheckLink(server.URL, &http.Client{})
|
||||
if !result.OK {
|
||||
t.Errorf("expected OK, got status %d, error: %s", result.StatusCode, result.Error)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckLink404(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
result := CheckLink(server.URL, &http.Client{})
|
||||
if result.OK {
|
||||
t.Error("expected not OK for 404")
|
||||
}
|
||||
if result.StatusCode != 404 {
|
||||
t.Errorf("status = %d, want 404", result.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckLinkRedirect(t *testing.T) {
|
||||
final := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer final.Close()
|
||||
|
||||
redir := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, final.URL, http.StatusMovedPermanently)
|
||||
}))
|
||||
defer redir.Close()
|
||||
|
||||
result := CheckLink(redir.URL, &http.Client{})
|
||||
if !result.OK {
|
||||
t.Errorf("expected OK after following redirect, error: %s", result.Error)
|
||||
}
|
||||
if !result.Redirected {
|
||||
t.Error("expected Redirected = true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckLinks(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/bad" {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
urls := []string{server.URL + "/good", server.URL + "/bad", server.URL + "/also-good"}
|
||||
results := CheckLinks(urls, 2, nil)
|
||||
if len(results) != 3 {
|
||||
t.Fatalf("results = %d, want 3", len(results))
|
||||
}
|
||||
|
||||
for _, r := range results {
|
||||
if r.URL == server.URL+"/bad" && r.OK {
|
||||
t.Error("expected /bad to not be OK")
|
||||
}
|
||||
if r.URL == server.URL+"/good" && !r.OK {
|
||||
t.Error("expected /good to be OK")
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user