From 9a5331398d4d618ab59ec10453b441e8d55f5550 Mon Sep 17 00:00:00 2001 From: Julien Bisconti Date: Sat, 28 Feb 2026 01:44:56 +0100 Subject: [PATCH] Add ci subcommands and simplify scheduled workflows --- .github/workflows/broken_links.yml | 27 +- .github/workflows/health_report.yml | 24 +- cmd/awesome-docker/main.go | 528 +++++++++++++++++++++------- 3 files changed, 420 insertions(+), 159 deletions(-) diff --git a/.github/workflows/broken_links.yml b/.github/workflows/broken_links.yml index cab7e2f..d300688 100644 --- a/.github/workflows/broken_links.yml +++ b/.github/workflows/broken_links.yml @@ -5,9 +5,14 @@ on: - cron: "0 2 * * 6" workflow_dispatch: +concurrency: + group: broken-links-${{ github.ref }} + cancel-in-progress: false + jobs: check-links: runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read issues: write @@ -24,22 +29,7 @@ jobs: - name: Run Link Check id: link_check - run: | - set +e - ./awesome-docker check > link_check_output.txt 2>&1 - exit_code=$? - set -e - - has_errors=false - if [ "$exit_code" -ne 0 ]; then - has_errors=true - fi - if grep -qi "broken links" link_check_output.txt; then - has_errors=true - fi - - echo "has_errors=$has_errors" >> "$GITHUB_OUTPUT" - echo "check_exit_code=$exit_code" >> "$GITHUB_OUTPUT" + run: ./awesome-docker ci broken-links --issue-file broken_links_issue.md --github-output "$GITHUB_OUTPUT" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -49,10 +39,7 @@ jobs: with: script: | const fs = require('fs'); - const output = fs.readFileSync('link_check_output.txt', 'utf8'); - const exitCode = '${{ steps.link_check.outputs.check_exit_code }}'; - - const issueBody = `# Broken Links Detected\n\nThe weekly link check found broken links or the checker failed to execute cleanly.\n\nChecker exit code: ${exitCode}\n\n\`\`\`\n${output}\n\`\`\`\n\n## Action Required\n\n- Update the URL if the resource moved\n- Remove the entry if permanently unavailable\n- Add to \`config/exclude.yaml\` if a known false positive\n- Investigate checker failures when exit code is non-zero\n\n---\n*Auto-generated by broken_links.yml*`; + const issueBody = fs.readFileSync('broken_links_issue.md', 'utf8'); const issues = await github.rest.issues.listForRepo({ owner: context.repo.owner, diff --git a/.github/workflows/health_report.yml b/.github/workflows/health_report.yml index 14e091a..0945182 100644 --- a/.github/workflows/health_report.yml +++ b/.github/workflows/health_report.yml @@ -5,9 +5,14 @@ on: - cron: "0 9 * * 1" workflow_dispatch: +concurrency: + group: health-report-${{ github.ref }} + cancel-in-progress: false + jobs: health-check: runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read issues: write @@ -22,22 +27,12 @@ jobs: - name: Build run: go build -o awesome-docker ./cmd/awesome-docker - - name: Run Health Scoring - run: ./awesome-docker health - continue-on-error: true + - name: Run Health + Report + id: report + run: ./awesome-docker ci health-report --issue-file health_report.txt --github-output "$GITHUB_OUTPUT" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Generate Report - id: report - run: | - ./awesome-docker report > health_report.txt 2>&1 || true - if [ -s health_report.txt ]; then - echo "has_report=true" >> "$GITHUB_OUTPUT" - else - echo "has_report=false" >> "$GITHUB_OUTPUT" - fi - - name: Create/Update Issue with Health Report if: steps.report.outputs.has_report == 'true' uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8 @@ -45,8 +40,7 @@ jobs: script: | const fs = require('fs'); const report = fs.readFileSync('health_report.txt', 'utf8'); - - const issueBody = report + '\n\n---\n*Auto-generated weekly by health_report.yml*'; + const issueBody = report; const issues = await github.rest.issues.listForRepo({ owner: context.repo.owner, diff --git a/cmd/awesome-docker/main.go b/cmd/awesome-docker/main.go index 9c336fe..5ad2518 100644 --- a/cmd/awesome-docker/main.go +++ b/cmd/awesome-docker/main.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "os" + "strconv" + "strings" "github.com/spf13/cobra" "github.com/veggiemonk/awesome-docker/internal/builder" @@ -23,6 +25,15 @@ const ( version = "0.1.0" ) +type checkSummary struct { + ExternalTotal int + GitHubTotal int + Broken []checker.LinkResult + Redirected []checker.LinkResult + GitHubErrors []error + GitHubSkipped bool +} + func main() { root := &cobra.Command{ Use: "awesome-docker", @@ -37,6 +48,7 @@ func main() { buildCmd(), reportCmd(), validateCmd(), + ciCmd(), ) if err := root.Execute(); err != nil { @@ -70,6 +82,205 @@ func collectURLs(sections []parser.Section, urls *[]string) { } } +func runLinkChecks(prMode bool) (checkSummary, error) { + doc, err := parseReadme() + if err != nil { + return checkSummary{}, fmt.Errorf("parse: %w", err) + } + + var urls []string + collectURLs(doc.Sections, &urls) + + exclude, err := cache.LoadExcludeList(excludePath) + if err != nil { + return checkSummary{}, fmt.Errorf("load exclude list: %w", err) + } + + ghURLs, extURLs := checker.PartitionLinks(urls) + + summary := checkSummary{ + ExternalTotal: len(extURLs), + GitHubTotal: len(ghURLs), + } + + results := checker.CheckLinks(extURLs, 10, exclude) + for _, r := range results { + if !r.OK { + summary.Broken = append(summary.Broken, r) + } + if r.Redirected { + summary.Redirected = append(summary.Redirected, r) + } + } + + if prMode { + summary.GitHubSkipped = true + return summary, nil + } + + token := os.Getenv("GITHUB_TOKEN") + if token == "" { + summary.GitHubSkipped = true + return summary, nil + } + + gc := checker.NewGitHubChecker(token) + _, errs := gc.CheckRepos(context.Background(), ghURLs, 50) + summary.GitHubErrors = errs + return summary, nil +} + +func runHealth(ctx context.Context) error { + token := os.Getenv("GITHUB_TOKEN") + if token == "" { + return fmt.Errorf("GITHUB_TOKEN environment variable is required") + } + + doc, err := parseReadme() + if err != nil { + return fmt.Errorf("parse: %w", err) + } + + var urls []string + collectURLs(doc.Sections, &urls) + ghURLs, _ := checker.PartitionLinks(urls) + + fmt.Printf("Scoring %d GitHub repositories...\n", len(ghURLs)) + gc := checker.NewGitHubChecker(token) + infos, errs := gc.CheckRepos(ctx, ghURLs, 50) + for _, e := range errs { + fmt.Printf(" error: %v\n", e) + } + if len(infos) == 0 { + if len(errs) > 0 { + return fmt.Errorf("failed to fetch GitHub metadata for all repositories (%d errors); check network/DNS and GITHUB_TOKEN", len(errs)) + } + return fmt.Errorf("no GitHub repositories found in README") + } + + scored := scorer.ScoreAll(infos) + cacheEntries := scorer.ToCacheEntries(scored) + + hc, err := cache.LoadHealthCache(healthCachePath) + if err != nil { + return fmt.Errorf("load cache: %w", err) + } + hc.Merge(cacheEntries) + if err := cache.SaveHealthCache(healthCachePath, hc); err != nil { + return fmt.Errorf("save cache: %w", err) + } + + fmt.Printf("Cache updated: %d entries in %s\n", len(hc.Entries), healthCachePath) + return nil +} + +func scoredFromCache() ([]scorer.ScoredEntry, error) { + hc, err := cache.LoadHealthCache(healthCachePath) + if err != nil { + return nil, fmt.Errorf("load cache: %w", err) + } + if len(hc.Entries) == 0 { + return nil, fmt.Errorf("no cache data, run 'health' first") + } + + scored := make([]scorer.ScoredEntry, 0, len(hc.Entries)) + for _, e := range hc.Entries { + scored = append(scored, scorer.ScoredEntry{ + URL: e.URL, + Name: e.Name, + Status: scorer.Status(e.Status), + Stars: e.Stars, + HasLicense: e.HasLicense, + LastPush: e.LastPush, + }) + } + return scored, nil +} + +func markdownReportFromCache() (string, error) { + scored, err := scoredFromCache() + if err != nil { + return "", err + } + return scorer.GenerateReport(scored), nil +} + +func writeGitHubOutput(path, key, value string) error { + if path == "" { + return nil + } + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("open github output file: %w", err) + } + defer f.Close() + if _, err := fmt.Fprintf(f, "%s=%s\n", key, value); err != nil { + return fmt.Errorf("write github output: %w", err) + } + return nil +} + +func sanitizeOutputValue(v string) string { + v = strings.ReplaceAll(v, "\n", " ") + v = strings.ReplaceAll(v, "\r", " ") + return strings.TrimSpace(v) +} + +func buildBrokenLinksIssueBody(summary checkSummary, runErr error) string { + var b strings.Builder + b.WriteString("# Broken Links Detected\n\n") + + if runErr != nil { + b.WriteString("The link checker failed to execute cleanly.\n\n") + b.WriteString("## Failure\n\n") + fmt.Fprintf(&b, "- %s\n\n", runErr) + } else { + fmt.Fprintf(&b, "- Broken links: %d\n", len(summary.Broken)) + fmt.Fprintf(&b, "- Redirected links: %d\n", len(summary.Redirected)) + fmt.Fprintf(&b, "- GitHub API errors: %d\n\n", len(summary.GitHubErrors)) + + if len(summary.Broken) > 0 { + b.WriteString("## Broken Links\n\n") + for _, r := range summary.Broken { + fmt.Fprintf(&b, "- `%s` -> `%d %s`\n", r.URL, r.StatusCode, strings.TrimSpace(r.Error)) + } + b.WriteString("\n") + } + + if len(summary.GitHubErrors) > 0 { + b.WriteString("## GitHub API Errors\n\n") + for _, e := range summary.GitHubErrors { + fmt.Fprintf(&b, "- `%s`\n", e) + } + b.WriteString("\n") + } + } + + b.WriteString("## Action Required\n\n") + b.WriteString("- Update the URL if the resource moved\n") + b.WriteString("- Remove the entry if permanently unavailable\n") + b.WriteString("- Add to `config/exclude.yaml` if a known false positive\n") + b.WriteString("- Investigate GitHub API/auth failures when present\n\n") + b.WriteString("---\n") + b.WriteString("*Auto-generated by awesome-docker ci broken-links*\n") + return b.String() +} + +func buildHealthReportIssueBody(report string, healthErr error) string { + var b strings.Builder + if healthErr != nil { + b.WriteString("WARNING: health refresh failed in this run; showing latest cached report.\n\n") + fmt.Fprintf(&b, "Error: `%s`\n\n", healthErr) + } + b.WriteString(report) + if !strings.HasSuffix(report, "\n") { + b.WriteString("\n") + } + b.WriteString("\n---\n") + b.WriteString("*Auto-generated weekly by awesome-docker ci health-report*\n") + return b.String() +} + func lintCmd() *cobra.Command { var fix bool cmd := &cobra.Command{ @@ -113,71 +324,45 @@ func checkCmd() *cobra.Command { Use: "check", Short: "Check links for reachability", RunE: func(cmd *cobra.Command, args []string) error { - doc, err := parseReadme() + summary, err := runLinkChecks(prMode) if err != nil { - return fmt.Errorf("parse: %w", err) + return err } - var urls []string - collectURLs(doc.Sections, &urls) - - exclude, err := cache.LoadExcludeList(excludePath) - if err != nil { - return fmt.Errorf("load exclude list: %w", err) - } - - ghURLs, extURLs := checker.PartitionLinks(urls) - - fmt.Printf("Checking %d external links...\n", len(extURLs)) - results := checker.CheckLinks(extURLs, 10, exclude) - var broken []checker.LinkResult - var redirected []checker.LinkResult - for _, r := range results { - if !r.OK { - broken = append(broken, r) - } - if r.Redirected { - redirected = append(redirected, r) - } - } - - var ghErrs []error + fmt.Printf("Checking %d external links...\n", summary.ExternalTotal) if !prMode { - token := os.Getenv("GITHUB_TOKEN") - if token != "" { - fmt.Printf("Checking %d GitHub repositories...\n", len(ghURLs)) - gc := checker.NewGitHubChecker(token) - _, errs := gc.CheckRepos(context.Background(), ghURLs, 50) - ghErrs = errs - for _, e := range ghErrs { - fmt.Printf(" GitHub error: %v\n", e) - } - } else { + if summary.GitHubSkipped { fmt.Println("GITHUB_TOKEN not set, skipping GitHub repo checks") + } else { + fmt.Printf("Checking %d GitHub repositories...\n", summary.GitHubTotal) } } - if len(redirected) > 0 { - fmt.Printf("\n%d redirected links (consider updating):\n", len(redirected)) - for _, r := range redirected { + for _, e := range summary.GitHubErrors { + fmt.Printf(" GitHub error: %v\n", e) + } + + if len(summary.Redirected) > 0 { + fmt.Printf("\n%d redirected links (consider updating):\n", len(summary.Redirected)) + for _, r := range summary.Redirected { fmt.Printf(" %s -> %s\n", r.URL, r.RedirectURL) } } - if len(broken) > 0 { - fmt.Printf("\n%d broken links:\n", len(broken)) - for _, r := range broken { + if len(summary.Broken) > 0 { + fmt.Printf("\n%d broken links:\n", len(summary.Broken)) + for _, r := range summary.Broken { fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error) } } - if len(broken) > 0 && len(ghErrs) > 0 { - return fmt.Errorf("found %d broken links and %d GitHub API errors", len(broken), len(ghErrs)) + if len(summary.Broken) > 0 && len(summary.GitHubErrors) > 0 { + return fmt.Errorf("found %d broken links and %d GitHub API errors", len(summary.Broken), len(summary.GitHubErrors)) } - if len(broken) > 0 { - return fmt.Errorf("found %d broken links", len(broken)) + if len(summary.Broken) > 0 { + return fmt.Errorf("found %d broken links", len(summary.Broken)) } - if len(ghErrs) > 0 { - return fmt.Errorf("github checks failed with %d errors", len(ghErrs)) + if len(summary.GitHubErrors) > 0 { + return fmt.Errorf("github checks failed with %d errors", len(summary.GitHubErrors)) } fmt.Println("All links OK") @@ -193,47 +378,7 @@ func healthCmd() *cobra.Command { Use: "health", Short: "Score repository health and update cache", RunE: func(cmd *cobra.Command, args []string) error { - token := os.Getenv("GITHUB_TOKEN") - if token == "" { - return fmt.Errorf("GITHUB_TOKEN environment variable is required") - } - - doc, err := parseReadme() - if err != nil { - return fmt.Errorf("parse: %w", err) - } - - var urls []string - collectURLs(doc.Sections, &urls) - ghURLs, _ := checker.PartitionLinks(urls) - - fmt.Printf("Scoring %d GitHub repositories...\n", len(ghURLs)) - gc := checker.NewGitHubChecker(token) - infos, errs := gc.CheckRepos(context.Background(), ghURLs, 50) - for _, e := range errs { - fmt.Printf(" error: %v\n", e) - } - if len(infos) == 0 { - if len(errs) > 0 { - return fmt.Errorf("failed to fetch GitHub metadata for all repositories (%d errors); check network/DNS and GITHUB_TOKEN", len(errs)) - } - return fmt.Errorf("no GitHub repositories found in README") - } - - scored := scorer.ScoreAll(infos) - cacheEntries := scorer.ToCacheEntries(scored) - - hc, err := cache.LoadHealthCache(healthCachePath) - if err != nil { - return fmt.Errorf("load cache: %w", err) - } - hc.Merge(cacheEntries) - if err := cache.SaveHealthCache(healthCachePath, hc); err != nil { - return fmt.Errorf("save cache: %w", err) - } - - fmt.Printf("Cache updated: %d entries in %s\n", len(hc.Entries), healthCachePath) - return nil + return runHealth(context.Background()) }, } } @@ -258,24 +403,9 @@ func reportCmd() *cobra.Command { Use: "report", Short: "Generate health report from cache", RunE: func(cmd *cobra.Command, args []string) error { - hc, err := cache.LoadHealthCache(healthCachePath) + scored, err := scoredFromCache() if err != nil { - return fmt.Errorf("load cache: %w", err) - } - if len(hc.Entries) == 0 { - return fmt.Errorf("no cache data, run 'health' first") - } - - var scored []scorer.ScoredEntry - for _, e := range hc.Entries { - scored = append(scored, scorer.ScoredEntry{ - URL: e.URL, - Name: e.Name, - Status: scorer.Status(e.Status), - Stars: e.Stars, - HasLicense: e.HasLicense, - LastPush: e.LastPush, - }) + return err } if jsonOutput { @@ -319,28 +449,17 @@ func validateCmd() *cobra.Command { fmt.Printf("Lint OK: %d warnings\n", result.Warnings) fmt.Println("\n=== Checking links (PR mode) ===") - var urls []string - collectURLs(doc.Sections, &urls) - exclude, err := cache.LoadExcludeList(excludePath) + summary, err := runLinkChecks(true) if err != nil { - return fmt.Errorf("load exclude list: %w", err) + return err } - _, extURLs := checker.PartitionLinks(urls) - - fmt.Printf("Checking %d external links...\n", len(extURLs)) - results := checker.CheckLinks(extURLs, 10, exclude) - var broken []checker.LinkResult - for _, r := range results { - if !r.OK { - broken = append(broken, r) - } - } - if len(broken) > 0 { - fmt.Printf("\n%d broken links:\n", len(broken)) - for _, r := range broken { + fmt.Printf("Checking %d external links...\n", summary.ExternalTotal) + if len(summary.Broken) > 0 { + fmt.Printf("\n%d broken links:\n", len(summary.Broken)) + for _, r := range summary.Broken { fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error) } - return fmt.Errorf("found %d broken links", len(broken)) + return fmt.Errorf("found %d broken links", len(summary.Broken)) } fmt.Println("\nValidation passed") @@ -348,3 +467,164 @@ func validateCmd() *cobra.Command { }, } } + +func ciCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "ci", + Short: "CI-oriented helper commands", + } + cmd.AddCommand( + ciBrokenLinksCmd(), + ciHealthReportCmd(), + ) + return cmd +} + +func ciBrokenLinksCmd() *cobra.Command { + var issueFile string + var githubOutput string + var strict bool + + cmd := &cobra.Command{ + Use: "broken-links", + Short: "Run link checks and emit CI outputs/artifacts", + RunE: func(cmd *cobra.Command, args []string) error { + summary, runErr := runLinkChecks(false) + + hasErrors := runErr != nil || len(summary.Broken) > 0 || len(summary.GitHubErrors) > 0 + exitCode := 0 + if hasErrors { + exitCode = 1 + } + if runErr != nil { + exitCode = 2 + } + + if issueFile != "" && hasErrors { + body := buildBrokenLinksIssueBody(summary, runErr) + if err := os.WriteFile(issueFile, []byte(body), 0o644); err != nil { + return fmt.Errorf("write issue file: %w", err) + } + } + + if err := writeGitHubOutput(githubOutput, "has_errors", strconv.FormatBool(hasErrors)); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "check_exit_code", strconv.Itoa(exitCode)); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "broken_count", strconv.Itoa(len(summary.Broken))); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "github_error_count", strconv.Itoa(len(summary.GitHubErrors))); err != nil { + return err + } + if runErr != nil { + if err := writeGitHubOutput(githubOutput, "run_error", sanitizeOutputValue(runErr.Error())); err != nil { + return err + } + } + + if runErr != nil { + fmt.Printf("CI broken-links run error: %v\n", runErr) + } + if hasErrors { + fmt.Printf("CI broken-links found %d broken links and %d GitHub errors\n", len(summary.Broken), len(summary.GitHubErrors)) + } else { + fmt.Println("CI broken-links found no errors") + } + + if strict { + if runErr != nil { + return runErr + } + if hasErrors { + return fmt.Errorf("found %d broken links and %d GitHub API errors", len(summary.Broken), len(summary.GitHubErrors)) + } + } + return nil + }, + } + + cmd.Flags().StringVar(&issueFile, "issue-file", "broken_links_issue.md", "Path to write issue markdown body") + cmd.Flags().StringVar(&githubOutput, "github-output", "", "Path to GitHub output file (typically $GITHUB_OUTPUT)") + cmd.Flags().BoolVar(&strict, "strict", false, "Return non-zero when errors are found") + return cmd +} + +func ciHealthReportCmd() *cobra.Command { + var issueFile string + var githubOutput string + var strict bool + + cmd := &cobra.Command{ + Use: "health-report", + Short: "Refresh health cache, render report, and emit CI outputs/artifacts", + RunE: func(cmd *cobra.Command, args []string) error { + healthErr := runHealth(context.Background()) + report, reportErr := markdownReportFromCache() + + healthOK := healthErr == nil + reportOK := reportErr == nil + hasReport := reportOK && strings.TrimSpace(report) != "" + hasErrors := !healthOK || !reportOK + + if hasReport && issueFile != "" { + body := buildHealthReportIssueBody(report, healthErr) + if err := os.WriteFile(issueFile, []byte(body), 0o644); err != nil { + return fmt.Errorf("write issue file: %w", err) + } + } + + if err := writeGitHubOutput(githubOutput, "has_report", strconv.FormatBool(hasReport)); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "health_ok", strconv.FormatBool(healthOK)); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "report_ok", strconv.FormatBool(reportOK)); err != nil { + return err + } + if err := writeGitHubOutput(githubOutput, "has_errors", strconv.FormatBool(hasErrors)); err != nil { + return err + } + if healthErr != nil { + if err := writeGitHubOutput(githubOutput, "health_error", sanitizeOutputValue(healthErr.Error())); err != nil { + return err + } + } + if reportErr != nil { + if err := writeGitHubOutput(githubOutput, "report_error", sanitizeOutputValue(reportErr.Error())); err != nil { + return err + } + } + + if healthErr != nil { + fmt.Printf("CI health-report health error: %v\n", healthErr) + } + if reportErr != nil { + fmt.Printf("CI health-report report error: %v\n", reportErr) + } + if hasReport { + fmt.Println("CI health-report generated report artifact") + } else { + fmt.Println("CI health-report has no report artifact") + } + + if strict { + if healthErr != nil { + return healthErr + } + if reportErr != nil { + return reportErr + } + } + return nil + }, + } + + cmd.Flags().StringVar(&issueFile, "issue-file", "health_report.txt", "Path to write health issue markdown body") + cmd.Flags().StringVar(&githubOutput, "github-output", "", "Path to GitHub output file (typically $GITHUB_OUTPUT)") + cmd.Flags().BoolVar(&strict, "strict", false, "Return non-zero when health/report fails") + return cmd +}