From 8bb3147e4efbe3d43dfcad4b747b51c33eb99755 Mon Sep 17 00:00:00 2001 From: Julien Bisconti Date: Fri, 27 Feb 2026 23:45:13 +0100 Subject: [PATCH] fix: prevent false link-check failures and harden health/workflow errors Address three review findings from the Go rewrite:\n\n- checker: update PartitionLinks to only classify HTTP(S) URLs as external links.\n This skips markdown-relative targets (for example and anchors) so\n non-URL entries are no longer sent to HTTP validation and do not produce\n deterministic "unsupported protocol scheme" failures.\n\n- health command: stop ignoring LoadHealthCache errors.\n Return a user-facing "load cache" error when the cache file is unreadable\n or invalid instead of allowing a nil cache panic on Merge.\n\n- broken links workflow: remove masked execution behavior from the link check\n step. Capture awesome-docker check exit code, set has_errors=true on any\n non-zero exit, and expose the exit code in the generated issue body so\n checker failures are visible and cannot incorrectly close the tracking issue.\n\nTest coverage updates:\n- extend checker partition test to include markdown-relative/anchor targets\n and verify they are not treated as external URLs.\n- add cache test for invalid YAML load failure. --- .github/workflows/broken_links.yml | 22 ++++++++++++++++------ cmd/awesome-docker/main.go | 5 ++++- internal/cache/cache_test.go | 16 ++++++++++++++++ internal/checker/github.go | 13 +++++++++++-- internal/checker/github_test.go | 2 ++ 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/.github/workflows/broken_links.yml b/.github/workflows/broken_links.yml index 678b89e..5cc6c77 100644 --- a/.github/workflows/broken_links.yml +++ b/.github/workflows/broken_links.yml @@ -25,12 +25,21 @@ jobs: - name: Run Link Check id: link_check run: | - ./awesome-docker check > link_check_output.txt 2>&1 || true - if grep -q "broken links" link_check_output.txt; then - echo "has_errors=true" >> "$GITHUB_OUTPUT" - else - echo "has_errors=false" >> "$GITHUB_OUTPUT" + set +e + ./awesome-docker check > link_check_output.txt 2>&1 + exit_code=$? + set -e + + has_errors=false + if [ "$exit_code" -ne 0 ]; then + has_errors=true fi + if grep -qi "broken links" link_check_output.txt; then + has_errors=true + fi + + echo "has_errors=$has_errors" >> "$GITHUB_OUTPUT" + echo "check_exit_code=$exit_code" >> "$GITHUB_OUTPUT" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -41,8 +50,9 @@ jobs: script: | const fs = require('fs'); const output = fs.readFileSync('link_check_output.txt', 'utf8'); + const exitCode = '${{ steps.link_check.outputs.check_exit_code }}'; - const issueBody = `# Broken Links Detected\n\nThe weekly link check found broken or inaccessible links.\n\n\`\`\`\n${output}\n\`\`\`\n\n## Action Required\n\n- Update the URL if the resource moved\n- Remove the entry if permanently unavailable\n- Add to \`config/exclude.yaml\` if a known false positive\n\n---\n*Auto-generated by broken_links.yml*`; + const issueBody = `# Broken Links Detected\n\nThe weekly link check found broken links or the checker failed to execute cleanly.\n\nChecker exit code: ${exitCode}\n\n\`\`\`\n${output}\n\`\`\`\n\n## Action Required\n\n- Update the URL if the resource moved\n- Remove the entry if permanently unavailable\n- Add to \`config/exclude.yaml\` if a known false positive\n- Investigate checker failures when exit code is non-zero\n\n---\n*Auto-generated by broken_links.yml*`; const issues = await github.rest.issues.listForRepo({ owner: context.repo.owner, diff --git a/cmd/awesome-docker/main.go b/cmd/awesome-docker/main.go index 1e8bd48..2a7b9eb 100644 --- a/cmd/awesome-docker/main.go +++ b/cmd/awesome-docker/main.go @@ -204,7 +204,10 @@ func healthCmd() *cobra.Command { scored := scorer.ScoreAll(infos) cacheEntries := scorer.ToCacheEntries(scored) - hc, _ := cache.LoadHealthCache(healthCachePath) + hc, err := cache.LoadHealthCache(healthCachePath) + if err != nil { + return fmt.Errorf("load cache: %w", err) + } hc.Merge(cacheEntries) if err := cache.SaveHealthCache(healthCachePath, hc); err != nil { return fmt.Errorf("save cache: %w", err) diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go index 03ec514..a868743 100644 --- a/internal/cache/cache_test.go +++ b/internal/cache/cache_test.go @@ -78,6 +78,22 @@ func TestLoadHealthCacheMissing(t *testing.T) { } } +func TestLoadHealthCacheInvalidYAML(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "health.yaml") + if err := os.WriteFile(path, []byte("entries:\n - url: [not yaml"), 0644); err != nil { + t.Fatal(err) + } + + hc, err := LoadHealthCache(path) + if err == nil { + t.Fatal("expected error for invalid YAML") + } + if hc != nil { + t.Fatal("expected nil cache on invalid YAML") + } +} + func TestMerge(t *testing.T) { hc := &HealthCache{ Entries: []HealthEntry{ diff --git a/internal/checker/github.go b/internal/checker/github.go index e1f1dac..146240e 100644 --- a/internal/checker/github.go +++ b/internal/checker/github.go @@ -3,6 +3,7 @@ package checker import ( "context" "fmt" + "net/url" "strings" "time" @@ -43,12 +44,20 @@ func ExtractGitHubRepo(url string) (owner, name string, ok bool) { return parts[0], parts[1], true } -// PartitionLinks separates URLs into GitHub repos and external links. +func isHTTPURL(raw string) bool { + u, err := url.Parse(raw) + if err != nil { + return false + } + return u.Scheme == "http" || u.Scheme == "https" +} + +// PartitionLinks separates URLs into GitHub repos and external HTTP(S) links. func PartitionLinks(urls []string) (github, external []string) { for _, url := range urls { if _, _, ok := ExtractGitHubRepo(url); ok { github = append(github, url) - } else { + } else if isHTTPURL(url) { external = append(external, url) } } diff --git a/internal/checker/github_test.go b/internal/checker/github_test.go index 7ac8fbe..8e751d0 100644 --- a/internal/checker/github_test.go +++ b/internal/checker/github_test.go @@ -41,6 +41,8 @@ func TestPartitionLinks(t *testing.T) { "https://example.com/tool", "https://github.com/moby/moby", "https://github.com/user/repo/issues", + "dozzle", + "#projects", } gh, ext := PartitionLinks(urls) if len(gh) != 2 {