From c14a071c8d7ebd0f9b665e327fdf33d630ccf0a2 Mon Sep 17 00:00:00 2001 From: Julien Bisconti Date: Sat, 28 Feb 2026 01:31:37 +0100 Subject: [PATCH] Improve checker/fixer cohesion and harden workflows --- cmd/awesome-docker/main.go | 33 +++++++++++++---- internal/builder/builder.go | 26 +++++++++++-- internal/builder/builder_test.go | 20 ++++++++++ internal/cache/cache.go | 1 + internal/cache/cache_test.go | 16 ++++++++ internal/checker/http.go | 38 ++++++++++++------- internal/checker/http_test.go | 38 +++++++++++++++++++ internal/linter/fixer.go | 63 ++++++++++++++++---------------- internal/linter/fixer_test.go | 53 +++++++++++++++++++++++++++ internal/parser/parser.go | 21 ++++++----- internal/parser/parser_test.go | 14 +++++++ internal/scorer/scorer.go | 41 +++++++++++---------- 12 files changed, 279 insertions(+), 85 deletions(-) diff --git a/cmd/awesome-docker/main.go b/cmd/awesome-docker/main.go index 254028d..9c336fe 100644 --- a/cmd/awesome-docker/main.go +++ b/cmd/awesome-docker/main.go @@ -121,7 +121,10 @@ func checkCmd() *cobra.Command { var urls []string collectURLs(doc.Sections, &urls) - exclude, _ := cache.LoadExcludeList(excludePath) + exclude, err := cache.LoadExcludeList(excludePath) + if err != nil { + return fmt.Errorf("load exclude list: %w", err) + } ghURLs, extURLs := checker.PartitionLinks(urls) @@ -138,13 +141,15 @@ func checkCmd() *cobra.Command { } } + var ghErrs []error if !prMode { token := os.Getenv("GITHUB_TOKEN") if token != "" { fmt.Printf("Checking %d GitHub repositories...\n", len(ghURLs)) gc := checker.NewGitHubChecker(token) _, errs := gc.CheckRepos(context.Background(), ghURLs, 50) - for _, e := range errs { + ghErrs = errs + for _, e := range ghErrs { fmt.Printf(" GitHub error: %v\n", e) } } else { @@ -164,8 +169,16 @@ func checkCmd() *cobra.Command { for _, r := range broken { fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error) } + } + if len(broken) > 0 && len(ghErrs) > 0 { + return fmt.Errorf("found %d broken links and %d GitHub API errors", len(broken), len(ghErrs)) + } + if len(broken) > 0 { return fmt.Errorf("found %d broken links", len(broken)) } + if len(ghErrs) > 0 { + return fmt.Errorf("github checks failed with %d errors", len(ghErrs)) + } fmt.Println("All links OK") return nil @@ -256,11 +269,12 @@ func reportCmd() *cobra.Command { var scored []scorer.ScoredEntry for _, e := range hc.Entries { scored = append(scored, scorer.ScoredEntry{ - URL: e.URL, - Name: e.Name, - Status: scorer.Status(e.Status), - Stars: e.Stars, - LastPush: e.LastPush, + URL: e.URL, + Name: e.Name, + Status: scorer.Status(e.Status), + Stars: e.Stars, + HasLicense: e.HasLicense, + LastPush: e.LastPush, }) } @@ -307,7 +321,10 @@ func validateCmd() *cobra.Command { fmt.Println("\n=== Checking links (PR mode) ===") var urls []string collectURLs(doc.Sections, &urls) - exclude, _ := cache.LoadExcludeList(excludePath) + exclude, err := cache.LoadExcludeList(excludePath) + if err != nil { + return fmt.Errorf("load exclude list: %w", err) + } _, extURLs := checker.PartitionLinks(urls) fmt.Printf("Checking %d external links...\n", len(extURLs)) diff --git a/internal/builder/builder.go b/internal/builder/builder.go index ac85f31..6b2cd20 100644 --- a/internal/builder/builder.go +++ b/internal/builder/builder.go @@ -36,12 +36,30 @@ func Build(markdownPath, templatePath, outputPath string) error { // Inject into template — support both placeholder formats output := string(tmpl) - replacements := []struct{ old, new string }{ - {`
`, `
` + buf.String() + `
`}, - {`
`, `
` + buf.String() + `
`}, + replacements := []struct { + old string + new string + }{ + { + old: `
`, + new: `
` + buf.String() + `
`, + }, + { + old: `
`, + new: `
` + buf.String() + `
`, + }, } + + replaced := false for _, r := range replacements { - output = strings.Replace(output, r.old, r.new, 1) + if strings.Contains(output, r.old) { + output = strings.Replace(output, r.old, r.new, 1) + replaced = true + break + } + } + if !replaced { + return fmt.Errorf("template missing supported markdown placeholder") } if err := os.WriteFile(outputPath, []byte(output), 0o644); err != nil { diff --git a/internal/builder/builder_test.go b/internal/builder/builder_test.go index 7fd631e..5cf98c3 100644 --- a/internal/builder/builder_test.go +++ b/internal/builder/builder_test.go @@ -111,3 +111,23 @@ func TestBuildRealREADME(t *testing.T) { } t.Logf("Generated %d bytes", info.Size()) } + +func TestBuildFailsWithoutPlaceholder(t *testing.T) { + dir := t.TempDir() + + mdPath := filepath.Join(dir, "README.md") + if err := os.WriteFile(mdPath, []byte("# Title\n"), 0o644); err != nil { + t.Fatal(err) + } + + tmplPath := filepath.Join(dir, "template.html") + if err := os.WriteFile(tmplPath, []byte("
"), 0o644); err != nil { + t.Fatal(err) + } + + outPath := filepath.Join(dir, "index.html") + err := Build(mdPath, tmplPath, outPath) + if err == nil { + t.Fatal("expected Build to fail when template has no supported placeholder") + } +} diff --git a/internal/cache/cache.go b/internal/cache/cache.go index ce9c672..0c481ba 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -89,6 +89,7 @@ func (hc *HealthCache) Merge(entries []HealthEntry) { if i, exists := index[e.URL]; exists { hc.Entries[i] = e } else { + index[e.URL] = len(hc.Entries) hc.Entries = append(hc.Entries, e) } } diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go index 71d1293..52a161d 100644 --- a/internal/cache/cache_test.go +++ b/internal/cache/cache_test.go @@ -119,3 +119,19 @@ func TestMerge(t *testing.T) { t.Errorf("last entry = %q, want C", hc.Entries[2].Name) } } + +func TestMergeDeduplicatesIncomingBatch(t *testing.T) { + hc := &HealthCache{} + + hc.Merge([]HealthEntry{ + {URL: "https://github.com/c/c", Name: "C", Stars: 1}, + {URL: "https://github.com/c/c", Name: "C", Stars: 2}, + }) + + if len(hc.Entries) != 1 { + t.Fatalf("entries = %d, want 1", len(hc.Entries)) + } + if hc.Entries[0].Stars != 2 { + t.Fatalf("stars = %d, want last value 2", hc.Entries[0].Stars) + } +} diff --git a/internal/checker/http.go b/internal/checker/http.go index 3e17929..7aa0a82 100644 --- a/internal/checker/http.go +++ b/internal/checker/http.go @@ -25,6 +25,15 @@ type LinkResult struct { Error string } +func shouldFallbackToGET(statusCode int) bool { + switch statusCode { + case http.StatusBadRequest, http.StatusForbidden, http.StatusMethodNotAllowed, http.StatusNotImplemented: + return true + default: + return false + } +} + // CheckLink checks a single URL. Uses HEAD first, falls back to GET. func CheckLink(url string, client *http.Client) LinkResult { result := LinkResult{URL: url} @@ -32,14 +41,6 @@ func CheckLink(url string, client *http.Client) LinkResult { ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) defer cancel() - // Try HEAD first - req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil) - if err != nil { - result.Error = err.Error() - return result - } - req.Header.Set("User-Agent", userAgent) - // Track redirects var finalURL string origCheckRedirect := client.CheckRedirect @@ -52,16 +53,25 @@ func CheckLink(url string, client *http.Client) LinkResult { } defer func() { client.CheckRedirect = origCheckRedirect }() - resp, err := client.Do(req) + doRequest := func(method string) (*http.Response, error) { + req, err := http.NewRequestWithContext(ctx, method, url, nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", userAgent) + return client.Do(req) + } + + resp, err := doRequest(http.MethodHead) if err != nil { - // Fallback to GET - req, err2 := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err2 != nil { + resp, err = doRequest(http.MethodGet) + if err != nil { result.Error = err.Error() return result } - req.Header.Set("User-Agent", userAgent) - resp, err = client.Do(req) + } else if shouldFallbackToGET(resp.StatusCode) { + resp.Body.Close() + resp, err = doRequest(http.MethodGet) if err != nil { result.Error = err.Error() return result diff --git a/internal/checker/http_test.go b/internal/checker/http_test.go index b8eef45..e51188d 100644 --- a/internal/checker/http_test.go +++ b/internal/checker/http_test.go @@ -78,3 +78,41 @@ func TestCheckLinks(t *testing.T) { } } } + +func TestCheckLinkFallbackToGETOnMethodNotAllowed(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + result := CheckLink(server.URL, &http.Client{}) + if !result.OK { + t.Errorf("expected OK after GET fallback, got status %d, error: %s", result.StatusCode, result.Error) + } + if result.StatusCode != http.StatusOK { + t.Errorf("status = %d, want 200", result.StatusCode) + } +} + +func TestCheckLinkFallbackToGETOnForbiddenHead(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusForbidden) + return + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + result := CheckLink(server.URL, &http.Client{}) + if !result.OK { + t.Errorf("expected OK after GET fallback, got status %d, error: %s", result.StatusCode, result.Error) + } + if result.StatusCode != http.StatusOK { + t.Errorf("status = %d, want 200", result.StatusCode) + } +} diff --git a/internal/linter/fixer.go b/internal/linter/fixer.go index edb22e0..4d8eac7 100644 --- a/internal/linter/fixer.go +++ b/internal/linter/fixer.go @@ -20,6 +20,9 @@ var attributionRe = regexp.MustCompile(`\s+(?:(?:[Cc]reated|[Mm]aintained|[Bb]ui // bareAttributionRe matches: by @author at end of line (no link). var bareAttributionRe = regexp.MustCompile(`\s+by\s+@\w+\.?$`) +// sectionHeadingRe matches markdown headings. +var sectionHeadingRe = regexp.MustCompile(`^(#{1,6})\s+(.+?)(?:\s*)?$`) + // RemoveAttribution strips author attribution from a description string. func RemoveAttribution(desc string) string { desc = attributionRe.ReplaceAllString(desc, "") @@ -47,12 +50,6 @@ func FormatEntry(e parser.Entry) string { return fmt.Sprintf("- [%s](%s) - %s", e.Name, e.URL, desc) } -// entryGroup tracks a consecutive run of entry lines. -type entryGroup struct { - startIdx int // index in lines slice - entries []parser.Entry -} - // FixFile reads the README, fixes entries (capitalize, period, remove attribution, // sort), and writes the result back. func FixFile(path string) (int, error) { @@ -71,34 +68,39 @@ func FixFile(path string) (int, error) { return 0, err } - // Identify entry groups (consecutive parsed entry lines) - var groups []entryGroup - var current *entryGroup fixCount := 0 + var headingLines []int for i, line := range lines { - entry, err := parser.ParseEntry(line, i+1) - if err != nil { - // Not an entry — close any active group - if current != nil { - groups = append(groups, *current) - current = nil - } - continue + if sectionHeadingRe.MatchString(line) { + headingLines = append(headingLines, i) } - if current == nil { - current = &entryGroup{startIdx: i} - } - current.entries = append(current.entries, entry) - } - if current != nil { - groups = append(groups, *current) } - // Process each group: fix entries, sort, replace lines - for _, g := range groups { + // Process each heading block independently to match linter sort scope. + for i, headingIdx := range headingLines { + start := headingIdx + 1 + end := len(lines) + if i+1 < len(headingLines) { + end = headingLines[i+1] + } + + var entryPositions []int + var entries []parser.Entry + for lineIdx := start; lineIdx < end; lineIdx++ { + entry, err := parser.ParseEntry(lines[lineIdx], lineIdx+1) + if err != nil { + continue + } + entryPositions = append(entryPositions, lineIdx) + entries = append(entries, entry) + } + if len(entries) == 0 { + continue + } + var fixed []parser.Entry - for _, e := range g.entries { + for _, e := range entries { f := FixEntry(e) f.Description = RemoveAttribution(f.Description) // Re-apply period after removing attribution (it may have been stripped) @@ -109,13 +111,12 @@ func FixFile(path string) (int, error) { } sorted := SortEntries(fixed) - for j, e := range sorted { newLine := FormatEntry(e) - idx := g.startIdx + j - if lines[idx] != newLine { + lineIdx := entryPositions[j] + if lines[lineIdx] != newLine { fixCount++ - lines[idx] = newLine + lines[lineIdx] = newLine } } } diff --git a/internal/linter/fixer_test.go b/internal/linter/fixer_test.go index ae37ef0..6872ce7 100644 --- a/internal/linter/fixer_test.go +++ b/internal/linter/fixer_test.go @@ -138,3 +138,56 @@ Some text here. t.Errorf("expected period added, got:\n%s", result) } } + +func TestFixFileSortsAcrossBlankLinesAndIsIdempotent(t *testing.T) { + content := `# Awesome Docker + +## Tools + +- [Zulu](https://example.com/zulu) - z tool + +- [Alpha](https://example.com/alpha) - a tool +` + + tmp, err := os.CreateTemp("", "readme-*.md") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmp.Name()) + + if _, err := tmp.WriteString(content); err != nil { + t.Fatal(err) + } + tmp.Close() + + firstCount, err := FixFile(tmp.Name()) + if err != nil { + t.Fatal(err) + } + if firstCount == 0 { + t.Fatal("expected first run to apply fixes") + } + + firstData, err := os.ReadFile(tmp.Name()) + if err != nil { + t.Fatal(err) + } + firstResult := string(firstData) + + alphaIdx := strings.Index(firstResult, "[Alpha]") + zuluIdx := strings.Index(firstResult, "[Zulu]") + if alphaIdx == -1 || zuluIdx == -1 { + t.Fatalf("expected both Alpha and Zulu in result:\n%s", firstResult) + } + if alphaIdx > zuluIdx { + t.Fatalf("expected Alpha before Zulu after fix:\n%s", firstResult) + } + + secondCount, err := FixFile(tmp.Name()) + if err != nil { + t.Fatal(err) + } + if secondCount != 0 { + t.Fatalf("expected second run to be idempotent, got %d changes", secondCount) + } +} diff --git a/internal/parser/parser.go b/internal/parser/parser.go index 3b428f1..5eb0d93 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -18,10 +18,13 @@ var entryRe = regexp.MustCompile(`^[-*]\s+\[([^\]]+)\]\(([^)]+)\)(.*?)\s+-\s+(.+ // headingRe matches markdown headings: # Title, ## Title, etc. var headingRe = regexp.MustCompile(`^(#{1,6})\s+(.+?)(?:\s*)?$`) -var markerMap = map[string]Marker{ - ":skull:": MarkerAbandoned, - ":heavy_dollar_sign:": MarkerPaid, - ":construction:": MarkerWIP, +var markerDefs = []struct { + text string + marker Marker +}{ + {text: ":skull:", marker: MarkerAbandoned}, + {text: ":heavy_dollar_sign:", marker: MarkerPaid}, + {text: ":construction:", marker: MarkerWIP}, } // ParseEntry parses a single markdown list line into an Entry. @@ -36,11 +39,11 @@ func ParseEntry(line string, lineNum int) (Entry, error) { var markers []Marker // Extract markers from both the middle section and the description - for text, marker := range markerMap { - if strings.Contains(middle, text) || strings.Contains(desc, text) { - markers = append(markers, marker) - middle = strings.ReplaceAll(middle, text, "") - desc = strings.ReplaceAll(desc, text, "") + for _, def := range markerDefs { + if strings.Contains(middle, def.text) || strings.Contains(desc, def.text) { + markers = append(markers, def.marker) + middle = strings.ReplaceAll(middle, def.text, "") + desc = strings.ReplaceAll(desc, def.text, "") } } desc = strings.TrimSpace(desc) diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index d414747..9f86b6f 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -54,6 +54,20 @@ func TestParseEntryMultipleMarkers(t *testing.T) { } } +func TestParseEntryMarkersCanonicalOrder(t *testing.T) { + line := `- [SomeProject](https://example.com) - :construction: A project. :skull:` + entry, err := ParseEntry(line, 1) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(entry.Markers) != 2 { + t.Fatalf("markers count = %d, want 2", len(entry.Markers)) + } + if entry.Markers[0] != MarkerAbandoned || entry.Markers[1] != MarkerWIP { + t.Fatalf("marker order = %v, want [MarkerAbandoned MarkerWIP]", entry.Markers) + } +} + func TestParseDocument(t *testing.T) { input := `# Awesome Docker diff --git a/internal/scorer/scorer.go b/internal/scorer/scorer.go index f2f34d6..1920e0c 100644 --- a/internal/scorer/scorer.go +++ b/internal/scorer/scorer.go @@ -23,12 +23,13 @@ const ( // ScoredEntry is a repo with its computed health status. type ScoredEntry struct { - URL string - Name string - Status Status - Stars int - Forks int - LastPush time.Time + URL string + Name string + Status Status + Stars int + Forks int + HasLicense bool + LastPush time.Time } // ReportSummary contains grouped status counts. @@ -75,12 +76,13 @@ func ScoreAll(infos []checker.RepoInfo) []ScoredEntry { results := make([]ScoredEntry, len(infos)) for i, info := range infos { results[i] = ScoredEntry{ - URL: info.URL, - Name: fmt.Sprintf("%s/%s", info.Owner, info.Name), - Status: Score(info), - Stars: info.Stars, - Forks: info.Forks, - LastPush: info.PushedAt, + URL: info.URL, + Name: fmt.Sprintf("%s/%s", info.Owner, info.Name), + Status: Score(info), + Stars: info.Stars, + Forks: info.Forks, + HasLicense: info.HasLicense, + LastPush: info.PushedAt, } } return results @@ -92,13 +94,14 @@ func ToCacheEntries(scored []ScoredEntry) []cache.HealthEntry { now := time.Now().UTC() for i, s := range scored { entries[i] = cache.HealthEntry{ - URL: s.URL, - Name: s.Name, - Status: string(s.Status), - Stars: s.Stars, - Forks: s.Forks, - LastPush: s.LastPush, - CheckedAt: now, + URL: s.URL, + Name: s.Name, + Status: string(s.Status), + Stars: s.Stars, + Forks: s.Forks, + HasLicense: s.HasLicense, + LastPush: s.LastPush, + CheckedAt: now, } } return entries