Improve checker/fixer cohesion and harden workflows

This commit is contained in:
Julien Bisconti
2026-02-28 01:31:37 +01:00
parent e6e23bf1cf
commit c14a071c8d
12 changed files with 279 additions and 85 deletions

View File

@@ -121,7 +121,10 @@ func checkCmd() *cobra.Command {
var urls []string
collectURLs(doc.Sections, &urls)
exclude, _ := cache.LoadExcludeList(excludePath)
exclude, err := cache.LoadExcludeList(excludePath)
if err != nil {
return fmt.Errorf("load exclude list: %w", err)
}
ghURLs, extURLs := checker.PartitionLinks(urls)
@@ -138,13 +141,15 @@ func checkCmd() *cobra.Command {
}
}
var ghErrs []error
if !prMode {
token := os.Getenv("GITHUB_TOKEN")
if token != "" {
fmt.Printf("Checking %d GitHub repositories...\n", len(ghURLs))
gc := checker.NewGitHubChecker(token)
_, errs := gc.CheckRepos(context.Background(), ghURLs, 50)
for _, e := range errs {
ghErrs = errs
for _, e := range ghErrs {
fmt.Printf(" GitHub error: %v\n", e)
}
} else {
@@ -164,8 +169,16 @@ func checkCmd() *cobra.Command {
for _, r := range broken {
fmt.Printf(" %s -> %d %s\n", r.URL, r.StatusCode, r.Error)
}
}
if len(broken) > 0 && len(ghErrs) > 0 {
return fmt.Errorf("found %d broken links and %d GitHub API errors", len(broken), len(ghErrs))
}
if len(broken) > 0 {
return fmt.Errorf("found %d broken links", len(broken))
}
if len(ghErrs) > 0 {
return fmt.Errorf("github checks failed with %d errors", len(ghErrs))
}
fmt.Println("All links OK")
return nil
@@ -256,11 +269,12 @@ func reportCmd() *cobra.Command {
var scored []scorer.ScoredEntry
for _, e := range hc.Entries {
scored = append(scored, scorer.ScoredEntry{
URL: e.URL,
Name: e.Name,
Status: scorer.Status(e.Status),
Stars: e.Stars,
LastPush: e.LastPush,
URL: e.URL,
Name: e.Name,
Status: scorer.Status(e.Status),
Stars: e.Stars,
HasLicense: e.HasLicense,
LastPush: e.LastPush,
})
}
@@ -307,7 +321,10 @@ func validateCmd() *cobra.Command {
fmt.Println("\n=== Checking links (PR mode) ===")
var urls []string
collectURLs(doc.Sections, &urls)
exclude, _ := cache.LoadExcludeList(excludePath)
exclude, err := cache.LoadExcludeList(excludePath)
if err != nil {
return fmt.Errorf("load exclude list: %w", err)
}
_, extURLs := checker.PartitionLinks(urls)
fmt.Printf("Checking %d external links...\n", len(extURLs))

View File

@@ -36,12 +36,30 @@ func Build(markdownPath, templatePath, outputPath string) error {
// Inject into template — support both placeholder formats
output := string(tmpl)
replacements := []struct{ old, new string }{
{`<div id="md"></div>`, `<div id="md">` + buf.String() + `</div>`},
{`<section id="md" class="main-content"></section>`, `<section id="md" class="main-content">` + buf.String() + `</section>`},
replacements := []struct {
old string
new string
}{
{
old: `<div id="md"></div>`,
new: `<div id="md">` + buf.String() + `</div>`,
},
{
old: `<section id="md" class="main-content"></section>`,
new: `<section id="md" class="main-content">` + buf.String() + `</section>`,
},
}
replaced := false
for _, r := range replacements {
output = strings.Replace(output, r.old, r.new, 1)
if strings.Contains(output, r.old) {
output = strings.Replace(output, r.old, r.new, 1)
replaced = true
break
}
}
if !replaced {
return fmt.Errorf("template missing supported markdown placeholder")
}
if err := os.WriteFile(outputPath, []byte(output), 0o644); err != nil {

View File

@@ -111,3 +111,23 @@ func TestBuildRealREADME(t *testing.T) {
}
t.Logf("Generated %d bytes", info.Size())
}
// TestBuildFailsWithoutPlaceholder verifies that Build returns an error
// when the HTML template contains neither of the supported markdown
// placeholder elements.
func TestBuildFailsWithoutPlaceholder(t *testing.T) {
	dir := t.TempDir()
	readme := filepath.Join(dir, "README.md")
	template := filepath.Join(dir, "template.html")

	// Write a minimal markdown file and a template with no placeholder.
	fixtures := []struct {
		path string
		body string
	}{
		{path: readme, body: "# Title\n"},
		{path: template, body: "<html><body><main></main></body></html>"},
	}
	for _, f := range fixtures {
		if err := os.WriteFile(f.path, []byte(f.body), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	if err := Build(readme, template, filepath.Join(dir, "index.html")); err == nil {
		t.Fatal("expected Build to fail when template has no supported placeholder")
	}
}

View File

@@ -89,6 +89,7 @@ func (hc *HealthCache) Merge(entries []HealthEntry) {
if i, exists := index[e.URL]; exists {
hc.Entries[i] = e
} else {
index[e.URL] = len(hc.Entries)
hc.Entries = append(hc.Entries, e)
}
}

View File

@@ -119,3 +119,19 @@ func TestMerge(t *testing.T) {
t.Errorf("last entry = %q, want C", hc.Entries[2].Name)
}
}
// TestMergeDeduplicatesIncomingBatch verifies that Merge collapses
// duplicate URLs appearing within a single incoming batch, keeping the
// last value seen for the URL.
func TestMergeDeduplicatesIncomingBatch(t *testing.T) {
	var hc HealthCache
	batch := []HealthEntry{
		{URL: "https://github.com/c/c", Name: "C", Stars: 1},
		{URL: "https://github.com/c/c", Name: "C", Stars: 2},
	}
	hc.Merge(batch)

	if got := len(hc.Entries); got != 1 {
		t.Fatalf("entries = %d, want 1", got)
	}
	if got := hc.Entries[0].Stars; got != 2 {
		t.Fatalf("stars = %d, want last value 2", got)
	}
}

View File

@@ -25,6 +25,15 @@ type LinkResult struct {
Error string
}
// shouldFallbackToGET reports whether a HEAD response with the given
// status code warrants retrying the same URL with a GET request. Some
// servers reject or mishandle HEAD while serving GET normally.
func shouldFallbackToGET(statusCode int) bool {
	return statusCode == http.StatusBadRequest ||
		statusCode == http.StatusForbidden ||
		statusCode == http.StatusMethodNotAllowed ||
		statusCode == http.StatusNotImplemented
}
// CheckLink checks a single URL. Uses HEAD first, falls back to GET.
func CheckLink(url string, client *http.Client) LinkResult {
result := LinkResult{URL: url}
@@ -32,14 +41,6 @@ func CheckLink(url string, client *http.Client) LinkResult {
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout)
defer cancel()
// Try HEAD first
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
if err != nil {
result.Error = err.Error()
return result
}
req.Header.Set("User-Agent", userAgent)
// Track redirects
var finalURL string
origCheckRedirect := client.CheckRedirect
@@ -52,16 +53,25 @@ func CheckLink(url string, client *http.Client) LinkResult {
}
defer func() { client.CheckRedirect = origCheckRedirect }()
resp, err := client.Do(req)
doRequest := func(method string) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, method, url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", userAgent)
return client.Do(req)
}
resp, err := doRequest(http.MethodHead)
if err != nil {
// Fallback to GET
req, err2 := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err2 != nil {
resp, err = doRequest(http.MethodGet)
if err != nil {
result.Error = err.Error()
return result
}
req.Header.Set("User-Agent", userAgent)
resp, err = client.Do(req)
} else if shouldFallbackToGET(resp.StatusCode) {
resp.Body.Close()
resp, err = doRequest(http.MethodGet)
if err != nil {
result.Error = err.Error()
return result

View File

@@ -78,3 +78,41 @@ func TestCheckLinks(t *testing.T) {
}
}
}
// TestCheckLinkFallbackToGETOnMethodNotAllowed verifies that CheckLink
// retries with GET when the server rejects HEAD with 405.
func TestCheckLinkFallbackToGETOnMethodNotAllowed(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Reject HEAD so only the GET fallback can succeed.
		status := http.StatusOK
		if r.Method == http.MethodHead {
			status = http.StatusMethodNotAllowed
		}
		w.WriteHeader(status)
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	result := CheckLink(server.URL, &http.Client{})
	if !result.OK {
		t.Errorf("expected OK after GET fallback, got status %d, error: %s", result.StatusCode, result.Error)
	}
	if result.StatusCode != http.StatusOK {
		t.Errorf("status = %d, want 200", result.StatusCode)
	}
}
// TestCheckLinkFallbackToGETOnForbiddenHead verifies that CheckLink
// retries with GET when the server answers HEAD with 403.
func TestCheckLinkFallbackToGETOnForbiddenHead(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Forbid HEAD so only the GET fallback can succeed.
		status := http.StatusOK
		if r.Method == http.MethodHead {
			status = http.StatusForbidden
		}
		w.WriteHeader(status)
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	result := CheckLink(server.URL, &http.Client{})
	if !result.OK {
		t.Errorf("expected OK after GET fallback, got status %d, error: %s", result.StatusCode, result.Error)
	}
	if result.StatusCode != http.StatusOK {
		t.Errorf("status = %d, want 200", result.StatusCode)
	}
}

View File

@@ -20,6 +20,9 @@ var attributionRe = regexp.MustCompile(`\s+(?:(?:[Cc]reated|[Mm]aintained|[Bb]ui
// bareAttributionRe matches: by @author at end of line (no link).
var bareAttributionRe = regexp.MustCompile(`\s+by\s+@\w+\.?$`)
// sectionHeadingRe matches markdown headings.
var sectionHeadingRe = regexp.MustCompile(`^(#{1,6})\s+(.+?)(?:\s*<!--.*-->)?$`)
// RemoveAttribution strips author attribution from a description string.
func RemoveAttribution(desc string) string {
desc = attributionRe.ReplaceAllString(desc, "")
@@ -47,12 +50,6 @@ func FormatEntry(e parser.Entry) string {
return fmt.Sprintf("- [%s](%s) - %s", e.Name, e.URL, desc)
}
// entryGroup tracks a consecutive run of entry lines.
type entryGroup struct {
startIdx int // index in lines slice
entries []parser.Entry
}
// FixFile reads the README, fixes entries (capitalize, period, remove attribution,
// sort), and writes the result back.
func FixFile(path string) (int, error) {
@@ -71,34 +68,39 @@ func FixFile(path string) (int, error) {
return 0, err
}
// Identify entry groups (consecutive parsed entry lines)
var groups []entryGroup
var current *entryGroup
fixCount := 0
var headingLines []int
for i, line := range lines {
entry, err := parser.ParseEntry(line, i+1)
if err != nil {
// Not an entry — close any active group
if current != nil {
groups = append(groups, *current)
current = nil
}
continue
if sectionHeadingRe.MatchString(line) {
headingLines = append(headingLines, i)
}
if current == nil {
current = &entryGroup{startIdx: i}
}
current.entries = append(current.entries, entry)
}
if current != nil {
groups = append(groups, *current)
}
// Process each group: fix entries, sort, replace lines
for _, g := range groups {
// Process each heading block independently to match linter sort scope.
for i, headingIdx := range headingLines {
start := headingIdx + 1
end := len(lines)
if i+1 < len(headingLines) {
end = headingLines[i+1]
}
var entryPositions []int
var entries []parser.Entry
for lineIdx := start; lineIdx < end; lineIdx++ {
entry, err := parser.ParseEntry(lines[lineIdx], lineIdx+1)
if err != nil {
continue
}
entryPositions = append(entryPositions, lineIdx)
entries = append(entries, entry)
}
if len(entries) == 0 {
continue
}
var fixed []parser.Entry
for _, e := range g.entries {
for _, e := range entries {
f := FixEntry(e)
f.Description = RemoveAttribution(f.Description)
// Re-apply period after removing attribution (it may have been stripped)
@@ -109,13 +111,12 @@ func FixFile(path string) (int, error) {
}
sorted := SortEntries(fixed)
for j, e := range sorted {
newLine := FormatEntry(e)
idx := g.startIdx + j
if lines[idx] != newLine {
lineIdx := entryPositions[j]
if lines[lineIdx] != newLine {
fixCount++
lines[idx] = newLine
lines[lineIdx] = newLine
}
}
}

View File

@@ -138,3 +138,56 @@ Some text here.
t.Errorf("expected period added, got:\n%s", result)
}
}
// TestFixFileSortsAcrossBlankLinesAndIsIdempotent checks that FixFile
// reorders out-of-order entries under a heading, and that a second run
// over the already-fixed file reports zero changes.
func TestFixFileSortsAcrossBlankLinesAndIsIdempotent(t *testing.T) {
	const content = `# Awesome Docker
## Tools
- [Zulu](https://example.com/zulu) - z tool
- [Alpha](https://example.com/alpha) - a tool
`
	tmp, err := os.CreateTemp("", "readme-*.md")
	if err != nil {
		t.Fatal(err)
	}
	defer os.Remove(tmp.Name())
	if _, err := tmp.WriteString(content); err != nil {
		t.Fatal(err)
	}
	tmp.Close()

	// First run: the entries are out of order, so fixes must be applied.
	changed, err := FixFile(tmp.Name())
	if err != nil {
		t.Fatal(err)
	}
	if changed == 0 {
		t.Fatal("expected first run to apply fixes")
	}

	data, err := os.ReadFile(tmp.Name())
	if err != nil {
		t.Fatal(err)
	}
	got := string(data)
	alpha, zulu := strings.Index(got, "[Alpha]"), strings.Index(got, "[Zulu]")
	if alpha == -1 || zulu == -1 {
		t.Fatalf("expected both Alpha and Zulu in result:\n%s", got)
	}
	if alpha > zulu {
		t.Fatalf("expected Alpha before Zulu after fix:\n%s", got)
	}

	// Second run: the file is already canonical, so nothing should change.
	changed, err = FixFile(tmp.Name())
	if err != nil {
		t.Fatal(err)
	}
	if changed != 0 {
		t.Fatalf("expected second run to be idempotent, got %d changes", changed)
	}
}

View File

@@ -18,10 +18,13 @@ var entryRe = regexp.MustCompile(`^[-*]\s+\[([^\]]+)\]\(([^)]+)\)(.*?)\s+-\s+(.+
// headingRe matches markdown headings: # Title, ## Title, etc.
var headingRe = regexp.MustCompile(`^(#{1,6})\s+(.+?)(?:\s*<!--.*-->)?$`)
var markerMap = map[string]Marker{
":skull:": MarkerAbandoned,
":heavy_dollar_sign:": MarkerPaid,
":construction:": MarkerWIP,
var markerDefs = []struct {
text string
marker Marker
}{
{text: ":skull:", marker: MarkerAbandoned},
{text: ":heavy_dollar_sign:", marker: MarkerPaid},
{text: ":construction:", marker: MarkerWIP},
}
// ParseEntry parses a single markdown list line into an Entry.
@@ -36,11 +39,11 @@ func ParseEntry(line string, lineNum int) (Entry, error) {
var markers []Marker
// Extract markers from both the middle section and the description
for text, marker := range markerMap {
if strings.Contains(middle, text) || strings.Contains(desc, text) {
markers = append(markers, marker)
middle = strings.ReplaceAll(middle, text, "")
desc = strings.ReplaceAll(desc, text, "")
for _, def := range markerDefs {
if strings.Contains(middle, def.text) || strings.Contains(desc, def.text) {
markers = append(markers, def.marker)
middle = strings.ReplaceAll(middle, def.text, "")
desc = strings.ReplaceAll(desc, def.text, "")
}
}
desc = strings.TrimSpace(desc)

View File

@@ -54,6 +54,20 @@ func TestParseEntryMultipleMarkers(t *testing.T) {
}
}
// TestParseEntryMarkersCanonicalOrder verifies that markers come back in
// the fixed definition order (abandoned before WIP), regardless of where
// they appear on the source line.
func TestParseEntryMarkersCanonicalOrder(t *testing.T) {
	const line = `- [SomeProject](https://example.com) - :construction: A project. :skull:`

	entry, err := ParseEntry(line, 1)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(entry.Markers) != 2 {
		t.Fatalf("markers count = %d, want 2", len(entry.Markers))
	}
	first, second := entry.Markers[0], entry.Markers[1]
	if first != MarkerAbandoned || second != MarkerWIP {
		t.Fatalf("marker order = %v, want [MarkerAbandoned MarkerWIP]", entry.Markers)
	}
}
func TestParseDocument(t *testing.T) {
input := `# Awesome Docker

View File

@@ -23,12 +23,13 @@ const (
// ScoredEntry is a repo with its computed health status.
type ScoredEntry struct {
URL string
Name string
Status Status
Stars int
Forks int
LastPush time.Time
URL string
Name string
Status Status
Stars int
Forks int
HasLicense bool
LastPush time.Time
}
// ReportSummary contains grouped status counts.
@@ -75,12 +76,13 @@ func ScoreAll(infos []checker.RepoInfo) []ScoredEntry {
results := make([]ScoredEntry, len(infos))
for i, info := range infos {
results[i] = ScoredEntry{
URL: info.URL,
Name: fmt.Sprintf("%s/%s", info.Owner, info.Name),
Status: Score(info),
Stars: info.Stars,
Forks: info.Forks,
LastPush: info.PushedAt,
URL: info.URL,
Name: fmt.Sprintf("%s/%s", info.Owner, info.Name),
Status: Score(info),
Stars: info.Stars,
Forks: info.Forks,
HasLicense: info.HasLicense,
LastPush: info.PushedAt,
}
}
return results
@@ -92,13 +94,14 @@ func ToCacheEntries(scored []ScoredEntry) []cache.HealthEntry {
now := time.Now().UTC()
for i, s := range scored {
entries[i] = cache.HealthEntry{
URL: s.URL,
Name: s.Name,
Status: string(s.Status),
Stars: s.Stars,
Forks: s.Forks,
LastPush: s.LastPush,
CheckedAt: now,
URL: s.URL,
Name: s.Name,
Status: string(s.Status),
Stars: s.Stars,
Forks: s.Forks,
HasLicense: s.HasLicense,
LastPush: s.LastPush,
CheckedAt: now,
}
}
return entries