Skip to content
56 changes: 51 additions & 5 deletions cmd/labs/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,20 @@ func WithUserContentOverride(ctx context.Context, override string) context.Conte

var ErrNotFound = errors.New("not found")

// pagedResponse is the result of a single GitHub API request that may be one
// page of a larger, paginated result set.
type pagedResponse struct {
// Body is the complete response body as read from the HTTP response.
Body []byte
// NextLink is the URL marked rel="next" in the response's "link" header,
// or the empty string when the header carries no such entry.
NextLink string
}

// getBytes issues a single request via getPagedBytes and hands back only the
// response body, discarding any pagination link that came with it.
// On failure the error from getPagedBytes is returned unchanged.
func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte, error) {
	page, err := getPagedBytes(ctx, method, url, body)
	if err == nil {
		return page.Body, nil
	}
	return nil, err
}

func getPagedBytes(ctx context.Context, method, url string, body io.Reader) (*pagedResponse, error) {
ao, ok := ctx.Value(&apiOverride).(string)
if ok {
url = strings.Replace(url, gitHubAPI, ao, 1)
Expand All @@ -57,14 +70,47 @@ func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte,
if res.StatusCode >= 400 {
return nil, fmt.Errorf("github request failed: %s", res.Status)
}
nextLink := parseNextLink(res.Header.Get("link"))
defer res.Body.Close()
return io.ReadAll(res.Body)
bodyBytes, err := io.ReadAll(res.Body)
if err != nil {
return nil, err
}
return &pagedResponse{
Body: bodyBytes,
NextLink: nextLink,
}, nil
}

// parseNextLink returns the target URL of the rel="next" entry in a GitHub
// "link" response header, or "" when the header is empty or has no usable
// "next" entry.
//
// The header format is described in the GitHub pagination guide:
// https://docs.github.com/en/rest/using-the-rest-api/using-pagination-in-the-rest-api?apiVersion=2022-11-28#using-link-headers
// A typical value looks like:
//
//	<https://api.github.com/repositories/1300192/issues?page=2>; rel="prev", <https://api.github.com/repositories/1300192/issues?page=4>; rel="next"
//
// Parsing is deliberately strict: an entry is only considered when it has
// exactly one ';' separating the target from its parameters, and the target
// must be wrapped in angle brackets.
func parseNextLink(linkHeader string) string {
	for _, entry := range strings.Split(linkHeader, ",") {
		target, params, found := strings.Cut(strings.TrimSpace(entry), ";")
		// Reject entries with no ';' at all, as well as entries with more than one.
		if !found || strings.Contains(params, ";") {
			continue
		}
		if !strings.Contains(params, `rel="next"`) {
			continue
		}
		target = strings.TrimSpace(target)
		if strings.HasPrefix(target, "<") && strings.HasSuffix(target, ">") {
			return target[1 : len(target)-1]
		}
	}
	return ""
}

func httpGetAndUnmarshal(ctx context.Context, url string, response any) error {
raw, err := getBytes(ctx, "GET", url, nil)
func httpGetAndUnmarshal(ctx context.Context, url string, response any) (string, error) {
raw, err := getPagedBytes(ctx, "GET", url, nil)
if err != nil {
return err
return "", err
}
return json.Unmarshal(raw, response)
return raw.NextLink, json.Unmarshal(raw.Body, response)
}
86 changes: 86 additions & 0 deletions cmd/labs/github/github_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package github

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestParseNextLink(t *testing.T) {
tests := []struct {
name string
linkHeader string
expected string
}{
// First and foremost, the well-formed cases that we can expect from real GitHub API.
{
name: "no header",
linkHeader: "",
expected: "",
},
{
name: "documentation example",
linkHeader: `<https://api.github.com/repositories/1300192/issues?page=2>; rel="prev", <https://api.github.com/repositories/1300192/issues?page=4>; rel="next", <https://api.github.com/repositories/1300192/issues?page=515>; rel="last", <https://api.github.com/repositories/1300192/issues?page=1>; rel="first"`,
expected: "https://api.github.com/repositories/1300192/issues?page=4",
},
{
name: "with next only",
linkHeader: `<https://api.github.com/repos/databricks/cli/issues?page=2>; rel="next"`,
expected: "https://api.github.com/repos/databricks/cli/issues?page=2",
},
{
name: "without next",
linkHeader: `<https://api.github.com/repositories/1300192/issues?page=1>; rel="prev", <https://api.github.com/repositories/1300192/issues?page=1>; rel="first", <https://api.github.com/repositories/1300192/issues?page=515>; rel="last"`,
expected: "",
},
{
name: "next at beginning",
linkHeader: `<https://api.github.com/repos/test/test?page=5>; rel="next", <https://api.github.com/repos/test/test?page=10>; rel="last"`,
expected: "https://api.github.com/repos/test/test?page=5",
},
{
name: "next at end",
linkHeader: `<https://api.github.com/repos/test/test?page=10>; rel="last", <https://api.github.com/repos/test/test?page=5>; rel="next"`,
expected: "https://api.github.com/repos/test/test?page=5",
},
// Malformed cases to ensure robustness. (These should not occur in practice, but are here to demonstrate resilience.)
{
name: "malformed no semicolon",
linkHeader: `<https://api.github.com/repos/test/test?page=2> rel="next"`,
expected: "",
},
{
name: "malformed no angle-brackets",
linkHeader: `https://api.github.com/repos/test/test?page=2; rel="next"`,
expected: "",
},
{
name: "malformed multiple parts",
linkHeader: `<https://api.github.com/repos/test/test?page=2>; rel="next"; extra="value"`,
expected: "",
},
{
name: "malformed no url",
linkHeader: `<>; rel="next"`,
expected: "",
},
{
name: "malformed empty link",
linkHeader: `, <https://api.github.com/repos/test/test?page=5>; rel="next"`,
expected: "https://api.github.com/repos/test/test?page=5",
},
// Borderline case: some tolerance of whitespace.
{
name: "tolerate whitespace",
linkHeader: ` <https://api.github.com/repos/test/test?page=2> ; rel="next" `,
expected: "https://api.github.com/repos/test/test?page=2",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := parseNextLink(tt.linkHeader)
assert.Equal(t, tt.expected, result)
})
}
}
2 changes: 1 addition & 1 deletion cmd/labs/github/releases.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func getVersions(ctx context.Context, org, repo string) (Versions, error) {
var releases Versions
log.Debugf(ctx, "Fetching latest releases for %s/%s from GitHub API", org, repo)
url := fmt.Sprintf("%s/repos/%s/%s/releases", gitHubAPI, org, repo)
err := httpGetAndUnmarshal(ctx, url, &releases)
_, err := httpGetAndUnmarshal(ctx, url, &releases)
return releases, err
}

Expand Down
21 changes: 15 additions & 6 deletions cmd/labs/github/repositories.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,30 @@ func (r *repositoryCache) Load(ctx context.Context) (Repositories, error) {
})
}

// getRepositories is considered to be privata API, as we want the usage to go through a cache
// getRepositories is considered to be private API, as we want the usage to go through a cache
func getRepositories(ctx context.Context, org string) (Repositories, error) {
var repos Repositories
var allRepos Repositories
log.Debugf(ctx, "Loading repositories for %s from GitHub API", org)
url := fmt.Sprintf("%s/users/%s/repos", gitHubAPI, org)
err := httpGetAndUnmarshal(ctx, url, &repos)
return repos, err
url := fmt.Sprintf("%s/users/%s/repos?per_page=100", gitHubAPI, org)

for url != "" {
var repos Repositories
nextUrl, err := httpGetAndUnmarshal(ctx, url, &repos)
if err != nil {
return nil, err
}
allRepos = append(allRepos, repos...)
url = nextUrl
}
return allRepos, nil
}

// Repositories is a slice of ghRepo values, as returned by getRepositories.
type Repositories []ghRepo

type ghRepo struct {
Name string `json:"name"`
Description string `json:"description"`
Langauge string `json:"language"`
Language string `json:"language"`
DefaultBranch string `json:"default_branch"`
Stars int `json:"stargazers_count"`
IsFork bool `json:"fork"`
Expand Down
61 changes: 61 additions & 0 deletions cmd/labs/github/repositories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ package github

import (
"context"
"fmt"
"maps"
"net/http"
"net/http/httptest"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -29,3 +32,61 @@ func TestRepositories(t *testing.T) {
assert.NoError(t, err)
assert.NotEmpty(t, all)
}

// TestRepositoriesPagination serves three pages of repositories from a stub
// HTTP server, chained together through "link" headers, and checks that
// getRepositories requests each page in order and concatenates the results.
func TestRepositoriesPagination(t *testing.T) {
	var seen []string

	handler := func(w http.ResponseWriter, r *http.Request) {
		seen = append(seen, r.URL.String())

		// Only the repository listing endpoint is stubbed; anything else is a test bug.
		if r.URL.Path != "/users/databrickslabs/repos" {
			t.Logf("Requested: %s", r.URL.String())
			panic("stub required")
		}

		query := r.URL.Query()

		// pageLink renders one link-header entry that points at the given page
		// of the current request, keeping all other query parameters.
		pageLink := func(page int, rel string) string {
			values := maps.Clone(query)
			values.Set("page", strconv.Itoa(page))
			target := *r.URL
			target.Scheme = "http"
			target.Host = r.Host
			target.RawQuery = values.Encode()
			return fmt.Sprintf(`<%s>; rel="%s"`, target.String(), rel)
		}

		// Three pages of results, wired together with next and prev links.
		var link, body string
		switch query.Get("page") {
		case "", "1":
			link = pageLink(2, "next")
			body = `[{"name": "repo1"}, {"name": "repo2"}]`
		case "2":
			link = pageLink(1, "prev") + ", " + pageLink(3, "next")
			body = `[{"name": "repo3"}, {"name": "repo4"}]`
		case "3":
			link = pageLink(2, "prev")
			body = `[{"name": "repo5"}]`
		}

		w.Header().Set("link", link)
		_, err := w.Write([]byte(body))
		assert.NoError(t, err)
	}

	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	ctx := WithApiOverride(context.Background(), server.URL)

	repos, err := getRepositories(ctx, "databrickslabs")
	assert.NoError(t, err)

	var names []string
	for _, repo := range repos {
		names = append(names, repo.Name)
	}

	wantURLs := []string{
		"/users/databrickslabs/repos?per_page=100",
		"/users/databrickslabs/repos?page=2&per_page=100",
		"/users/databrickslabs/repos?page=3&per_page=100",
	}
	assert.Equal(t, wantURLs, seen)
	assert.Equal(t, []string{"repo1", "repo2", "repo3", "repo4", "repo5"}, names)
}