fork of https://github.com/sourcegraph/zoekt
1package gitindex
2
import (
	"bytes"
	"fmt"
	"io"
	"os"
	"slices"
	"testing"

	"github.com/go-git/go-git/v5/plumbing"
)
11
12// Set ZOEKT_BENCH_REPO to a git checkout to enable these benchmarks.
13//
14// git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s
15// ZOEKT_BENCH_REPO=/tmp/k8s go test ./gitindex/ -bench=BenchmarkBlobRead -benchmem -count=5 -timeout=600s
16
// requireBenchGitRepo returns the path stored in the ZOEKT_BENCH_REPO
// environment variable. When the variable is unset or empty the calling
// benchmark is skipped, so the benchmarks only run when explicitly enabled.
func requireBenchGitRepo(b *testing.B) string {
	b.Helper()
	if repoPath := os.Getenv("ZOEKT_BENCH_REPO"); repoPath != "" {
		return repoPath
	}
	// b.Skip stops the benchmark goroutine, so the return below only
	// satisfies the compiler.
	b.Skip("ZOEKT_BENCH_REPO not set")
	return ""
}
25
26// collectBlobKeys opens the repo, walks HEAD, and returns all fileKeys with
27// their BlobLocations plus the repo directory path.
28func collectBlobKeys(b *testing.B, repoDir string) (map[fileKey]BlobLocation, string) {
29 b.Helper()
30
31 repo, closer, err := openRepo(repoDir)
32 if err != nil {
33 b.Fatalf("openRepo: %v", err)
34 }
35 b.Cleanup(func() { closer.Close() })
36
37 head, err := repo.Head()
38 if err != nil {
39 b.Fatalf("Head: %v", err)
40 }
41
42 commit, err := repo.CommitObject(head.Hash())
43 if err != nil {
44 b.Fatalf("CommitObject: %v", err)
45 }
46
47 tree, err := commit.Tree()
48 if err != nil {
49 b.Fatalf("Tree: %v", err)
50 }
51
52 rw := NewRepoWalker(repo, "https://example.com/repo", nil)
53 if _, err := rw.CollectFiles(tree, "HEAD", nil); err != nil {
54 b.Fatalf("CollectFiles: %v", err)
55 }
56
57 return rw.Files, repoDir
58}
59
60// sortedBlobKeys returns fileKeys for deterministic iteration.
61func sortedBlobKeys(files map[fileKey]BlobLocation) []fileKey {
62 keys := make([]fileKey, 0, len(files))
63 for k := range files {
64 keys = append(keys, k)
65 }
66 return keys
67}
68
69// BenchmarkBlobRead_GoGit measures the current go-git BlobObject approach:
70// sequential calls to repo.GitRepo.BlobObject(hash) for each file.
71func BenchmarkBlobRead_GoGit(b *testing.B) {
72 repoDir := requireBenchGitRepo(b)
73 files, _ := collectBlobKeys(b, repoDir)
74 keys := sortedBlobKeys(files)
75 b.Logf("collected %d blob keys", len(keys))
76
77 for _, n := range []int{1_000, 5_000, len(keys)} {
78 n = min(n, len(keys))
79 subset := keys[:n]
80
81 b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) {
82 b.ReportAllocs()
83 var totalBytes int64
84 for b.Loop() {
85 totalBytes = 0
86 for _, key := range subset {
87 loc := files[key]
88 blob, err := loc.GitRepo.BlobObject(key.ID)
89 if err != nil {
90 b.Fatalf("BlobObject(%s): %v", key.ID, err)
91 }
92 r, err := blob.Reader()
93 if err != nil {
94 b.Fatalf("Reader: %v", err)
95 }
96 n, err := io.Copy(io.Discard, r)
97 r.Close()
98 if err != nil {
99 b.Fatalf("Read: %v", err)
100 }
101 totalBytes += n
102 }
103 }
104 b.ReportMetric(float64(totalBytes), "content-bytes/op")
105 b.ReportMetric(float64(len(subset)), "files/op")
106 })
107 }
108}
109
110// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader approach:
111// all SHAs written to stdin at once via --buffer, responses read one at a time.
112// This is the production path used by indexGitRepo.
113func BenchmarkBlobRead_CatfileReader(b *testing.B) {
114 repoDir := requireBenchGitRepo(b)
115 files, gitDir := collectBlobKeys(b, repoDir)
116 keys := sortedBlobKeys(files)
117 b.Logf("collected %d blob keys", len(keys))
118
119 ids := make([]plumbing.Hash, len(keys))
120 for i, k := range keys {
121 ids[i] = k.ID
122 }
123
124 for _, n := range []int{1_000, 5_000, len(keys)} {
125 n = min(n, len(keys))
126 subset := ids[:n]
127
128 b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) {
129 b.ReportAllocs()
130 var totalBytes int64
131 for b.Loop() {
132 totalBytes = 0
133 cr, err := newCatfileReader(gitDir, subset)
134 if err != nil {
135 b.Fatalf("newCatfileReader: %v", err)
136 }
137 for range subset {
138 size, missing, err := cr.Next()
139 if err != nil {
140 cr.Close()
141 b.Fatalf("Next: %v", err)
142 }
143 if missing {
144 continue
145 }
146 content := make([]byte, size)
147 if _, err := io.ReadFull(cr, content); err != nil {
148 cr.Close()
149 b.Fatalf("ReadFull: %v", err)
150 }
151 totalBytes += int64(len(content))
152 }
153 cr.Close()
154 }
155 b.ReportMetric(float64(totalBytes), "content-bytes/op")
156 b.ReportMetric(float64(len(subset)), "files/op")
157 })
158 }
159}