fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package gitindex 2 3import ( 4 "fmt" 5 "io" 6 "os" 7 "testing" 8 9 "github.com/go-git/go-git/v5/plumbing" 10) 11 12// Set ZOEKT_BENCH_REPO to a git checkout to enable these benchmarks. 13// 14// git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s 15// ZOEKT_BENCH_REPO=/tmp/k8s go test ./gitindex/ -bench=BenchmarkBlobRead -benchmem -count=5 -timeout=600s 16 17func requireBenchGitRepo(b *testing.B) string { 18 b.Helper() 19 dir := os.Getenv("ZOEKT_BENCH_REPO") 20 if dir == "" { 21 b.Skip("ZOEKT_BENCH_REPO not set") 22 } 23 return dir 24} 25 26// collectBlobKeys opens the repo, walks HEAD, and returns all fileKeys with 27// their BlobLocations plus the repo directory path. 28func collectBlobKeys(b *testing.B, repoDir string) (map[fileKey]BlobLocation, string) { 29 b.Helper() 30 31 repo, closer, err := openRepo(repoDir) 32 if err != nil { 33 b.Fatalf("openRepo: %v", err) 34 } 35 b.Cleanup(func() { closer.Close() }) 36 37 head, err := repo.Head() 38 if err != nil { 39 b.Fatalf("Head: %v", err) 40 } 41 42 commit, err := repo.CommitObject(head.Hash()) 43 if err != nil { 44 b.Fatalf("CommitObject: %v", err) 45 } 46 47 tree, err := commit.Tree() 48 if err != nil { 49 b.Fatalf("Tree: %v", err) 50 } 51 52 rw := NewRepoWalker(repo, "https://example.com/repo", nil) 53 if _, err := rw.CollectFiles(tree, "HEAD", nil); err != nil { 54 b.Fatalf("CollectFiles: %v", err) 55 } 56 57 return rw.Files, repoDir 58} 59 60// sortedBlobKeys returns fileKeys for deterministic iteration. 61func sortedBlobKeys(files map[fileKey]BlobLocation) []fileKey { 62 keys := make([]fileKey, 0, len(files)) 63 for k := range files { 64 keys = append(keys, k) 65 } 66 return keys 67} 68 69// BenchmarkBlobRead_GoGit measures the current go-git BlobObject approach: 70// sequential calls to repo.GitRepo.BlobObject(hash) for each file. 71func BenchmarkBlobRead_GoGit(b *testing.B) { 72 repoDir := requireBenchGitRepo(b) 73 files, _ := collectBlobKeys(b, repoDir) 74 keys := sortedBlobKeys(files) 75 b.Logf("collected %d blob keys", len(keys)) 76 77 for _, n := range []int{1_000, 5_000, len(keys)} { 78 n = min(n, len(keys)) 79 subset := keys[:n] 80 81 b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { 82 b.ReportAllocs() 83 var totalBytes int64 84 for b.Loop() { 85 totalBytes = 0 86 for _, key := range subset { 87 loc := files[key] 88 blob, err := loc.GitRepo.BlobObject(key.ID) 89 if err != nil { 90 b.Fatalf("BlobObject(%s): %v", key.ID, err) 91 } 92 r, err := blob.Reader() 93 if err != nil { 94 b.Fatalf("Reader: %v", err) 95 } 96 n, err := io.Copy(io.Discard, r) 97 r.Close() 98 if err != nil { 99 b.Fatalf("Read: %v", err) 100 } 101 totalBytes += n 102 } 103 } 104 b.ReportMetric(float64(totalBytes), "content-bytes/op") 105 b.ReportMetric(float64(len(subset)), "files/op") 106 }) 107 } 108} 109 110// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader approach: 111// all SHAs written to stdin at once via --buffer, responses read one at a time. 112// This is the production path used by indexGitRepo. 113func BenchmarkBlobRead_CatfileReader(b *testing.B) { 114 repoDir := requireBenchGitRepo(b) 115 files, gitDir := collectBlobKeys(b, repoDir) 116 keys := sortedBlobKeys(files) 117 b.Logf("collected %d blob keys", len(keys)) 118 119 ids := make([]plumbing.Hash, len(keys)) 120 for i, k := range keys { 121 ids[i] = k.ID 122 } 123 124 for _, n := range []int{1_000, 5_000, len(keys)} { 125 n = min(n, len(keys)) 126 subset := ids[:n] 127 128 b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { 129 b.ReportAllocs() 130 var totalBytes int64 131 for b.Loop() { 132 totalBytes = 0 133 cr, err := newCatfileReader(gitDir, subset, catfileReaderOptions{}) 134 if err != nil { 135 b.Fatalf("newCatfileReader: %v", err) 136 } 137 for range subset { 138 size, missing, excluded, err := cr.Next() 139 if err != nil { 140 cr.Close() 141 b.Fatalf("Next: %v", err) 142 } 143 if missing || excluded { 144 continue 145 } 146 content := make([]byte, size) 147 if _, err := io.ReadFull(cr, content); err != nil { 148 cr.Close() 149 b.Fatalf("ReadFull: %v", err) 150 } 151 totalBytes += int64(len(content)) 152 } 153 cr.Close() 154 } 155 b.ReportMetric(float64(totalBytes), "content-bytes/op") 156 b.ReportMetric(float64(len(subset)), "files/op") 157 }) 158 } 159}