fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.1 kB View raw
1package index 2 3import ( 4 "bytes" 5 "fmt" 6 "io/fs" 7 "os" 8 "path/filepath" 9 "testing" 10) 11 12// Set ZOEKT_BENCH_REPO to a source tree (e.g. a kubernetes checkout) to enable. 13// 14// git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s 15// ZOEKT_BENCH_REPO=/tmp/k8s go test ./index/ -bench=BenchmarkPostings -benchmem -count=5 -timeout=600s 16 17func requireBenchRepo(b *testing.B) string { 18 b.Helper() 19 dir := os.Getenv("ZOEKT_BENCH_REPO") 20 if dir == "" { 21 b.Skip("ZOEKT_BENCH_REPO not set") 22 } 23 return dir 24} 25 26// loadRepoFiles walks dir and returns file contents, skipping binary files, 27// empty files, and anything over 1 MB. Returns at most maxFiles entries. 28func loadRepoFiles(b *testing.B, dir string, maxFiles int) [][]byte { 29 b.Helper() 30 var files [][]byte 31 err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { 32 if err != nil { 33 return nil 34 } 35 if d.IsDir() { 36 switch d.Name() { 37 case ".git", "vendor", "node_modules": 38 return filepath.SkipDir 39 } 40 return nil 41 } 42 if len(files) >= maxFiles { 43 return filepath.SkipAll 44 } 45 info, err := d.Info() 46 if err != nil || info.Size() == 0 || info.Size() > 1<<20 { 47 return nil 48 } 49 data, err := os.ReadFile(path) 50 if err != nil { 51 return nil 52 } 53 if bytes.IndexByte(data, 0) >= 0 { 54 return nil // binary 55 } 56 files = append(files, data) 57 return nil 58 }) 59 if err != nil { 60 b.Fatalf("walking repo: %v", err) 61 } 62 if len(files) == 0 { 63 b.Fatal("no files found in repo") 64 } 65 return files 66} 67 68func totalSize(files [][]byte) int64 { 69 var n int64 70 for _, f := range files { 71 n += int64(len(f)) 72 } 73 return n 74} 75 76// BenchmarkPostings_NewSearchableString measures the core hot path: trigram 77// extraction, map lookups, delta encoding, and per-trigram slice growth. 78// Sub-benchmarks vary corpus size to show scaling with map size. 79func BenchmarkPostings_NewSearchableString(b *testing.B) { 80 dir := requireBenchRepo(b) 81 allFiles := loadRepoFiles(b, dir, 50_000) 82 b.Logf("loaded %d files, %.1f MB", len(allFiles), float64(totalSize(allFiles))/(1<<20)) 83 84 for _, n := range []int{1_000, 5_000, len(allFiles)} { 85 n = min(n, len(allFiles)) 86 files := allFiles[:n] 87 size := totalSize(files) 88 89 b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { 90 b.ReportAllocs() 91 for b.Loop() { 92 pb := newPostingsBuilder(defaultShardMax) 93 for _, data := range files { 94 _, _, _ = pb.newSearchableString(data, nil) 95 } 96 } 97 b.ReportMetric(float64(size), "input-bytes/op") 98 }) 99 } 100} 101 102// BenchmarkPostings_Reuse measures the warm path: building postings with a 103// reset (pooled) postingsBuilder that retains its map and slice allocations 104// from a previous shard build. 105func BenchmarkPostings_Reuse(b *testing.B) { 106 dir := requireBenchRepo(b) 107 allFiles := loadRepoFiles(b, dir, 50_000) 108 size := totalSize(allFiles) 109 b.Logf("loaded %d files, %.1f MB", len(allFiles), float64(size)/(1<<20)) 110 111 // Warm up the builder so it has allocated map entries and slices. 112 pb := newPostingsBuilder(defaultShardMax) 113 for _, data := range allFiles { 114 _, _, _ = pb.newSearchableString(data, nil) 115 } 116 117 b.ResetTimer() 118 b.ReportAllocs() 119 for b.Loop() { 120 pb.reset() 121 for _, data := range allFiles { 122 _, _, _ = pb.newSearchableString(data, nil) 123 } 124 } 125 b.ReportMetric(float64(size), "input-bytes/op") 126} 127 128// BenchmarkPostings_WritePostings measures the marshaling path: sorting ngram 129// keys and writing varint-encoded posting lists. 130func BenchmarkPostings_WritePostings(b *testing.B) { 131 dir := requireBenchRepo(b) 132 allFiles := loadRepoFiles(b, dir, 50_000) 133 134 pb := newPostingsBuilder(defaultShardMax) 135 for _, data := range allFiles { 136 _, _, _ = pb.newSearchableString(data, nil) 137 } 138 b.Logf("built %d unique ngrams from %d files, %.1f MB", len(pb.postings), len(allFiles), float64(totalSize(allFiles))/(1<<20)) 139 140 buf := &bytes.Buffer{} 141 b.ResetTimer() 142 b.ReportAllocs() 143 for b.Loop() { 144 buf.Reset() 145 w := &writer{w: buf} 146 var ngramText, charOffsets, endRunes simpleSection 147 var postings compoundSection 148 writePostings(w, pb, &ngramText, &charOffsets, &postings, &endRunes) 149 } 150}