fork of https://github.com/sourcegraph/zoekt
1package query
2
3import (
4 "bytes"
5 "crypto/sha256"
6 "encoding/binary"
7 "encoding/gob"
8 "fmt"
9 "sort"
10 "testing"
11
12 "github.com/RoaringBitmap/roaring"
13 "github.com/google/go-cmp/cmp"
14)
15
// We benchmark via gob because that lets us compare our custom marshalling
// against the same data encoded without any custom marshalling.
18
19func BenchmarkRepoBranches_Encode(b *testing.B) {
20 repoBranches := genRepoBranches(5_500_000)
21
22 // do one write to amortize away the cost of gob registration
23 w := &countWriter{}
24 enc := gob.NewEncoder(w)
25 if err := enc.Encode(repoBranches); err != nil {
26 b.Fatal(err)
27 }
28
29 b.ResetTimer()
30 b.ReportAllocs()
31
32 b.ReportMetric(float64(w.n), "bytes")
33
34 for n := 0; n < b.N; n++ {
35 if err := enc.Encode(repoBranches); err != nil {
36 b.Fatal(err)
37 }
38 }
39}
40
41func BenchmarkBranchesRepos_Encode(b *testing.B) {
42 brs := genBranchesRepos(5_500_000)
43
44 // do one write to amortize away the cost of gob registration
45 w := &countWriter{}
46 enc := gob.NewEncoder(w)
47 if err := enc.Encode(brs); err != nil {
48 b.Fatal(err)
49 }
50
51 b.ResetTimer()
52 b.ReportAllocs()
53
54 b.ReportMetric(float64(w.n), "bytes")
55
56 for n := 0; n < b.N; n++ {
57 if err := enc.Encode(brs); err != nil {
58 b.Fatal(err)
59 }
60 }
61}
62
63func BenchmarkBranchesRepos_Decode(b *testing.B) {
64 brs := genBranchesRepos(5_500_000)
65
66 var buf bytes.Buffer
67 if err := gob.NewEncoder(&buf).Encode(brs); err != nil {
68 b.Fatal(err)
69 }
70
71 b.ResetTimer()
72 b.ReportAllocs()
73
74 for n := 0; n < b.N; n++ {
75 // We need to include gob.NewDecoder cost to avoid measuring encoding.
76 var brs BranchesRepos
77 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&brs); err != nil {
78 b.Fatal(err)
79 }
80 }
81}
82
83func TestBranchesRepos_Marshal(t *testing.T) {
84 want := genBranchesRepos(1000)
85
86 var buf bytes.Buffer
87 if err := gob.NewEncoder(&buf).Encode(want); err != nil {
88 t.Fatal(err)
89 }
90
91 var got BranchesRepos
92 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil {
93 t.Fatal(err)
94 }
95
96 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
97 if diff := cmp.Diff(want, &got, tr); diff != "" {
98 t.Fatalf("mismatch IDs (-want +got):\n%s", diff)
99 }
100}
101
102func BenchmarkFileNameSet_Encode(b *testing.B) {
103 set := genFileNameSet(1000)
104
105 // do one write to amortize away the cost of gob registration
106 w := &countWriter{}
107 enc := gob.NewEncoder(w)
108 if err := enc.Encode(set); err != nil {
109 b.Fatal(err)
110 }
111
112 b.ResetTimer()
113 b.ReportAllocs()
114
115 b.ReportMetric(float64(w.n), "bytes")
116
117 for n := 0; n < b.N; n++ {
118 if err := enc.Encode(set); err != nil {
119 b.Fatal(err)
120 }
121 }
122}
123
124func BenchmarkFileNameSet_Decode(b *testing.B) {
125 set := genFileNameSet(1000)
126
127 var buf bytes.Buffer
128 if err := gob.NewEncoder(&buf).Encode(set); err != nil {
129 b.Fatal(err)
130 }
131
132 b.ResetTimer()
133 b.ReportAllocs()
134
135 for n := 0; n < b.N; n++ {
136 // We need to include gob.NewDecoder cost to avoid measuring encoding.
137 var repoBranches FileNameSet
138 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&repoBranches); err != nil {
139 b.Fatal(err)
140 }
141 }
142}
143
144func TestFileNameSet_Marshal(t *testing.T) {
145 for i := range []int{0, 1, 10, 100} {
146 want := genFileNameSet(i)
147
148 var buf bytes.Buffer
149 if err := gob.NewEncoder(&buf).Encode(want); err != nil {
150 t.Fatal(err)
151 }
152
153 var got FileNameSet
154 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil {
155 t.Fatal(err)
156 }
157
158 if diff := cmp.Diff(want, &got); diff != "" {
159 t.Fatalf("mismatch for set size %d (-want +got):\n%s", i, diff)
160 }
161 }
162}
163
164func genFileNameSet(size int) *FileNameSet {
165 set := make(map[string]struct{}, size)
166 for i := range size {
167 set[genName(i)] = struct{}{}
168 }
169 return &FileNameSet{Set: set}
170}
171
// genCache memoizes generated fixtures by key. Generating 5.5M repos makes
// benchmark setup slow, so a fixture that has already been built is reused
// instead of being generated again.
var genCache = make(map[string]any)
174
175func genRepoBranches(n int) map[string][]string {
176 repoBranches := map[string][]string{}
177 orgIndex := 0
178 repoIndex := 0
179
180 for i := range n {
181 org := genName(orgIndex)
182 name := "github.com/" + org + "/" + genName(orgIndex*2+repoIndex)
183 repoBranches[name] = []string{"HEAD"}
184 if repoIndex%50 == 0 {
185 repoBranches[name] = append(repoBranches[name], "more", "branches")
186 }
187
188 if i%1000 == 0 {
189 orgIndex++
190 repoIndex = 0
191 }
192
193 repoIndex++
194 }
195
196 return repoBranches
197}
198
// genName deterministically maps n to a 10-character lowercase hex string:
// the hex form of the first five bytes of the SHA-256 digest of n written as
// a little-endian uint64.
func genName(n int) string {
	var raw [8]byte
	binary.LittleEndian.PutUint64(raw[:], uint64(n))

	digest := sha256.Sum256(raw[:])
	// Five digest bytes render as exactly ten hex characters.
	return fmt.Sprintf("%x", digest[:5])
}
204
205func genBranchesRepos(n int) *BranchesRepos {
206 key := fmt.Sprintf("BranchesRepos:%d", n)
207 val, ok := genCache[key]
208 if ok {
209 return val.(*BranchesRepos)
210 }
211
212 set := genRepoBranches(n)
213 br := map[string]*roaring.Bitmap{}
214 id := uint32(1)
215
216 for _, branches := range set {
217 for _, branch := range branches {
218 ids, ok := br[branch]
219 if !ok {
220 ids = roaring.New()
221 br[branch] = ids
222 }
223 ids.Add(id)
224 }
225 id++
226 }
227
228 brs := make([]BranchRepos, 0, len(br))
229 for branch, ids := range br {
230 ids.RunOptimize()
231 brs = append(brs, BranchRepos{Branch: branch, Repos: ids})
232 }
233
234 sort.Slice(brs, func(i, j int) bool {
235 return brs[i].Branch < brs[j].Branch
236 })
237
238 q := &BranchesRepos{
239 List: brs,
240 }
241
242 genCache[key] = q
243
244 return q
245}
246
// countWriter is a writer that throws away everything written to it and only
// keeps a running total of the number of bytes it was given.
type countWriter struct {
	n int // total bytes passed to Write so far
}

// Write adds len(p) to the running total and reports the whole slice as
// written. It never returns an error.
func (cw *countWriter) Write(p []byte) (int, error) {
	size := len(p)
	cw.n += size
	return size, nil
}