fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 5.2 kB View raw
1package query 2 3import ( 4 "bytes" 5 "crypto/sha256" 6 "encoding/binary" 7 "encoding/gob" 8 "fmt" 9 "sort" 10 "testing" 11 12 "github.com/RoaringBitmap/roaring" 13 "github.com/google/go-cmp/cmp" 14) 15 16// We benchmark via Gob since that allows us to compare to no custom 17// marshalling. 18 19func BenchmarkRepoBranches_Encode(b *testing.B) { 20 repoBranches := genRepoBranches(5_500_000) 21 22 // do one write to amortize away the cost of gob registration 23 w := &countWriter{} 24 enc := gob.NewEncoder(w) 25 if err := enc.Encode(repoBranches); err != nil { 26 b.Fatal(err) 27 } 28 29 b.ResetTimer() 30 b.ReportAllocs() 31 32 b.ReportMetric(float64(w.n), "bytes") 33 34 for n := 0; n < b.N; n++ { 35 if err := enc.Encode(repoBranches); err != nil { 36 b.Fatal(err) 37 } 38 } 39} 40 41func BenchmarkBranchesRepos_Encode(b *testing.B) { 42 brs := genBranchesRepos(5_500_000) 43 44 // do one write to amortize away the cost of gob registration 45 w := &countWriter{} 46 enc := gob.NewEncoder(w) 47 if err := enc.Encode(brs); err != nil { 48 b.Fatal(err) 49 } 50 51 b.ResetTimer() 52 b.ReportAllocs() 53 54 b.ReportMetric(float64(w.n), "bytes") 55 56 for n := 0; n < b.N; n++ { 57 if err := enc.Encode(brs); err != nil { 58 b.Fatal(err) 59 } 60 } 61} 62 63func BenchmarkBranchesRepos_Decode(b *testing.B) { 64 brs := genBranchesRepos(5_500_000) 65 66 var buf bytes.Buffer 67 if err := gob.NewEncoder(&buf).Encode(brs); err != nil { 68 b.Fatal(err) 69 } 70 71 b.ResetTimer() 72 b.ReportAllocs() 73 74 for n := 0; n < b.N; n++ { 75 // We need to include gob.NewDecoder cost to avoid measuring encoding. 76 var brs BranchesRepos 77 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&brs); err != nil { 78 b.Fatal(err) 79 } 80 } 81} 82 83func TestBranchesRepos_Marshal(t *testing.T) { 84 want := genBranchesRepos(1000) 85 86 var buf bytes.Buffer 87 if err := gob.NewEncoder(&buf).Encode(want); err != nil { 88 t.Fatal(err) 89 } 90 91 var got BranchesRepos 92 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil { 93 t.Fatal(err) 94 } 95 96 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() }) 97 if diff := cmp.Diff(want, &got, tr); diff != "" { 98 t.Fatalf("mismatch IDs (-want +got):\n%s", diff) 99 } 100} 101 102func BenchmarkFileNameSet_Encode(b *testing.B) { 103 set := genFileNameSet(1000) 104 105 // do one write to amortize away the cost of gob registration 106 w := &countWriter{} 107 enc := gob.NewEncoder(w) 108 if err := enc.Encode(set); err != nil { 109 b.Fatal(err) 110 } 111 112 b.ResetTimer() 113 b.ReportAllocs() 114 115 b.ReportMetric(float64(w.n), "bytes") 116 117 for n := 0; n < b.N; n++ { 118 if err := enc.Encode(set); err != nil { 119 b.Fatal(err) 120 } 121 } 122} 123 124func BenchmarkFileNameSet_Decode(b *testing.B) { 125 set := genFileNameSet(1000) 126 127 var buf bytes.Buffer 128 if err := gob.NewEncoder(&buf).Encode(set); err != nil { 129 b.Fatal(err) 130 } 131 132 b.ResetTimer() 133 b.ReportAllocs() 134 135 for n := 0; n < b.N; n++ { 136 // We need to include gob.NewDecoder cost to avoid measuring encoding. 137 var repoBranches FileNameSet 138 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&repoBranches); err != nil { 139 b.Fatal(err) 140 } 141 } 142} 143 144func TestFileNameSet_Marshal(t *testing.T) { 145 for i := range []int{0, 1, 10, 100} { 146 want := genFileNameSet(i) 147 148 var buf bytes.Buffer 149 if err := gob.NewEncoder(&buf).Encode(want); err != nil { 150 t.Fatal(err) 151 } 152 153 var got FileNameSet 154 if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil { 155 t.Fatal(err) 156 } 157 158 if diff := cmp.Diff(want, &got); diff != "" { 159 t.Fatalf("mismatch for set size %d (-want +got):\n%s", i, diff) 160 } 161 } 162} 163 164func genFileNameSet(size int) *FileNameSet { 165 set := make(map[string]struct{}, size) 166 for i := range size { 167 set[genName(i)] = struct{}{} 168 } 169 return &FileNameSet{Set: set} 170} 171 172// Generating 5.5M repos slows down the benchmark setup time, so we cache things. 173var genCache = map[string]any{} 174 175func genRepoBranches(n int) map[string][]string { 176 repoBranches := map[string][]string{} 177 orgIndex := 0 178 repoIndex := 0 179 180 for i := range n { 181 org := genName(orgIndex) 182 name := "github.com/" + org + "/" + genName(orgIndex*2+repoIndex) 183 repoBranches[name] = []string{"HEAD"} 184 if repoIndex%50 == 0 { 185 repoBranches[name] = append(repoBranches[name], "more", "branches") 186 } 187 188 if i%1000 == 0 { 189 orgIndex++ 190 repoIndex = 0 191 } 192 193 repoIndex++ 194 } 195 196 return repoBranches 197} 198 199func genName(n int) string { 200 bs := make([]byte, 8) 201 binary.LittleEndian.PutUint64(bs, uint64(n)) 202 return fmt.Sprintf("%x", sha256.Sum256(bs))[:10] 203} 204 205func genBranchesRepos(n int) *BranchesRepos { 206 key := fmt.Sprintf("BranchesRepos:%d", n) 207 val, ok := genCache[key] 208 if ok { 209 return val.(*BranchesRepos) 210 } 211 212 set := genRepoBranches(n) 213 br := map[string]*roaring.Bitmap{} 214 id := uint32(1) 215 216 for _, branches := range set { 217 for _, branch := range branches { 218 ids, ok := br[branch] 219 if !ok { 220 ids = roaring.New() 221 br[branch] = ids 222 } 223 ids.Add(id) 224 } 225 id++ 226 } 227 228 brs := make([]BranchRepos, 0, len(br)) 229 for branch, ids := range br { 230 ids.RunOptimize() 231 brs = append(brs, BranchRepos{Branch: branch, Repos: ids}) 232 } 233 234 sort.Slice(brs, func(i, j int) bool { 235 return brs[i].Branch < brs[j].Branch 236 }) 237 238 q := &BranchesRepos{ 239 List: brs, 240 } 241 242 genCache[key] = q 243 244 return q 245} 246 247type countWriter struct { 248 n int 249} 250 251func (w *countWriter) Write(b []byte) (int, error) { 252 w.n += len(b) 253 return len(b), nil 254}