// Fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "flag"
22 "fmt"
23 "io/fs"
24 "os"
25 "path"
26 "path/filepath"
27 "reflect"
28 "strconv"
29 "strings"
30 "testing"
31 "testing/quick"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35 "github.com/sourcegraph/zoekt/query"
36)
37
// update is the -update test flag. When it is set, TestReadSearch rewrites
// its golden files and TestBackwardsCompat writes a fresh index file into
// testdata/backcompat before running their checks.
var update = flag.Bool("update", false, "update golden files")
39
40func TestReadWrite(t *testing.T) {
41 b, err := NewIndexBuilder(nil)
42 if err != nil {
43 t.Fatalf("NewIndexBuilder: %v", err)
44 }
45
46 if err := b.AddFile("filename", []byte("abcde")); err != nil {
47 t.Fatalf("AddFile: %v", err)
48 }
49
50 var buf bytes.Buffer
51 if err := b.Write(&buf); err != nil {
52 t.Fatal(err)
53 }
54 f := &memSeeker{buf.Bytes()}
55
56 r := reader{r: f}
57
58 var toc indexTOC
59 err = r.readTOC(&toc)
60 if err != nil {
61 t.Errorf("got read error %v", err)
62 }
63 if toc.fileContents.data.sz != 5 {
64 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
65 }
66
67 data, err := r.readIndexData(&toc)
68 if err != nil {
69 t.Fatalf("readIndexData: %v", err)
70 }
71 if got := data.fileName(0); string(got) != "filename" {
72 t.Errorf("got filename %q, want %q", got, "filename")
73 }
74
75 contentNgrams := data.contentNgrams.DumpMap()
76 if len(contentNgrams) != 3 {
77 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
78 }
79
80 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
81 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
82 }
83}
84
85func TestReadWriteNames(t *testing.T) {
86 b, err := NewIndexBuilder(nil)
87 if err != nil {
88 t.Fatalf("NewIndexBuilder: %v", err)
89 }
90
91 if err := b.AddFile("abCd", []byte("")); err != nil {
92 t.Fatalf("AddFile: %v", err)
93 }
94
95 var buf bytes.Buffer
96 if err := b.Write(&buf); err != nil {
97 t.Fatal(err)
98 }
99 f := &memSeeker{buf.Bytes()}
100
101 r := reader{r: f}
102
103 var toc indexTOC
104 if err := r.readTOC(&toc); err != nil {
105 t.Errorf("got read error %v", err)
106 }
107 if toc.fileNames.data.sz != 4 {
108 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
109 }
110
111 data, err := r.readIndexData(&toc)
112 if err != nil {
113 t.Fatalf("readIndexData: %v", err)
114 }
115 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
116 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
117 }
118
119 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
120 if err != nil {
121 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
122 }
123
124 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
125 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
126 }
127}
128
129func TestGet(t *testing.T) {
130 b, err := NewIndexBuilder(nil)
131 if err != nil {
132 t.Fatalf("NewIndexBuilder: %v", err)
133 }
134
135 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
136 t.Fatalf("AddFile: %v", err)
137 }
138
139 var buf bytes.Buffer
140 if err := b.Write(&buf); err != nil {
141 t.Fatal(err)
142 }
143 f := &memSeeker{buf.Bytes()}
144
145 r := reader{r: f}
146
147 var toc indexTOC
148 if err := r.readTOC(&toc); err != nil {
149 t.Errorf("got read error %v", err)
150 }
151
152 id, err := r.readIndexData(&toc)
153 if err != nil {
154 t.Fatalf("readIndexData: %v", err)
155 }
156
157 var off uint32 = 96
158
159 cases := []struct {
160 ng string
161 wantPostingList simpleSection
162 }{
163 {
164 ng: " bb",
165 wantPostingList: simpleSection{off: off, sz: 1},
166 },
167 {
168 ng: "a b",
169 wantPostingList: simpleSection{off: off + 1, sz: 1},
170 },
171 {
172 ng: "aa ",
173 wantPostingList: simpleSection{off: off + 2, sz: 1},
174 },
175 {
176 ng: "aaa",
177 wantPostingList: simpleSection{off: off + 3, sz: 2},
178 },
179 {
180 ng: "baa",
181 wantPostingList: simpleSection{off: off + 5, sz: 1},
182 },
183 {
184 ng: "bba",
185 wantPostingList: simpleSection{off: off + 6, sz: 1},
186 },
187 {
188 ng: "bbb",
189 wantPostingList: simpleSection{off: off + 7, sz: 1},
190 },
191 }
192
193 for _, tt := range cases {
194 t.Run(tt.ng, func(t *testing.T) {
195 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
196 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
197 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
198 }
199 })
200 }
201}
202
203func loadShard(fn string) (Searcher, error) {
204 f, err := os.Open(fn)
205 if err != nil {
206 return nil, err
207 }
208
209 iFile, err := NewIndexFile(f)
210 if err != nil {
211 return nil, err
212 }
213 s, err := NewSearcher(iFile)
214 if err != nil {
215 iFile.Close()
216 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
217 }
218
219 return s, nil
220}
221
222func TestReadSearch(t *testing.T) {
223 type out struct {
224 FormatVersion int
225 FeatureVersion int
226 FileMatches [][]FileMatch
227 }
228
229 qs := []query.Q{
230 &query.Substring{Pattern: "func main", Content: true},
231 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
232 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
233 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
234 }
235
236 shards, err := filepath.Glob("testdata/shards/*.zoekt")
237 if err != nil {
238 t.Fatal(err)
239 }
240
241 for _, path := range shards {
242 name := filepath.Base(path)
243 name = strings.TrimSuffix(name, ".zoekt")
244
245 shard, err := loadShard(path)
246 if err != nil {
247 t.Fatalf("error loading shard %s %v", name, err)
248 }
249
250 index, ok := shard.(*indexData)
251 if !ok {
252 t.Fatalf("expected *indexData for %s", name)
253 }
254
255 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
256
257 if *update {
258 got := out{
259 FormatVersion: index.metaData.IndexFormatVersion,
260 FeatureVersion: index.metaData.IndexFeatureVersion,
261 }
262 for _, q := range qs {
263 res, err := shard.Search(context.Background(), q, &SearchOptions{})
264 if err != nil {
265 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
266 }
267 got.FileMatches = append(got.FileMatches, res.Files)
268 }
269
270 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
271 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
272 continue
273 } else if err := os.WriteFile(golden, raw, 0o644); err != nil {
274 t.Errorf("failed writing search results for %s during updating: %v", name, err)
275 continue
276 }
277 }
278
279 var want out
280 if buf, err := os.ReadFile(golden); err != nil {
281 t.Fatalf("failed reading search results for %s: %v", name, err)
282 } else if err := json.Unmarshal(buf, &want); err != nil {
283 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
284 }
285
286 if index.metaData.IndexFormatVersion != want.FormatVersion {
287 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
288 }
289
290 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
291 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
292 }
293
294 for j, q := range qs {
295 res, err := shard.Search(context.Background(), q, &SearchOptions{})
296 if err != nil {
297 t.Fatalf("failed search %s on %s: %v", q, name, err)
298 }
299
300 if len(res.Files) != len(want.FileMatches[j]) {
301 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
302 }
303
304 if len(want.FileMatches[j]) == 0 {
305 continue
306 }
307
308 if d := cmp.Diff(want.FileMatches[j], res.Files); d != "" {
309 t.Errorf("matches for %s on %s (-want +got)\n%s", q, name, d)
310 }
311 }
312 }
313}
314
315func TestEncodeRawConfig(t *testing.T) {
316 mustParse := func(s string) uint8 {
317 i, err := strconv.ParseInt(s, 2, 8)
318 if err != nil {
319 t.Fatalf("failed to parse %s", s)
320 }
321 return uint8(i)
322 }
323
324 cases := []struct {
325 rawConfig map[string]string
326 want string
327 }{
328 {
329 rawConfig: map[string]string{"public": "1"},
330 want: "101001",
331 },
332 {
333 rawConfig: map[string]string{"fork": "1"},
334 want: "100110",
335 },
336 {
337 rawConfig: map[string]string{"public": "1", "fork": "1"},
338 want: "100101",
339 },
340 {
341 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
342 want: "010101",
343 },
344 {
345 rawConfig: map[string]string{},
346 want: "101010",
347 },
348 }
349 for _, c := range cases {
350 t.Run(c.want, func(t *testing.T) {
351 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
352 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
353 }
354 })
355 }
356}
357
358func TestBackwardsCompat(t *testing.T) {
359 if *update {
360 b, err := NewIndexBuilder(nil)
361 if err != nil {
362 t.Fatalf("NewIndexBuilder: %v", err)
363 }
364
365 if err := b.AddFile("filename", []byte("abcde")); err != nil {
366 t.Fatalf("AddFile: %v", err)
367 }
368
369 var buf bytes.Buffer
370 if err := b.Write(&buf); err != nil {
371 t.Fatal(err)
372 }
373
374 outname := fmt.Sprintf("testdata/backcompat/new_v%d.%05d.zoekt", IndexFormatVersion, 0)
375 t.Log("writing new file", outname)
376
377 err = os.WriteFile(outname, buf.Bytes(), 0o644)
378 if err != nil {
379 t.Fatalf("Creating output file: %v", err)
380 }
381 }
382
383 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
384 if err != nil {
385 t.Fatalf("fs.Glob: %v", err)
386 }
387
388 for _, fname := range compatibleFiles {
389 t.Run(path.Base(fname),
390 func(t *testing.T) {
391 f, err := os.Open(fname)
392 if err != nil {
393 t.Fatal("os.Open", err)
394 }
395 idx, err := NewIndexFile(f)
396 if err != nil {
397 t.Fatal("NewIndexFile", err)
398 }
399 r := reader{r: idx}
400
401 var toc indexTOC
402 err = r.readTOC(&toc)
403 if err != nil {
404 t.Errorf("got read error %v", err)
405 }
406 if toc.fileContents.data.sz != 5 {
407 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
408 }
409
410 data, err := r.readIndexData(&toc)
411 if err != nil {
412 t.Fatalf("readIndexData: %v", err)
413 }
414 if got := data.fileName(0); string(got) != "filename" {
415 t.Errorf("got filename %q, want %q", got, "filename")
416 }
417
418 contentNgrams := data.contentNgrams.DumpMap()
419 if len(data.contentNgrams.DumpMap()) != 3 {
420 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
421 }
422
423 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
424 t.Errorf("found ngram bcd in %v", contentNgrams)
425 }
426 },
427 )
428 }
429}
430
431func TestBackfillIDIsDeterministic(t *testing.T) {
432 repo := "github.com/a/b"
433 have1 := backfillID(repo)
434 have2 := backfillID(repo)
435
436 if have1 != have2 {
437 t.Fatalf("%s != %s ", have1, have2)
438 }
439}
440
441func TestEncodeRanks(t *testing.T) {
442 quick.Check(func(ranks [][]float64) bool {
443 buf := bytes.Buffer{}
444 w := &writer{w: &buf}
445
446 if err := encodeRanks(w, ranks); err != nil {
447 return false
448 }
449
450 // In case all rank vectors are empty, IE {{}, {}, ...}, we won't write anything
451 // to w and gob decode will decode this as "nil", which will fail the
452 // comparison even with cmpopts.EquateEmpty().
453 if w.off == 0 {
454 return true
455 }
456
457 d := &indexData{}
458 if err := decodeRanks(buf.Bytes(), &d.ranks); err != nil {
459 t.Fatal(err)
460 }
461
462 if d := cmp.Diff(ranks, d.ranks, cmpopts.EquateEmpty()); d != "" {
463 t.Fatalf("-want, +got:\n%s\n", d)
464 }
465
466 return true
467 }, nil)
468}
469
470func BenchmarkReadMetadata(b *testing.B) {
471 file, err := os.Open("testdata/benchmark/zoekt_v16.00000.zoekt")
472 if err != nil {
473 b.Fatalf("Failed to open test file: %v", err)
474 }
475 defer file.Close()
476
477 indexFile, err := NewIndexFile(file)
478 if err != nil {
479 b.Fatalf("could not open index: %v", err)
480 }
481
482 b.ReportAllocs()
483 b.ResetTimer()
484
485 for i := 0; i < b.N; i++ {
486 repos, metadata, err := ReadMetadata(indexFile)
487 if err != nil {
488 b.Fatalf("ReadMetadata failed: %v", err)
489 }
490 if len(repos) != 1 {
491 b.Fatalf("expected 1 repository")
492 }
493 if metadata == nil {
494 b.Fatalf("expected non-nil metadata")
495 }
496 }
497}