fork of https://github.com/sourcegraph/zoekt
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "fmt"
22 "log"
23 "os"
24 "path/filepath"
25 "reflect"
26 "runtime"
27 "sort"
28 "strconv"
29 "strings"
30 "testing"
31 "time"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35 "github.com/grafana/regexp"
36 "github.com/stretchr/testify/require"
37
38 "github.com/sourcegraph/zoekt"
39 "github.com/sourcegraph/zoekt/internal/tenant"
40 "github.com/sourcegraph/zoekt/internal/tenant/tenanttest"
41 "github.com/sourcegraph/zoekt/query"
42 "github.com/sourcegraph/zoekt/shards"
43)
44
45func TestBasic(t *testing.T) {
46 dir := t.TempDir()
47
48 opts := Options{
49 IndexDir: dir,
50 ShardMax: 1024,
51 RepositoryDescription: zoekt.Repository{
52 Name: "repo",
53 },
54 Parallelism: 2,
55 SizeMax: 1 << 20,
56 }
57
58 b, err := NewBuilder(opts)
59 if err != nil {
60 t.Fatalf("NewBuilder: %v", err)
61 }
62
63 for i := 0; i < 4; i++ {
64 s := fmt.Sprintf("%d", i)
65 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil {
66 t.Fatal(err)
67 }
68 }
69
70 if err := b.Finish(); err != nil {
71 t.Errorf("Finish: %v", err)
72 }
73
74 fs, _ := filepath.Glob(dir + "/*.zoekt")
75 if len(fs) <= 1 {
76 t.Fatalf("want multiple shards, got %v", fs)
77 }
78
79 _, md0, err := zoekt.ReadMetadataPath(fs[0])
80 if err != nil {
81 t.Fatal(err)
82 }
83 for _, f := range fs[1:] {
84 _, md, err := zoekt.ReadMetadataPath(f)
85 if err != nil {
86 t.Fatal(err)
87 }
88 if md.IndexTime != md0.IndexTime {
89 t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime)
90 }
91 if md.ID != md0.ID {
92 t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID)
93 }
94 }
95
96 ss, err := shards.NewDirectorySearcher(dir)
97 if err != nil {
98 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
99 }
100 defer ss.Close()
101
102 q, err := query.Parse("111")
103 if err != nil {
104 t.Fatalf("Parse(111): %v", err)
105 }
106
107 var sOpts zoekt.SearchOptions
108 ctx := context.Background()
109 result, err := ss.Search(ctx, q, &sOpts)
110 if err != nil {
111 t.Fatalf("Search(%v): %v", q, err)
112 }
113
114 if len(result.Files) != 1 {
115 t.Errorf("got %v, want 1 file.", result.Files)
116 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
117 t.Errorf("got file %q, want %q", gotFile, wantFile)
118 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo"; gotRepo != wantRepo {
119 t.Errorf("got repo %q, want %q", gotRepo, wantRepo)
120 }
121
122 t.Run("meta file", func(t *testing.T) {
123 // use retryTest to allow for the directory watcher to notice the meta
124 // file
125 retryTest(t, func(fatalf func(format string, args ...interface{})) {
126 // Add a .meta file for each shard with repo.Name set to
127 // "repo-mutated". We do this inside retry helper since we have noticed
128 // some flakiness on github CI.
129 for _, p := range fs {
130 repos, _, err := zoekt.ReadMetadataPath(p)
131 if err != nil {
132 t.Fatal(err)
133 }
134 repos[0].Name = "repo-mutated"
135 b, err := json.Marshal(repos[0])
136 if err != nil {
137 t.Fatal(err)
138 }
139
140 if err := os.WriteFile(p+".meta", b, 0o600); err != nil {
141 t.Fatal(err)
142 }
143 }
144
145 result, err := ss.Search(ctx, q, &sOpts)
146 if err != nil {
147 fatalf("Search(%v): %v", q, err)
148 }
149
150 if len(result.Files) != 1 {
151 fatalf("got %v, want 1 file.", result.Files)
152 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
153 fatalf("got file %q, want %q", gotFile, wantFile)
154 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo-mutated"; gotRepo != wantRepo {
155 fatalf("got repo %q, want %q", gotRepo, wantRepo)
156 }
157 })
158 })
159}
160
161func TestSearchTenant(t *testing.T) {
162 tenanttest.MockEnforce(t)
163
164 dir := t.TempDir()
165
166 ctx1 := tenanttest.NewTestContext()
167 tnt1, err := tenant.FromContext(ctx1)
168 require.NoError(t, err)
169
170 opts := Options{
171 IndexDir: dir,
172 ShardMax: 1024,
173 RepositoryDescription: zoekt.Repository{
174 Name: "repo",
175 RawConfig: map[string]string{"tenantID": strconv.Itoa(tnt1.ID())},
176 },
177 Parallelism: 2,
178 SizeMax: 1 << 20,
179 }
180
181 b, err := NewBuilder(opts)
182 if err != nil {
183 t.Fatalf("NewBuilder: %v", err)
184 }
185
186 for i := 0; i < 4; i++ {
187 s := fmt.Sprintf("%d", i)
188 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil {
189 t.Fatal(err)
190 }
191 }
192
193 if err := b.Finish(); err != nil {
194 t.Errorf("Finish: %v", err)
195 }
196
197 fs, _ := filepath.Glob(dir + "/*.zoekt")
198 if len(fs) <= 1 {
199 t.Fatalf("want multiple shards, got %v", fs)
200 }
201
202 _, md0, err := zoekt.ReadMetadataPath(fs[0])
203 if err != nil {
204 t.Fatal(err)
205 }
206 for _, f := range fs[1:] {
207 _, md, err := zoekt.ReadMetadataPath(f)
208 if err != nil {
209 t.Fatal(err)
210 }
211 if md.IndexTime != md0.IndexTime {
212 t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime)
213 }
214 if md.ID != md0.ID {
215 t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID)
216 }
217 }
218
219 ss, err := shards.NewDirectorySearcher(dir)
220 if err != nil {
221 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
222 }
223 defer ss.Close()
224
225 q, err := query.Parse("111")
226 if err != nil {
227 t.Fatalf("Parse(111): %v", err)
228 }
229
230 var sOpts zoekt.SearchOptions
231
232 // Tenant 1 has access to the repo
233 result, err := ss.Search(ctx1, q, &sOpts)
234 require.NoError(t, err)
235 require.Len(t, result.Files, 1)
236
237 // Tenant 2 does not have access to the repo
238 ctx2 := tenanttest.NewTestContext()
239 result, err = ss.Search(ctx2, q, &sOpts)
240 require.NoError(t, err)
241 require.Len(t, result.Files, 0)
242}
243
244func TestListTenant(t *testing.T) {
245 tenanttest.MockEnforce(t)
246
247 dir := t.TempDir()
248
249 ctx1 := tenanttest.NewTestContext()
250 tnt1, err := tenant.FromContext(ctx1)
251 require.NoError(t, err)
252
253 opts := Options{
254 IndexDir: dir,
255 RepositoryDescription: zoekt.Repository{
256 Name: "repo",
257 RawConfig: map[string]string{"tenantID": strconv.Itoa(tnt1.ID())},
258 },
259 }
260 opts.SetDefaults()
261
262 b, err := NewBuilder(opts)
263 if err != nil {
264 t.Fatalf("NewBuilder: %v", err)
265 }
266 if err := b.Finish(); err != nil {
267 t.Errorf("Finish: %v", err)
268 }
269
270 fs, _ := filepath.Glob(dir + "/*.zoekt")
271 if len(fs) != 1 {
272 t.Fatalf("want a shard, got %v", fs)
273 }
274
275 ss, err := shards.NewDirectorySearcher(dir)
276 if err != nil {
277 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
278 }
279 defer ss.Close()
280
281 // Tenant 1 has access to the repo
282 result, err := ss.List(ctx1, &query.Const{Value: true}, nil)
283 require.NoError(t, err)
284 require.Len(t, result.Repos, 1)
285
286 // Tenant 2 does not have access to the repo
287 ctx2 := tenanttest.NewTestContext()
288 result, err = ss.List(ctx2, &query.Const{Value: true}, nil)
289 require.NoError(t, err)
290 require.Len(t, result.Repos, 0)
291}
292
// retryTest will retry f until min(t.Deadline(), time.Minute). It returns
// once f doesn't call fatalf.
//
// While retrying, f runs on its own goroutine and is handed a fatalf that
// discards its message and terminates that goroutine, which counts as a
// failed attempt. Between attempts retryTest sleeps, multiplying the pause by
// 1.5 each time (15ms, 22.5ms, ...). Once the next pause would cross the
// deadline, f is run one last time on the calling goroutine with the real
// t.Fatalf so that a persistent failure is reported.
//
// Because retry attempts run on a separate goroutine, f must report failures
// only through the fatalf it is given, never through t directly.
func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) {
	t.Helper()

	sleep := 10 * time.Millisecond
	deadline := time.Now().Add(time.Minute)
	if d, ok := t.Deadline(); ok && d.Before(deadline) {
		// give 1s for us to do a final test run
		deadline = d.Add(-time.Second)
	}

	for {
		// done yields true when f returned normally. If f called fatalf the
		// goroutine exits early, the deferred close fires, and the receive
		// below yields the zero value false.
		done := make(chan bool)
		go func() {
			defer close(done)

			f(func(format string, args ...interface{}) {
				runtime.Goexit()
			})

			done <- true
		}()

		if success := <-done; success {
			return
		}

		// each time we increase sleep by 1.5. This must assign to the outer
		// variable; declaring a new one here would keep the pause constant.
		sleep = sleep*2 - sleep/2
		if time.Now().Add(sleep).After(deadline) {
			break
		}
		time.Sleep(sleep)
	}

	// final run for the test, using the real t.Fatalf
	f(t.Fatalf)
}
333
334func TestLargeFileOption(t *testing.T) {
335 dir := t.TempDir()
336
337 sizeMax := 1000
338 opts := Options{
339 IndexDir: dir,
340 LargeFiles: []string{"F0", "F1", "F2", "!F1"},
341 RepositoryDescription: zoekt.Repository{
342 Name: "repo",
343 },
344 SizeMax: sizeMax,
345 }
346
347 b, err := NewBuilder(opts)
348 if err != nil {
349 t.Fatalf("NewBuilder: %v", err)
350 }
351
352 for i := 0; i < 4; i++ {
353 s := fmt.Sprintf("%d", i)
354 if err := b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1))); err != nil {
355 t.Fatal(err)
356 }
357 }
358
359 if err := b.Finish(); err != nil {
360 t.Errorf("Finish: %v", err)
361 }
362
363 ss, err := shards.NewDirectorySearcher(dir)
364 if err != nil {
365 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
366 }
367
368 q, err := query.Parse("aaa")
369 if err != nil {
370 t.Fatalf("Parse(aaa): %v", err)
371 }
372
373 var sOpts zoekt.SearchOptions
374 ctx := context.Background()
375 result, err := ss.Search(ctx, q, &sOpts)
376 if err != nil {
377 t.Fatalf("Search(%v): %v", q, err)
378 }
379
380 if len(result.Files) != 2 {
381 t.Errorf("got %v files, want 2 files.", len(result.Files))
382 }
383 defer ss.Close()
384}
385
386func TestUpdate(t *testing.T) {
387 dir := t.TempDir()
388
389 opts := Options{
390 IndexDir: dir,
391 ShardMax: 1024,
392 RepositoryDescription: zoekt.Repository{
393 Name: "repo",
394 FileURLTemplate: "url",
395 },
396 Parallelism: 2,
397 SizeMax: 1 << 20,
398 }
399
400 if b, err := NewBuilder(opts); err != nil {
401 t.Fatalf("NewBuilder: %v", err)
402 } else {
403 if err := b.AddFile("F", []byte("hoi")); err != nil {
404 t.Errorf("AddFile: %v", err)
405 }
406 if err := b.Finish(); err != nil {
407 t.Errorf("Finish: %v", err)
408 }
409 }
410 ss, err := shards.NewDirectorySearcher(dir)
411 if err != nil {
412 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
413 }
414
415 ctx := context.Background()
416 repos, err := ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil)
417 if err != nil {
418 t.Fatalf("List: %v", err)
419 }
420
421 if len(repos.Repos) != 1 {
422 t.Errorf("List(repo): got %v, want 1 repo", repos.Repos)
423 }
424
425 fs, err := filepath.Glob(filepath.Join(dir, "*"))
426 if err != nil {
427 t.Fatalf("glob: %v", err)
428 }
429
430 opts.RepositoryDescription = zoekt.Repository{
431 Name: "repo2",
432 FileURLTemplate: "url2",
433 }
434
435 if b, err := NewBuilder(opts); err != nil {
436 t.Fatalf("NewBuilder: %v", err)
437 } else {
438 if err := b.AddFile("F", []byte("hoi")); err != nil {
439 t.Errorf("AddFile: %v", err)
440 }
441 if err := b.Finish(); err != nil {
442 t.Errorf("Finish: %v", err)
443 }
444 }
445
446 // This is ugly, and potentially flaky, but there is no
447 // observable synchronization for the Sharded searcher, so
448 // this is the best we can do.
449 time.Sleep(100 * time.Millisecond)
450
451 ctx = context.Background()
452 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
453 t.Fatalf("List: %v", err)
454 } else if len(repos.Repos) != 2 {
455 t.Errorf("List(repo): got %v, want 2 repos", repos.Repos)
456 }
457
458 for _, fn := range fs {
459 log.Printf("removing %s", fn)
460 if err := os.Remove(fn); err != nil {
461 t.Fatalf("Remove(%s): %v", fn, err)
462 }
463 }
464
465 time.Sleep(100 * time.Millisecond)
466
467 ctx = context.Background()
468 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
469 t.Fatalf("List: %v", err)
470 } else if len(repos.Repos) != 1 {
471 var ss []string
472 for _, r := range repos.Repos {
473 ss = append(ss, r.Repository.Name)
474 }
475 t.Errorf("List(repo): got %v, want 1 repo", ss)
476 }
477}
478
479func TestDeleteOldShards(t *testing.T) {
480 dir := t.TempDir()
481
482 opts := Options{
483 IndexDir: dir,
484 ShardMax: 1024,
485 RepositoryDescription: zoekt.Repository{
486 Name: "repo",
487 FileURLTemplate: "url",
488 },
489 SizeMax: 1 << 20,
490 }
491 opts.SetDefaults()
492
493 b, err := NewBuilder(opts)
494 if err != nil {
495 t.Fatalf("NewBuilder: %v", err)
496 }
497 for i := 0; i < 4; i++ {
498 s := fmt.Sprintf("%d\n", i)
499 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
500 t.Errorf("AddFile: %v", err)
501 }
502 }
503 if err := b.Finish(); err != nil {
504 t.Errorf("Finish: %v", err)
505 }
506
507 glob := filepath.Join(dir, "*.zoekt")
508 fs, err := filepath.Glob(glob)
509 if err != nil {
510 t.Fatalf("Glob(%s): %v", glob, err)
511 } else if len(fs) != 4 {
512 t.Fatalf("Glob(%s): got %v, want 4 shards", glob, fs)
513 }
514
515 if fi, err := os.Lstat(fs[0]); err != nil {
516 t.Fatalf("Lstat: %v", err)
517 } else if fi.Mode()&0o666 == 0o600 {
518 // This fails spuriously if your umask is very restrictive.
519 t.Errorf("got mode %o, should respect umask.", fi.Mode())
520 }
521
522 // Do again, without sharding.
523 opts.ShardMax = 1 << 20
524 b, err = NewBuilder(opts)
525 if err != nil {
526 t.Fatalf("NewBuilder: %v", err)
527 }
528 for i := 0; i < 4; i++ {
529 s := fmt.Sprintf("%d\n", i)
530 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
531 t.Fatal(err)
532 }
533 }
534 if err := b.Finish(); err != nil {
535 t.Errorf("Finish: %v", err)
536 }
537
538 fs, err = filepath.Glob(glob)
539 if err != nil {
540 t.Fatalf("Glob(%s): %v", glob, err)
541 } else if len(fs) != 1 {
542 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
543 }
544
545 // Again, but don't index anything; should leave old shards intact.
546 b, err = NewBuilder(opts)
547 if err != nil {
548 t.Fatalf("NewBuilder: %v", err)
549 }
550 if err := b.Finish(); err != nil {
551 t.Errorf("Finish: %v", err)
552 }
553
554 fs, err = filepath.Glob(glob)
555 if err != nil {
556 t.Fatalf("Glob(%s): %v", glob, err)
557 } else if len(fs) != 1 {
558 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
559 }
560}
561
562func TestPartialSuccess(t *testing.T) {
563 dir := t.TempDir()
564
565 opts := Options{
566 IndexDir: dir,
567 ShardMax: 1024,
568 SizeMax: 1 << 20,
569 Parallelism: 1,
570 }
571 opts.RepositoryDescription.Name = "repo"
572 opts.SetDefaults()
573
574 b, err := NewBuilder(opts)
575 if err != nil {
576 t.Fatalf("NewBuilder: %v", err)
577 }
578
579 for i := 0; i < 4; i++ {
580 nm := fmt.Sprintf("F%d", i)
581 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
582 }
583 b.buildError = fmt.Errorf("any error")
584
585 // No error checking.
586 _ = b.Finish()
587
588 // Finish cleans up temporary files.
589 if fs, err := filepath.Glob(dir + "/*"); err != nil {
590 t.Errorf("glob(%s): %v", dir, err)
591 } else if len(fs) != 0 {
592 t.Errorf("got shards %v, want []", fs)
593 }
594}
595
596type filerankCase struct {
597 name string
598 docs []*zoekt.Document
599 want []int
600}
601
602func testFileRankAspect(t *testing.T, c filerankCase) {
603 var want []*zoekt.Document
604 for _, j := range c.want {
605 want = append(want, c.docs[j])
606 }
607
608 got := make([]*zoekt.Document, len(c.docs))
609 copy(got, c.docs)
610 sortDocuments(got)
611
612 print := func(ds []*zoekt.Document) string {
613 r := ""
614 for _, d := range ds {
615 r += fmt.Sprintf("%v, ", d)
616 }
617 return r
618 }
619 if !reflect.DeepEqual(got, want) {
620 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
621 }
622}
623
624func TestFileRank(t *testing.T) {
625 for _, c := range []filerankCase{{
626 name: "filename",
627 docs: []*zoekt.Document{
628 {
629 Name: "longlonglong",
630 Content: []byte("bla"),
631 },
632 {
633 Name: "short",
634 Content: []byte("bla"),
635 },
636 },
637 want: []int{1, 0},
638 }, {
639 name: "test",
640 docs: []*zoekt.Document{
641 {
642 Name: "foo_test.go",
643 Content: []byte("bla"),
644 },
645 {
646 Name: "longlonglong",
647 Content: []byte("bla"),
648 },
649 },
650 want: []int{1, 0},
651 }, {
652 name: "content",
653 docs: []*zoekt.Document{
654 {
655 Content: []byte("bla"),
656 },
657 {
658 Content: []byte("blablablabla"),
659 },
660 {
661 Content: []byte("blabla"),
662 },
663 },
664 want: []int{0, 2, 1},
665 }, {
666 name: "skipped docs",
667 docs: []*zoekt.Document{
668 {
669 Name: "binary_file",
670 SkipReason: "binary file",
671 },
672 {
673 Name: "some_test.go",
674 Content: []byte("bla"),
675 },
676 {
677 Name: "large_file.go",
678 SkipReason: "too large",
679 },
680 {
681 Name: "file.go",
682 Content: []byte("blabla"),
683 },
684 },
685 want: []int{3, 1, 0, 2},
686 }} {
687 t.Run(c.name, func(t *testing.T) {
688 testFileRankAspect(t, c)
689 })
690 }
691}
692
693func TestEmptyContent(t *testing.T) {
694 dir := t.TempDir()
695
696 opts := Options{
697 IndexDir: dir,
698 RepositoryDescription: zoekt.Repository{
699 Name: "repo",
700 },
701 }
702 opts.SetDefaults()
703
704 b, err := NewBuilder(opts)
705 if err != nil {
706 t.Fatalf("NewBuilder: %v", err)
707 }
708 if err := b.Finish(); err != nil {
709 t.Errorf("Finish: %v", err)
710 }
711
712 fs, _ := filepath.Glob(dir + "/*.zoekt")
713 if len(fs) != 1 {
714 t.Fatalf("want a shard, got %v", fs)
715 }
716
717 ss, err := shards.NewDirectorySearcher(dir)
718 if err != nil {
719 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
720 }
721 defer ss.Close()
722
723 ctx := context.Background()
724 result, err := ss.List(ctx, &query.Const{Value: true}, nil)
725 if err != nil {
726 t.Fatalf("List: %v", err)
727 }
728
729 if len(result.Repos) != 1 || result.Repos[0].Repository.Name != "repo" {
730 t.Errorf("got %+v, want 1 repo.", result.Repos)
731 }
732}
733
734func TestDeltaShards(t *testing.T) {
735 // TODO: Need to write a test for compound shards as well.
736 type step struct {
737 name string
738 documents []zoekt.Document
739 optFn func(t *testing.T, o *Options)
740
741 query string
742 expectedDocuments []zoekt.Document
743 }
744
745 var (
746 fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")}
747 fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")}
748
749 fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")}
750
751 barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")}
752 barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")}
753 )
754
755 for _, test := range []struct {
756 name string
757 steps []step
758 }{
759 {
760 name: "tombstone older documents",
761 steps: []step{
762 {
763 name: "setup",
764 documents: []zoekt.Document{barAtMain, fooAtMain},
765 query: "common",
766 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
767 },
768 {
769 name: "add new version of foo, tombstone older ones",
770 documents: []zoekt.Document{fooAtMainV2},
771 optFn: func(t *testing.T, o *Options) {
772 o.IsDelta = true
773 o.changedOrRemovedFiles = []string{"foo.go"}
774 },
775 query: "common",
776 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2},
777 },
778 {
779 name: "add new version of bar, tombstone older ones",
780 documents: []zoekt.Document{barAtMainV2},
781 optFn: func(t *testing.T, o *Options) {
782 o.IsDelta = true
783 o.changedOrRemovedFiles = []string{"bar.go"}
784 },
785 query: "common",
786 expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2},
787 },
788 },
789 },
790 {
791 name: "tombstone older documents even if the latest shard has no documents",
792 steps: []step{
793 {
794 name: "setup",
795 documents: []zoekt.Document{barAtMain, fooAtMain},
796 query: "common",
797 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
798 },
799 {
800 // a build with no documents could represent a deletion
801 name: "tombstone older documents",
802 documents: nil,
803 optFn: func(t *testing.T, o *Options) {
804 o.IsDelta = true
805 o.changedOrRemovedFiles = []string{"foo.go"}
806 },
807 query: "common",
808 expectedDocuments: []zoekt.Document{barAtMain},
809 },
810 },
811 },
812 {
813 name: "tombstones affect document across branches",
814 steps: []step{
815 {
816 name: "setup",
817 documents: []zoekt.Document{barAtMain, fooAtMainAndRelease},
818 query: "common",
819 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease},
820 },
821 {
822 name: "tombstone foo",
823 documents: nil,
824 optFn: func(t *testing.T, o *Options) {
825 o.IsDelta = true
826 o.changedOrRemovedFiles = []string{"foo.go"}
827 },
828 query: "common",
829 expectedDocuments: []zoekt.Document{barAtMain},
830 },
831 },
832 },
833 } {
834 t.Run(test.name, func(t *testing.T) {
835 indexDir := t.TempDir()
836
837 branchSet := make(map[string]struct{})
838
839 for _, s := range test.steps {
840 for _, d := range s.documents {
841 for _, b := range d.Branches {
842 branchSet[b] = struct{}{}
843 }
844 }
845 }
846
847 for _, step := range test.steps {
848 repository := zoekt.Repository{ID: 1, Name: "repository"}
849
850 for b := range branchSet {
851 repository.Branches = append(repository.Branches, zoekt.RepositoryBranch{Name: b})
852 }
853
854 sort.Slice(repository.Branches, func(i, j int) bool {
855 a, b := repository.Branches[i], repository.Branches[j]
856
857 return a.Name < b.Name
858 })
859
860 buildOpts := Options{
861 IndexDir: indexDir,
862 RepositoryDescription: repository,
863 }
864 buildOpts.SetDefaults()
865
866 if step.optFn != nil {
867 step.optFn(t, &buildOpts)
868 }
869
870 b, err := NewBuilder(buildOpts)
871 if err != nil {
872 t.Fatalf("step %q: NewBuilder: %s", step.name, err)
873 }
874
875 for _, d := range step.documents {
876 err := b.Add(d)
877 if err != nil {
878 t.Fatalf("step %q: adding document %q to builder: %s", step.name, d.Name, err)
879 }
880 }
881
882 // Call b.Finish() multiple times to ensure that it is idempotent
883 for i := 0; i < 3; i++ {
884
885 err = b.Finish()
886 if err != nil {
887 t.Fatalf("step %q: finishing builder (call #%d): %s", step.name, i, err)
888 }
889 }
890
891 err = b.Finish()
892 if err != nil {
893 t.Fatalf("step %q: finishing builder: %s", step.name, err)
894 }
895
896 state, _ := buildOpts.IndexState()
897 if diff := cmp.Diff(IndexStateEqual, state); diff != "" {
898 t.Errorf("unexpected diff in index state (-want +got):\n%s", diff)
899 }
900
901 ss, err := shards.NewDirectorySearcher(indexDir)
902 if err != nil {
903 t.Fatalf("step %q: NewDirectorySearcher(%s): %s", step.name, indexDir, err)
904 }
905 defer ss.Close()
906
907 searchOpts := &zoekt.SearchOptions{Whole: true}
908 q := &query.Substring{Pattern: step.query}
909
910 result, err := ss.Search(context.Background(), q, searchOpts)
911 if err != nil {
912 t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err)
913 }
914
915 var receivedDocuments []zoekt.Document
916 for _, f := range result.Files {
917 receivedDocuments = append(receivedDocuments, zoekt.Document{
918 Name: f.FileName,
919 Content: f.Content,
920 })
921 }
922
923 cmpOpts := []cmp.Option{
924 cmpopts.IgnoreFields(zoekt.Document{}, "Branches"),
925 cmpopts.SortSlices(func(a, b zoekt.Document) bool {
926 if a.Name < b.Name {
927 return true
928 }
929
930 return bytes.Compare(a.Content, b.Content) < 0
931 }),
932 }
933
934 if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, cmpOpts...); diff != "" {
935 t.Errorf("step %q: diff in received documents (-want +got):%s\n:", step.name, diff)
936 }
937 }
938 })
939 }
940}