fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5 "flag"
6 "fmt"
7 "io"
8 "log"
9 "os"
10 "path/filepath"
11 "reflect"
12 "strconv"
13 "strings"
14 "testing"
15 "time"
16
17 "github.com/google/go-cmp/cmp"
18 "github.com/google/go-cmp/cmp/cmpopts"
19 "github.com/prometheus/client_golang/prometheus/testutil"
20 "github.com/stretchr/testify/require"
21
22 "github.com/sourcegraph/zoekt"
23)
24
// update is the -update test flag. When it is set, TestBuildv16 overwrites the
// golden shard under ../testdata/shards with the freshly built shard instead
// of comparing against it.
var update = flag.Bool("update", false, "update golden file")
26
27// ensure we don't regress on how we build v16
28func TestBuildv16(t *testing.T) {
29 dir := t.TempDir()
30
31 opts := Options{
32 IndexDir: dir,
33 RepositoryDescription: zoekt.Repository{
34 Name: "repo",
35 Source: "./testdata/repo/",
36 Metadata: map[string]string{"foo": "bar"},
37 },
38 DisableCTags: true,
39 }
40 opts.SetDefaults()
41
42 b, err := NewBuilder(opts)
43 if err != nil {
44 t.Fatal(err)
45 }
46
47 for _, p := range []string{"main.go"} {
48 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p))
49 if err != nil {
50 t.Fatal(err)
51 }
52 if err := b.AddFile(p, blob); err != nil {
53 t.Fatal(err)
54 }
55 }
56
57 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt")
58
59 // fields indexTime and id depend on time. For this test, we copy the fields from
60 // the old shard.
61 _, wantMetadata, err := ReadMetadataPath(wantP)
62 if err != nil {
63 t.Fatal(err)
64 }
65 b.indexTime = wantMetadata.IndexTime
66 b.id = wantMetadata.ID
67
68 if err := b.Finish(); err != nil {
69 t.Fatal(err)
70 }
71
72 gotP := filepath.Join(dir, "repo_v16.00000.zoekt")
73
74 if *update {
75 data, err := os.ReadFile(gotP)
76 if err != nil {
77 t.Fatal(err)
78 }
79 err = os.WriteFile(wantP, data, 0o644)
80 if err != nil {
81 t.Fatal(err)
82 }
83 return
84 }
85
86 got, err := os.ReadFile(gotP)
87 if err != nil {
88 t.Fatal(err)
89 }
90 want, err := os.ReadFile(wantP)
91 if err != nil {
92 t.Fatal(err)
93 }
94
95 if d := cmp.Diff(want, got); d != "" {
96 t.Errorf("mismatch (-want +got):\n%s", d)
97 }
98}
99
100func TestFlags(t *testing.T) {
101 cases := []struct {
102 args []string
103 want Options
104 }{{
105 // Defaults
106 args: []string{},
107 want: Options{},
108 }, {
109 args: []string{"-index", "/tmp"},
110 want: Options{
111 IndexDir: "/tmp",
112 },
113 }, {
114 // single large file pattern
115 args: []string{"-large_file", "*.md"},
116 want: Options{
117 LargeFiles: []string{"*.md"},
118 },
119 }, {
120 // multiple large file pattern
121 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"},
122 want: Options{
123 LargeFiles: []string{"*.md", "*.yaml"},
124 },
125 }, {
126 // multiple large file pattern with negated pattern
127 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"},
128 want: Options{
129 LargeFiles: []string{"*.md", "!*.yaml"},
130 },
131 }, {
132 // multiple large file pattern with escaped character
133 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"},
134 want: Options{
135 LargeFiles: []string{"*.md", "\\!*.yaml"},
136 },
137 }}
138
139 ignored := []cmp.Option{
140 // depends on $PATH setting.
141 cmpopts.IgnoreFields(Options{}, "CTagsPath"),
142 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"),
143 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"),
144 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
145 }
146
147 for _, c := range cases {
148 c.want.SetDefaults()
149 // depends on $PATH setting.
150 c.want.CTagsPath = ""
151
152 got := Options{}
153 fs := flag.NewFlagSet("", flag.ContinueOnError)
154 got.Flags(fs)
155 if err := fs.Parse(c.args); err != nil {
156 t.Errorf("failed to parse args %v: %v", c.args, err)
157 } else if d := cmp.Diff(c.want, got, ignored...); d != "" {
158 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d)
159 }
160 }
161}
162
163func TestIncrementalSkipIndexing(t *testing.T) {
164 cases := []struct {
165 name string
166 want bool
167 opts Options
168 }{{
169 name: "v17-noop",
170 want: true,
171 opts: Options{
172 RepositoryDescription: zoekt.Repository{
173 Name: "repo17",
174 },
175 SizeMax: 2097152,
176 DisableCTags: true,
177 },
178 }, {
179 name: "v16-noop",
180 want: true,
181 opts: Options{
182 RepositoryDescription: zoekt.Repository{
183 Name: "repo",
184 },
185 SizeMax: 2097152,
186 DisableCTags: true,
187 },
188 }, {
189 name: "v17-id",
190 want: false,
191 opts: Options{
192 RepositoryDescription: zoekt.Repository{
193 Name: "repo17",
194 RawConfig: map[string]string{
195 "repoid": "123",
196 },
197 },
198 SizeMax: 2097152,
199 DisableCTags: true,
200 },
201 }, {
202 name: "doesnotexist",
203 want: false,
204 opts: Options{
205 RepositoryDescription: zoekt.Repository{
206 Name: "doesnotexist",
207 },
208 SizeMax: 2097152,
209 DisableCTags: true,
210 },
211 }}
212
213 for _, tc := range cases {
214 t.Run(tc.name, func(t *testing.T) {
215 tc.opts.IndexDir = "../testdata/shards"
216 t.Log(tc.opts.IndexState())
217 got := tc.opts.IncrementalSkipIndexing()
218 if got != tc.want {
219 t.Fatalf("want %v got %v", tc.want, got)
220 }
221 })
222 }
223}
224
// TestMain parses the test flags (including -update) and runs the tests of
// this package. Output of the standard logger is discarded unless the tests
// run in verbose mode.
func TestMain(m *testing.M) {
	flag.Parse()

	if verbose := testing.Verbose(); !verbose {
		log.SetOutput(io.Discard)
	}

	exitCode := m.Run()
	os.Exit(exitCode)
}
232
233func TestDontCountContentOfSkippedFiles(t *testing.T) {
234 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{
235 Name: "foo",
236 }})
237 if err != nil {
238 t.Fatal(err)
239 }
240
241 // content with at least 100 bytes
242 binary := append([]byte("abc def \x00"), make([]byte, 100)...)
243 err = b.Add(Document{
244 Name: "f1",
245 Content: binary,
246 })
247 if err != nil {
248 t.Fatal(err)
249 }
250 if len(b.todo) != 1 || b.todo[0].SkipReason == SkipReasonNone {
251 t.Fatalf("document should have been skipped")
252 }
253 if b.todo[0].Content != nil {
254 t.Fatalf("document content should be empty")
255 }
256 if b.size >= 100 {
257 t.Fatalf("content of skipped documents should not count towards shard size thresold")
258 }
259}
260
261func TestPartialSuccess(t *testing.T) {
262 dir := t.TempDir()
263
264 opts := Options{
265 IndexDir: dir,
266 ShardMax: 1024,
267 SizeMax: 1 << 20,
268 Parallelism: 1,
269 }
270 opts.RepositoryDescription.Name = "repo"
271 opts.SetDefaults()
272
273 b, err := NewBuilder(opts)
274 if err != nil {
275 t.Fatalf("NewBuilder: %v", err)
276 }
277
278 for i := range 4 {
279 nm := fmt.Sprintf("F%d", i)
280 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
281 }
282 b.buildError = fmt.Errorf("any error")
283
284 // No error checking.
285 _ = b.Finish()
286
287 // Finish cleans up temporary files.
288 if fs, err := filepath.Glob(dir + "/*"); err != nil {
289 t.Errorf("glob(%s): %v", dir, err)
290 } else if len(fs) != 0 {
291 t.Errorf("got shards %v, want []", fs)
292 }
293}
294
295// Tests that we skip looping over repos in compound shards when we know that
296// the repository we are looking for is not in the shard.
297func TestSkipCompoundShards(t *testing.T) {
298 metricCompoundShardLookups.Reset()
299
300 compoundShards := [][]zoekt.Repository{
301 {
302 {Name: "repoA", ID: 1},
303 {Name: "repoB", ID: 2},
304 {Name: "repoC", ID: 3},
305 },
306 {
307 {Name: "repoD", ID: 4},
308 {Name: "repoE", ID: 5},
309 {Name: "repoF", ID: 6},
310 {Name: "repoF", ID: 7},
311 {Name: "repoF", ID: 8},
312 },
313 }
314 var lookForRepoID uint32 = 99
315 wantSkippedCount := 2
316
317 indexDir := t.TempDir()
318 for _, repositoryGroup := range compoundShards {
319 createTestCompoundShard(t, indexDir, repositoryGroup)
320 }
321 o := &Options{
322 IndexDir: indexDir,
323 RepositoryDescription: zoekt.Repository{ID: lookForRepoID},
324 }
325
326 shard := o.findCompoundShard()
327 require.Empty(t, shard)
328
329 // Check if the "skipped" counter was incremented
330 skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped")))
331 require.Equal(t, wantSkippedCount, skippedCount)
332}
333
334// With optimization
335// BenchmarkFindCompoundShard-16 33505 36016 ns/op
336//
337// Without optimization
338// BenchmarkFindCompoundShard-16 76 15568589 ns/op
339func BenchmarkFindCompoundShard(b *testing.B) {
340 // Generate a large compound shard
341 const numRepos = 5000
342 repositories := make([]zoekt.Repository, numRepos)
343 for i := range numRepos {
344 repositories[i] = zoekt.Repository{
345 Name: fmt.Sprintf("repo%d", i+1),
346 ID: uint32(i + 1),
347 }
348 }
349 indexDir := b.TempDir()
350 createTestCompoundShard(b, indexDir, repositories)
351
352 // pick id that is not in the shard
353 var searchRepoID uint32 = numRepos + 1
354
355 b.ResetTimer()
356 for i := 0; i < b.N; i++ {
357 o := &Options{
358 IndexDir: indexDir,
359 RepositoryDescription: zoekt.Repository{ID: searchRepoID},
360 }
361
362 shard := o.findCompoundShard()
363 if shard != "" {
364 b.Fatal("expected empty result")
365 }
366 }
367}
368
369func TestOptions_FindAllShards(t *testing.T) {
370 type simpleShard struct {
371 Repository zoekt.Repository
372 // NumShards is the number of shards that should be created that
373 // contain data for "Repository".
374 NumShards int
375 }
376
377 tests := []struct {
378 name string
379 simpleShards []simpleShard
380 compoundShards [][]zoekt.Repository
381 expectedShardCount int
382 expectedRepository zoekt.Repository
383 }{
384 {
385 name: "repository in normal shard",
386 simpleShards: []simpleShard{
387 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
388 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
389 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
390 },
391 expectedShardCount: 1,
392 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
393 },
394 {
395 name: "repository in compound shard",
396 compoundShards: [][]zoekt.Repository{
397 {
398 {Name: "repoA", ID: 1},
399 {Name: "repoB", ID: 2},
400 {Name: "repoC", ID: 3},
401 },
402 {
403 {Name: "repoD", ID: 4},
404 {Name: "repoE", ID: 5},
405 {Name: "repoF", ID: 6},
406 },
407 },
408 expectedShardCount: 1,
409 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
410 },
411 {
412 name: "repository split across multiple shards",
413 simpleShards: []simpleShard{
414 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
415 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2},
416 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
417 },
418 expectedShardCount: 2,
419 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
420 },
421 {
422 name: "unknown repository",
423 simpleShards: []simpleShard{
424 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
425 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
426 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
427 },
428 compoundShards: [][]zoekt.Repository{
429 {
430 {Name: "repoD", ID: 4},
431 {Name: "repoE", ID: 5},
432 {Name: "repoF", ID: 6},
433 },
434 },
435 expectedShardCount: 0,
436 },
437 {
438 name: "match on ID, not name (compound only)",
439 compoundShards: [][]zoekt.Repository{
440 {
441 {Name: "repoA", ID: 1},
442 {Name: "repoB", ID: 2},
443 {Name: "repoC", ID: 3},
444 },
445 {
446 {Name: "repoD", ID: 4},
447 {Name: "repoE", ID: 5},
448 {Name: "repoF", ID: 6},
449 },
450 },
451 expectedShardCount: 1,
452 expectedRepository: zoekt.Repository{Name: "something-else", ID: 5},
453 },
454 }
455 for _, tt := range tests {
456 t.Run(tt.name, func(t *testing.T) {
457 t.Parallel()
458
459 // prepare
460 indexDir := t.TempDir()
461
462 for _, s := range tt.simpleShards {
463 createTestShard(t, indexDir, s.Repository, s.NumShards)
464 }
465
466 for _, repositoryGroup := range tt.compoundShards {
467 createTestCompoundShard(t, indexDir, repositoryGroup)
468 }
469
470 o := &Options{
471 IndexDir: indexDir,
472 RepositoryDescription: tt.expectedRepository,
473 }
474 o.SetDefaults()
475
476 // run test
477 shards := o.FindAllShards()
478
479 // verify results
480 if len(shards) != tt.expectedShardCount {
481 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards))
482 }
483
484 if tt.expectedShardCount > 0 {
485 for _, s := range shards {
486 // all shards should contain the metadata for the desired repository
487 repos, _, err := ReadMetadataPathAlive(s)
488 if err != nil {
489 t.Fatalf("reading metadata from shard %q: %s", s, err)
490 }
491
492 foundRepository := false
493 for _, r := range repos {
494 if r.ID == tt.expectedRepository.ID {
495 foundRepository = true
496 break
497 }
498 }
499
500 if !foundRepository {
501 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID)
502 }
503 }
504 }
505 })
506 }
507}
508
509func TestBuilder_BranchNamesEqual(t *testing.T) {
510 for i, test := range []struct {
511 oldBranches []zoekt.RepositoryBranch
512 newBranches []zoekt.RepositoryBranch
513 expected bool
514 }{
515 {
516 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
517 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
518 expected: true,
519 },
520 {
521 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}},
522 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}},
523 expected: true,
524 },
525 {
526 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
527 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}},
528 expected: false,
529 },
530 {
531 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
532 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}},
533 expected: false,
534 },
535 {
536 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
537 newBranches: []zoekt.RepositoryBranch{},
538 expected: false,
539 },
540 {
541 oldBranches: []zoekt.RepositoryBranch{},
542 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
543 expected: false,
544 },
545 } {
546 t.Run(strconv.Itoa(i), func(t *testing.T) {
547 actual := BranchNamesEqual(test.oldBranches, test.newBranches)
548 if test.expected != actual {
549 t.Errorf("expected: %t, got: %t", test.expected, actual)
550 }
551 })
552 }
553}
554
555func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) {
556 indexDir := t.TempDir()
557
558 repository := zoekt.Repository{
559 Name: "repo",
560 ID: 1,
561 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}},
562 }
563 createTestShard(t, indexDir, repository, 2)
564
565 repositoryNewBranches := zoekt.Repository{
566 Name: "repo",
567 ID: 1,
568 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}},
569 }
570
571 o := Options{
572 IndexDir: indexDir,
573 RepositoryDescription: repositoryNewBranches,
574 IsDelta: true,
575 }
576 o.SetDefaults()
577
578 b, err := NewBuilder(o)
579 if err != nil {
580 t.Fatalf("NewBuilder: %v", err)
581 }
582
583 err = b.Finish()
584 if !errors.As(err, &deltaBranchSetError{}) {
585 t.Fatalf("expected error complaning about different branch names, got: %s", err)
586 }
587}
588
589func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) {
590 repository := zoekt.Repository{
591 Name: "repo",
592 ID: 1,
593 Branches: []zoekt.RepositoryBranch{{Name: "foo"}},
594 }
595
596 for _, test := range []struct {
597 name string
598 options func(options *Options)
599 }{
600 {
601 name: "update option CTagsPath to non default",
602 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" },
603 },
604 {
605 name: "update option DisableCTags to non default",
606 options: func(options *Options) { options.DisableCTags = true },
607 },
608 {
609 name: "update option SizeMax to non default",
610 options: func(options *Options) { options.SizeMax -= 10 },
611 },
612 {
613 name: "update option LargeFiles to non default",
614 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} },
615 },
616 } {
617 test := test
618
619 t.Run(test.name, func(t *testing.T) {
620 indexDir := t.TempDir()
621
622 // initially use default options
623 createTestShard(t, indexDir, repository, 2)
624
625 o := Options{
626 IndexDir: indexDir,
627 RepositoryDescription: repository,
628 IsDelta: true,
629 }
630 test.options(&o)
631
632 b, err := NewBuilder(o)
633 if err != nil {
634 t.Fatalf("NewBuilder: %v", err)
635 }
636
637 err = b.Finish()
638 if err == nil {
639 t.Fatalf("no error regarding index options mismatch")
640 }
641
642 var optionsMismatchError *deltaIndexOptionsMismatchError
643 if !errors.As(err, &optionsMismatchError) {
644 t.Fatalf("expected error complaining about index options mismatch, got: %s", err)
645 }
646 })
647 }
648}
649
650func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) {
651 olderTime := time.Unix(0, 0)
652 newerTime := time.Unix(10000, 0)
653
654 for _, test := range []struct {
655 name string
656 originalRepository zoekt.Repository
657 updatedRepository zoekt.Repository
658 }{
659 {
660 name: "update commit information",
661 originalRepository: zoekt.Repository{
662 Name: "repo",
663 ID: 1,
664 Branches: []zoekt.RepositoryBranch{
665 {Name: "main", Version: "v1"},
666 {Name: "release", Version: "v1"},
667 },
668 },
669 updatedRepository: zoekt.Repository{
670 Name: "repo",
671 ID: 1,
672 Branches: []zoekt.RepositoryBranch{
673 {Name: "main", Version: "v2"},
674 {Name: "release", Version: "v2"},
675 },
676 },
677 },
678 {
679 name: "update latest commit date (older -> newer)",
680 originalRepository: zoekt.Repository{
681 Name: "repo",
682 ID: 1,
683 Branches: []zoekt.RepositoryBranch{
684 {Name: "main", Version: "v1"},
685 },
686 LatestCommitDate: olderTime,
687 },
688 updatedRepository: zoekt.Repository{
689 Name: "repo",
690 ID: 1,
691 Branches: []zoekt.RepositoryBranch{
692 {Name: "main", Version: "v2"},
693 },
694 LatestCommitDate: newerTime,
695 },
696 },
697 {
698 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)",
699 originalRepository: zoekt.Repository{
700 Name: "repo",
701 ID: 1,
702 Branches: []zoekt.RepositoryBranch{
703 {Name: "main", Version: "v1"},
704 },
705 LatestCommitDate: newerTime,
706 },
707 updatedRepository: zoekt.Repository{
708 Name: "repo",
709 ID: 1,
710 Branches: []zoekt.RepositoryBranch{
711 {Name: "main", Version: "v2"},
712 },
713 LatestCommitDate: olderTime,
714 },
715 },
716 } {
717 test := test
718
719 t.Run(test.name, func(t *testing.T) {
720 indexDir := t.TempDir()
721
722 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) {
723 o.DisableCTags = true
724 })
725
726 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) {
727 o.IsDelta = true
728 o.DisableCTags = true
729 })
730
731 if len(shards) < 3 {
732 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", "))
733 }
734
735 for _, s := range shards {
736 repositories, _, err := ReadMetadataPathAlive(s)
737 if err != nil {
738 t.Fatalf("reading repository metadata from shard %q", s)
739 }
740
741 var foundRepository *zoekt.Repository
742 for _, r := range repositories {
743 if r.ID == test.updatedRepository.ID {
744 foundRepository = r
745 break
746 }
747 }
748
749 if foundRepository == nil {
750 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s)
751 }
752
753 diffOptions := []cmp.Option{
754 cmpopts.IgnoreUnexported(zoekt.Repository{}),
755 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
756 cmpopts.EquateEmpty(),
757 }
758
759 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" {
760 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff)
761 }
762 }
763 })
764 }
765}
766
767func TestFindRepositoryMetadata(t *testing.T) {
768 tests := []struct {
769 name string
770 normalShardRepositories []zoekt.Repository
771 compoundShardRepositories []zoekt.Repository
772 input *zoekt.Repository
773 expectedRepository *zoekt.Repository
774 expectedOk bool
775 }{
776 {
777 name: "repository in normal shards",
778 normalShardRepositories: []zoekt.Repository{
779 {Name: "repoA", ID: 1},
780 {Name: "repoB", ID: 2},
781 {Name: "repoC", ID: 3},
782 },
783 compoundShardRepositories: []zoekt.Repository{
784 {Name: "repoD", ID: 4},
785 {Name: "repoE", ID: 5},
786 {Name: "repoF", ID: 6},
787 },
788 input: &zoekt.Repository{Name: "repoB", ID: 2},
789 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2},
790 expectedOk: true,
791 },
792 {
793 name: "repository in compound shards",
794 normalShardRepositories: []zoekt.Repository{
795 {Name: "repoA", ID: 1},
796 {Name: "repoB", ID: 2},
797 {Name: "repoC", ID: 3},
798 },
799 compoundShardRepositories: []zoekt.Repository{
800 {Name: "repoD", ID: 4},
801 {Name: "repoE", ID: 5},
802 {Name: "repoF", ID: 6},
803 },
804 input: &zoekt.Repository{Name: "repoE", ID: 5},
805 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5},
806 expectedOk: true,
807 },
808 {
809 name: "repository not in any shard",
810 normalShardRepositories: []zoekt.Repository{
811 {Name: "repoA", ID: 1},
812 {Name: "repoB", ID: 2},
813 {Name: "repoC", ID: 3},
814 },
815 compoundShardRepositories: []zoekt.Repository{
816 {Name: "repoD", ID: 4},
817 {Name: "repoE", ID: 5},
818 {Name: "repoF", ID: 6},
819 },
820 input: &zoekt.Repository{Name: "notPresent", ID: 123},
821 expectedRepository: nil,
822 expectedOk: false,
823 },
824 }
825 for _, tt := range tests {
826 t.Run(tt.name, func(t *testing.T) {
827 // setup
828 indexDir := t.TempDir()
829
830 optFns := []func(o *Options){
831 // ctags aren't important for this test, and the equality checks
832 // for diffing repositories can break due to local configuration
833 func(o *Options) {
834 o.DisableCTags = true
835 },
836 }
837
838 for _, r := range tt.normalShardRepositories {
839 createTestShard(t, indexDir, r, 1, optFns...)
840 }
841
842 if len(tt.compoundShardRepositories) > 0 {
843 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...)
844 }
845
846 o := &Options{
847 IndexDir: indexDir,
848 RepositoryDescription: *tt.input,
849 }
850 o.SetDefaults()
851
852 // run test
853 got, _, gotOk, err := o.FindRepositoryMetadata()
854 if err != nil {
855 t.Errorf("received unexpected error: %v", err)
856 return
857 }
858
859 // check outcome
860 compareOptions := []cmp.Option{
861 cmpopts.IgnoreUnexported(zoekt.Repository{}),
862 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
863 cmpopts.EquateEmpty(),
864 }
865
866 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" {
867 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff)
868 }
869
870 if tt.expectedOk != gotOk {
871 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk)
872 }
873 })
874 }
875}
876
877func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string {
878 t.Helper()
879
880 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil {
881 t.Fatal(err)
882 }
883
884 o := Options{
885 IndexDir: indexDir,
886 RepositoryDescription: r,
887 ShardMax: 75, // create a new shard every 75 bytes
888 }
889 o.SetDefaults()
890
891 for _, fn := range optFns {
892 fn(&o)
893 }
894
895 b, err := NewBuilder(o)
896 if err != nil {
897 t.Fatalf("NewBuilder: %v", err)
898 }
899
900 if numShards == 0 {
901 // We have to make at least 1 shard.
902 numShards = 1
903 }
904
905 for i := range numShards {
906 // Create entries (file + contents) that are ~100 bytes each.
907 // This (along with our shardMax setting of 75 bytes) means that each shard
908 // will contain at most one of these.
909 fileName := strconv.Itoa(i)
910 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))}
911 for _, branch := range o.RepositoryDescription.Branches {
912 document.Branches = append(document.Branches, branch.Name)
913 }
914
915 err := b.Add(document)
916 if err != nil {
917 t.Fatalf("failed to add file %q to builder: %s", fileName, err)
918 }
919 }
920
921 if err := b.Finish(); err != nil {
922 t.Fatalf("Finish: %v", err)
923 }
924
925 return o.FindAllShards()
926}
927
928func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) {
929 t.Helper()
930
931 var shardNames []string
932
933 for _, r := range repositories {
934 // create an isolated scratch space to store normal shards for this repository
935 scratchDir := t.TempDir()
936
937 // create shards that'll be merged later
938 createTestShard(t, scratchDir, r, 1, optFns...)
939
940 // discover file names for all the normal shards we created
941 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse
942 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt"))
943 if err != nil {
944 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err)
945 }
946
947 shardNames = append(shardNames, shards...)
948 }
949
950 // load the normal shards that we created
951 var files []IndexFile
952 for _, shard := range shardNames {
953 f, err := os.Open(shard)
954 if err != nil {
955 t.Fatalf("opening shard file: %s", err)
956 }
957 defer f.Close()
958
959 indexFile, err := NewIndexFile(f)
960 if err != nil {
961 t.Fatalf("creating index file: %s", err)
962 }
963 defer indexFile.Close()
964
965 files = append(files, indexFile)
966 }
967
968 // merge all the simple shards into a compound shard
969 tmpName, dstName, err := Merge(indexDir, files...)
970 if err != nil {
971 t.Fatalf("merging index files into compound shard: %s", err)
972 }
973 if err := os.Rename(tmpName, dstName); err != nil {
974 t.Fatal(err)
975 }
976}
977
978func TestIgnoreSizeMax(t *testing.T) {
979 for _, test := range []struct {
980 name string
981 largeFiles []string
982 filePaths []string
983 expected bool
984 }{
985 {
986 name: "empty pattern does nothing",
987 largeFiles: []string{""},
988 filePaths: []string{"F0"},
989 expected: false,
990 },
991 {
992 name: "positive match allows",
993 largeFiles: []string{"F0"},
994 filePaths: []string{"F0"},
995 expected: true,
996 },
997 {
998 name: "positive and negative patterns allows",
999 largeFiles: []string{"F?", "!F0"},
1000 filePaths: []string{"F1"},
1001 expected: true,
1002 },
1003 {
1004 name: "positive and negative patterns disallows",
1005 largeFiles: []string{"F?", "!F0"},
1006 filePaths: []string{"F0"},
1007 expected: false,
1008 },
1009 {
1010 name: "positive escaped pattern allows",
1011 largeFiles: []string{"\\!F?"},
1012 filePaths: []string{"!F0", "!F1"},
1013 expected: true,
1014 },
1015 {
1016 name: "postive escaped pattern does not disallow",
1017 largeFiles: []string{"F0", "\\!F?"},
1018 filePaths: []string{"F0", "!F0"},
1019 expected: true,
1020 },
1021 {
1022 name: "combined meta and literal interpretation disallows",
1023 largeFiles: []string{"*F*", "!!F*"},
1024 filePaths: []string{"!F0"},
1025 expected: false,
1026 },
1027 {
1028 name: "combined meta and literal interpretation allows",
1029 largeFiles: []string{"*F*", "!!F*"},
1030 filePaths: []string{"F0"},
1031 expected: true,
1032 },
1033 {
1034 name: "largeFiles order: positive match overrides previous negative match and allows",
1035 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1036 filePaths: []string{"F0"},
1037 expected: true,
1038 },
1039 {
1040 name: "largeFiles order: positive match overrides previous negative match and disallows",
1041 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1042 filePaths: []string{"F1"},
1043 expected: false,
1044 },
1045 {
1046 name: "largeFiles order: negative match overrides previous positive match and allows",
1047 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1048 filePaths: []string{"F1"},
1049 expected: true,
1050 },
1051 {
1052 name: "largeFiles order: negative match overrides previous positive match and disallows",
1053 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1054 filePaths: []string{"F0"},
1055 expected: false,
1056 },
1057 } {
1058 t.Run(test.name, func(t *testing.T) {
1059 o := Options{
1060 LargeFiles: test.largeFiles,
1061 }
1062
1063 for _, filePath := range test.filePaths {
1064 ignore := o.IgnoreSizeMax(filePath)
1065 if ignore != test.expected {
1066 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore)
1067 }
1068 }
1069 })
1070 }
1071}
1072
// filerankCase is a single test case for testFileRankAspect.
type filerankCase struct {
	// name is used as the subtest name.
	name string
	// docs are the documents to rank, in their initial order.
	docs []*Document
	// want is the expected order after sorting, given as indices into docs.
	want []int
}
1078
1079func testFileRankAspect(t *testing.T, c filerankCase) {
1080 var want []*Document
1081 for _, j := range c.want {
1082 want = append(want, c.docs[j])
1083 }
1084
1085 got := make([]*Document, len(c.docs))
1086 copy(got, c.docs)
1087 for _, d := range got {
1088 DetermineFileCategory(d)
1089 }
1090 sortDocuments(got)
1091
1092 print := func(ds []*Document) string {
1093 r := ""
1094 for _, d := range ds {
1095 r += fmt.Sprintf("%v, ", d)
1096 }
1097 return r
1098 }
1099 if !reflect.DeepEqual(got, want) {
1100 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
1101 }
1102}
1103
1104func TestFileRank(t *testing.T) {
1105 for _, c := range []filerankCase{{
1106 name: "filename",
1107 docs: []*Document{
1108 {
1109 Name: "longlonglong",
1110 Content: []byte("bla"),
1111 },
1112 {
1113 Name: "short",
1114 Content: []byte("bla"),
1115 },
1116 },
1117 want: []int{1, 0},
1118 }, {
1119 name: "test",
1120 docs: []*Document{
1121 {
1122 Name: "foo_test.go",
1123 Content: []byte("bla"),
1124 },
1125 {
1126 Name: "longlonglong",
1127 Content: []byte("bla"),
1128 },
1129 },
1130 want: []int{1, 0},
1131 }, {
1132 name: "content",
1133 docs: []*Document{
1134 {
1135 Content: []byte("bla"),
1136 },
1137 {
1138 Content: []byte("blablablabla"),
1139 },
1140 {
1141 Content: []byte("blabla"),
1142 },
1143 },
1144 want: []int{0, 2, 1},
1145 }, {
1146 name: "skipped docs",
1147 docs: []*Document{
1148 {
1149 Name: "binary_file",
1150 SkipReason: SkipReasonBinary,
1151 },
1152 {
1153 Name: "some_test.go",
1154 Content: []byte("bla"),
1155 },
1156 {
1157 Name: "large_file.go",
1158 SkipReason: SkipReasonTooLarge,
1159 },
1160 {
1161 Name: "file.go",
1162 Content: []byte("blabla"),
1163 },
1164 },
1165 want: []int{3, 1, 0, 2},
1166 }} {
1167 t.Run(c.name, func(t *testing.T) {
1168 testFileRankAspect(t, c)
1169 })
1170 }
1171}
1172
1173func TestOptions_shardName(t *testing.T) {
1174 opts := Options{
1175 IndexDir: "/data",
1176 RepositoryDescription: zoekt.Repository{
1177 Name: "a/b",
1178 TenantID: 123,
1179 ID: 456,
1180 },
1181 }
1182
1183 t.Setenv("WORKSPACES_API_URL", "")
1184 if got, want := opts.shardNameVersion(16, 0), "/data/a%2Fb_v16.00000.zoekt"; got != want {
1185 t.Fatalf("expected shard name to be repo name based:\ngot: %q\nwant: %q", got, want)
1186 }
1187
1188 t.Setenv("WORKSPACES_API_URL", "http://example.com")
1189 if got, want := opts.shardNameVersion(16, 0), "/data/000000123_000000456_v16.00000.zoekt"; got != want {
1190 t.Fatalf("expected shard name to be ID based:\ngot: %q\nwant: %q", got, want)
1191 }
1192
1193 // If something goes wrong and TenantID and RepoID is unset, we create a
1194 // name which won't be visible by any tenant.
1195 opts = Options{
1196 IndexDir: "/data",
1197 RepositoryDescription: zoekt.Repository{
1198 Name: "a/b",
1199 },
1200 }
1201 if got, want := opts.shardNameVersion(16, 0), "/data/000000000_000000000_v16.00000.zoekt"; got != want {
1202 t.Fatalf("expected shard name to be with no tenant:\ngot: %q\nwant: %q", got, want)
1203 }
1204}