// Fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5 "flag"
6 "fmt"
7 "io"
8 "log"
9 "os"
10 "path/filepath"
11 "reflect"
12 "strconv"
13 "strings"
14 "testing"
15 "time"
16
17 "github.com/google/go-cmp/cmp"
18 "github.com/google/go-cmp/cmp/cmpopts"
19 "github.com/prometheus/client_golang/prometheus/testutil"
20 "github.com/stretchr/testify/require"
21
22 "github.com/sourcegraph/zoekt"
23)
24
// update is bound to the -update test flag. When it is set, golden-file tests
// in this file overwrite their golden file with the freshly produced output
// instead of comparing against it.
var update = flag.Bool("update", false, "update golden file")
26
27// ensure we don't regress on how we build v16
28func TestBuildv16(t *testing.T) {
29 dir := t.TempDir()
30
31 opts := Options{
32 IndexDir: dir,
33 RepositoryDescription: zoekt.Repository{
34 Name: "repo",
35 Source: "./testdata/repo/",
36 },
37 DisableCTags: true,
38 }
39 opts.SetDefaults()
40
41 b, err := NewBuilder(opts)
42 if err != nil {
43 t.Fatal(err)
44 }
45
46 for _, p := range []string{"main.go"} {
47 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p))
48 if err != nil {
49 t.Fatal(err)
50 }
51 if err := b.AddFile(p, blob); err != nil {
52 t.Fatal(err)
53 }
54 }
55
56 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt")
57
58 // fields indexTime and id depend on time. For this test, we copy the fields from
59 // the old shard.
60 _, wantMetadata, err := ReadMetadataPath(wantP)
61 if err != nil {
62 t.Fatal(err)
63 }
64 b.indexTime = wantMetadata.IndexTime
65 b.id = wantMetadata.ID
66
67 if err := b.Finish(); err != nil {
68 t.Fatal(err)
69 }
70
71 gotP := filepath.Join(dir, "repo_v16.00000.zoekt")
72
73 if *update {
74 data, err := os.ReadFile(gotP)
75 if err != nil {
76 t.Fatal(err)
77 }
78 err = os.WriteFile(wantP, data, 0o644)
79 if err != nil {
80 t.Fatal(err)
81 }
82 return
83 }
84
85 got, err := os.ReadFile(gotP)
86 if err != nil {
87 t.Fatal(err)
88 }
89 want, err := os.ReadFile(wantP)
90 if err != nil {
91 t.Fatal(err)
92 }
93
94 if d := cmp.Diff(want, got); d != "" {
95 t.Errorf("mismatch (-want +got):\n%s", d)
96 }
97}
98
99func TestFlags(t *testing.T) {
100 cases := []struct {
101 args []string
102 want Options
103 }{{
104 // Defaults
105 args: []string{},
106 want: Options{},
107 }, {
108 args: []string{"-index", "/tmp"},
109 want: Options{
110 IndexDir: "/tmp",
111 },
112 }, {
113 // single large file pattern
114 args: []string{"-large_file", "*.md"},
115 want: Options{
116 LargeFiles: []string{"*.md"},
117 },
118 }, {
119 // multiple large file pattern
120 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"},
121 want: Options{
122 LargeFiles: []string{"*.md", "*.yaml"},
123 },
124 }, {
125 // multiple large file pattern with negated pattern
126 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"},
127 want: Options{
128 LargeFiles: []string{"*.md", "!*.yaml"},
129 },
130 }, {
131 // multiple large file pattern with escaped character
132 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"},
133 want: Options{
134 LargeFiles: []string{"*.md", "\\!*.yaml"},
135 },
136 }}
137
138 ignored := []cmp.Option{
139 // depends on $PATH setting.
140 cmpopts.IgnoreFields(Options{}, "CTagsPath"),
141 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"),
142 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"),
143 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
144 }
145
146 for _, c := range cases {
147 c.want.SetDefaults()
148 // depends on $PATH setting.
149 c.want.CTagsPath = ""
150
151 got := Options{}
152 fs := flag.NewFlagSet("", flag.ContinueOnError)
153 got.Flags(fs)
154 if err := fs.Parse(c.args); err != nil {
155 t.Errorf("failed to parse args %v: %v", c.args, err)
156 } else if d := cmp.Diff(c.want, got, ignored...); d != "" {
157 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d)
158 }
159 }
160}
161
162func TestIncrementalSkipIndexing(t *testing.T) {
163 cases := []struct {
164 name string
165 want bool
166 opts Options
167 }{{
168 name: "v17-noop",
169 want: true,
170 opts: Options{
171 RepositoryDescription: zoekt.Repository{
172 Name: "repo17",
173 },
174 SizeMax: 2097152,
175 DisableCTags: true,
176 },
177 }, {
178 name: "v16-noop",
179 want: true,
180 opts: Options{
181 RepositoryDescription: zoekt.Repository{
182 Name: "repo",
183 },
184 SizeMax: 2097152,
185 DisableCTags: true,
186 },
187 }, {
188 name: "v17-id",
189 want: false,
190 opts: Options{
191 RepositoryDescription: zoekt.Repository{
192 Name: "repo17",
193 RawConfig: map[string]string{
194 "repoid": "123",
195 },
196 },
197 SizeMax: 2097152,
198 DisableCTags: true,
199 },
200 }, {
201 name: "doesnotexist",
202 want: false,
203 opts: Options{
204 RepositoryDescription: zoekt.Repository{
205 Name: "doesnotexist",
206 },
207 SizeMax: 2097152,
208 DisableCTags: true,
209 },
210 }}
211
212 for _, tc := range cases {
213 t.Run(tc.name, func(t *testing.T) {
214 tc.opts.IndexDir = "../testdata/shards"
215 t.Log(tc.opts.IndexState())
216 got := tc.opts.IncrementalSkipIndexing()
217 if got != tc.want {
218 t.Fatalf("want %v got %v", tc.want, got)
219 }
220 })
221 }
222}
223
// TestMain parses the test flags, silences the standard logger unless the
// tests run in verbose mode, and exits with the status returned by m.Run.
func TestMain(m *testing.M) {
	flag.Parse()

	if verbose := testing.Verbose(); !verbose {
		log.SetOutput(io.Discard)
	}

	exitCode := m.Run()
	os.Exit(exitCode)
}
231
232func TestDontCountContentOfSkippedFiles(t *testing.T) {
233 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{
234 Name: "foo",
235 }})
236 if err != nil {
237 t.Fatal(err)
238 }
239
240 // content with at least 100 bytes
241 binary := append([]byte("abc def \x00"), make([]byte, 100)...)
242 err = b.Add(Document{
243 Name: "f1",
244 Content: binary,
245 })
246 if err != nil {
247 t.Fatal(err)
248 }
249 if len(b.todo) != 1 || b.todo[0].SkipReason == "" {
250 t.Fatalf("document should have been skipped")
251 }
252 if b.todo[0].Content != nil {
253 t.Fatalf("document content should be empty")
254 }
255 if b.size >= 100 {
256 t.Fatalf("content of skipped documents should not count towards shard size thresold")
257 }
258}
259
260func TestPartialSuccess(t *testing.T) {
261 dir := t.TempDir()
262
263 opts := Options{
264 IndexDir: dir,
265 ShardMax: 1024,
266 SizeMax: 1 << 20,
267 Parallelism: 1,
268 }
269 opts.RepositoryDescription.Name = "repo"
270 opts.SetDefaults()
271
272 b, err := NewBuilder(opts)
273 if err != nil {
274 t.Fatalf("NewBuilder: %v", err)
275 }
276
277 for i := 0; i < 4; i++ {
278 nm := fmt.Sprintf("F%d", i)
279 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
280 }
281 b.buildError = fmt.Errorf("any error")
282
283 // No error checking.
284 _ = b.Finish()
285
286 // Finish cleans up temporary files.
287 if fs, err := filepath.Glob(dir + "/*"); err != nil {
288 t.Errorf("glob(%s): %v", dir, err)
289 } else if len(fs) != 0 {
290 t.Errorf("got shards %v, want []", fs)
291 }
292}
293
294// Tests that we skip looping over repos in compound shards when we know that
295// the repository we are looking for is not in the shard.
296func TestSkipCompoundShards(t *testing.T) {
297 metricCompoundShardLookups.Reset()
298
299 compoundShards := [][]zoekt.Repository{
300 {
301 {Name: "repoA", ID: 1},
302 {Name: "repoB", ID: 2},
303 {Name: "repoC", ID: 3},
304 },
305 {
306 {Name: "repoD", ID: 4},
307 {Name: "repoE", ID: 5},
308 {Name: "repoF", ID: 6},
309 {Name: "repoF", ID: 7},
310 {Name: "repoF", ID: 8},
311 },
312 }
313 var lookForRepoID uint32 = 99
314 wantSkippedCount := 2
315
316 indexDir := t.TempDir()
317 for _, repositoryGroup := range compoundShards {
318 createTestCompoundShard(t, indexDir, repositoryGroup)
319 }
320 o := &Options{
321 IndexDir: indexDir,
322 RepositoryDescription: zoekt.Repository{ID: lookForRepoID},
323 }
324
325 shard := o.findCompoundShard()
326 require.Empty(t, shard)
327
328 // Check if the "skipped" counter was incremented
329 skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped")))
330 require.Equal(t, wantSkippedCount, skippedCount)
331}
332
333// With optimization
334// BenchmarkFindCompoundShard-16 33505 36016 ns/op
335//
336// Without optimization
337// BenchmarkFindCompoundShard-16 76 15568589 ns/op
338func BenchmarkFindCompoundShard(b *testing.B) {
339 // Generate a large compound shard
340 const numRepos = 5000
341 repositories := make([]zoekt.Repository, numRepos)
342 for i := 0; i < numRepos; i++ {
343 repositories[i] = zoekt.Repository{
344 Name: fmt.Sprintf("repo%d", i+1),
345 ID: uint32(i + 1),
346 }
347 }
348 indexDir := b.TempDir()
349 createTestCompoundShard(b, indexDir, repositories)
350
351 // pick id that is not in the shard
352 var searchRepoID uint32 = numRepos + 1
353
354 b.ResetTimer()
355 for i := 0; i < b.N; i++ {
356 o := &Options{
357 IndexDir: indexDir,
358 RepositoryDescription: zoekt.Repository{ID: searchRepoID},
359 }
360
361 shard := o.findCompoundShard()
362 if shard != "" {
363 b.Fatal("expected empty result")
364 }
365 }
366}
367
368func TestOptions_FindAllShards(t *testing.T) {
369 type simpleShard struct {
370 Repository zoekt.Repository
371 // NumShards is the number of shards that should be created that
372 // contain data for "Repository".
373 NumShards int
374 }
375
376 tests := []struct {
377 name string
378 simpleShards []simpleShard
379 compoundShards [][]zoekt.Repository
380 expectedShardCount int
381 expectedRepository zoekt.Repository
382 }{
383 {
384 name: "repository in normal shard",
385 simpleShards: []simpleShard{
386 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
387 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
388 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
389 },
390 expectedShardCount: 1,
391 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
392 },
393 {
394 name: "repository in compound shard",
395 compoundShards: [][]zoekt.Repository{
396 {
397 {Name: "repoA", ID: 1},
398 {Name: "repoB", ID: 2},
399 {Name: "repoC", ID: 3},
400 },
401 {
402 {Name: "repoD", ID: 4},
403 {Name: "repoE", ID: 5},
404 {Name: "repoF", ID: 6},
405 },
406 },
407 expectedShardCount: 1,
408 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
409 },
410 {
411 name: "repository split across multiple shards",
412 simpleShards: []simpleShard{
413 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
414 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2},
415 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
416 },
417 expectedShardCount: 2,
418 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
419 },
420 {
421 name: "unknown repository",
422 simpleShards: []simpleShard{
423 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
424 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
425 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
426 },
427 compoundShards: [][]zoekt.Repository{
428 {
429 {Name: "repoD", ID: 4},
430 {Name: "repoE", ID: 5},
431 {Name: "repoF", ID: 6},
432 },
433 },
434 expectedShardCount: 0,
435 },
436 {
437 name: "match on ID, not name (compound only)",
438 compoundShards: [][]zoekt.Repository{
439 {
440 {Name: "repoA", ID: 1},
441 {Name: "repoB", ID: 2},
442 {Name: "repoC", ID: 3},
443 },
444 {
445 {Name: "repoD", ID: 4},
446 {Name: "repoE", ID: 5},
447 {Name: "repoF", ID: 6},
448 },
449 },
450 expectedShardCount: 1,
451 expectedRepository: zoekt.Repository{Name: "something-else", ID: 5},
452 },
453 }
454 for _, tt := range tests {
455 t.Run(tt.name, func(t *testing.T) {
456 t.Parallel()
457
458 // prepare
459 indexDir := t.TempDir()
460
461 for _, s := range tt.simpleShards {
462 createTestShard(t, indexDir, s.Repository, s.NumShards)
463 }
464
465 for _, repositoryGroup := range tt.compoundShards {
466 createTestCompoundShard(t, indexDir, repositoryGroup)
467 }
468
469 o := &Options{
470 IndexDir: indexDir,
471 RepositoryDescription: tt.expectedRepository,
472 }
473 o.SetDefaults()
474
475 // run test
476 shards := o.FindAllShards()
477
478 // verify results
479 if len(shards) != tt.expectedShardCount {
480 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards))
481 }
482
483 if tt.expectedShardCount > 0 {
484 for _, s := range shards {
485 // all shards should contain the metadata for the desired repository
486 repos, _, err := ReadMetadataPathAlive(s)
487 if err != nil {
488 t.Fatalf("reading metadata from shard %q: %s", s, err)
489 }
490
491 foundRepository := false
492 for _, r := range repos {
493 if r.ID == tt.expectedRepository.ID {
494 foundRepository = true
495 break
496 }
497 }
498
499 if !foundRepository {
500 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID)
501 }
502 }
503 }
504 })
505 }
506}
507
508func TestBuilder_BranchNamesEqual(t *testing.T) {
509 for i, test := range []struct {
510 oldBranches []zoekt.RepositoryBranch
511 newBranches []zoekt.RepositoryBranch
512 expected bool
513 }{
514 {
515 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
516 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
517 expected: true,
518 },
519 {
520 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}},
521 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}},
522 expected: true,
523 },
524 {
525 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
526 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}},
527 expected: false,
528 },
529 {
530 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
531 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}},
532 expected: false,
533 },
534 {
535 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
536 newBranches: []zoekt.RepositoryBranch{},
537 expected: false,
538 },
539 {
540 oldBranches: []zoekt.RepositoryBranch{},
541 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
542 expected: false,
543 },
544 } {
545 t.Run(strconv.Itoa(i), func(t *testing.T) {
546 actual := BranchNamesEqual(test.oldBranches, test.newBranches)
547 if test.expected != actual {
548 t.Errorf("expected: %t, got: %t", test.expected, actual)
549 }
550 })
551 }
552}
553
554func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) {
555 indexDir := t.TempDir()
556
557 repository := zoekt.Repository{
558 Name: "repo",
559 ID: 1,
560 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}},
561 }
562 createTestShard(t, indexDir, repository, 2)
563
564 repositoryNewBranches := zoekt.Repository{
565 Name: "repo",
566 ID: 1,
567 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}},
568 }
569
570 o := Options{
571 IndexDir: indexDir,
572 RepositoryDescription: repositoryNewBranches,
573 IsDelta: true,
574 }
575 o.SetDefaults()
576
577 b, err := NewBuilder(o)
578 if err != nil {
579 t.Fatalf("NewBuilder: %v", err)
580 }
581
582 err = b.Finish()
583 if !errors.As(err, &deltaBranchSetError{}) {
584 t.Fatalf("expected error complaning about different branch names, got: %s", err)
585 }
586}
587
588func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) {
589 repository := zoekt.Repository{
590 Name: "repo",
591 ID: 1,
592 Branches: []zoekt.RepositoryBranch{{Name: "foo"}},
593 }
594
595 for _, test := range []struct {
596 name string
597 options func(options *Options)
598 }{
599 {
600 name: "update option CTagsPath to non default",
601 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" },
602 },
603 {
604 name: "update option DisableCTags to non default",
605 options: func(options *Options) { options.DisableCTags = true },
606 },
607 {
608 name: "update option SizeMax to non default",
609 options: func(options *Options) { options.SizeMax -= 10 },
610 },
611 {
612 name: "update option LargeFiles to non default",
613 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} },
614 },
615 } {
616 test := test
617
618 t.Run(test.name, func(t *testing.T) {
619 indexDir := t.TempDir()
620
621 // initially use default options
622 createTestShard(t, indexDir, repository, 2)
623
624 o := Options{
625 IndexDir: indexDir,
626 RepositoryDescription: repository,
627 IsDelta: true,
628 }
629 test.options(&o)
630
631 b, err := NewBuilder(o)
632 if err != nil {
633 t.Fatalf("NewBuilder: %v", err)
634 }
635
636 err = b.Finish()
637 if err == nil {
638 t.Fatalf("no error regarding index options mismatch")
639 }
640
641 var optionsMismatchError *deltaIndexOptionsMismatchError
642 if !errors.As(err, &optionsMismatchError) {
643 t.Fatalf("expected error complaining about index options mismatch, got: %s", err)
644 }
645 })
646 }
647}
648
649func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) {
650 olderTime := time.Unix(0, 0)
651 newerTime := time.Unix(10000, 0)
652
653 for _, test := range []struct {
654 name string
655 originalRepository zoekt.Repository
656 updatedRepository zoekt.Repository
657 }{
658 {
659 name: "update commit information",
660 originalRepository: zoekt.Repository{
661 Name: "repo",
662 ID: 1,
663 Branches: []zoekt.RepositoryBranch{
664 {Name: "main", Version: "v1"},
665 {Name: "release", Version: "v1"},
666 },
667 },
668 updatedRepository: zoekt.Repository{
669 Name: "repo",
670 ID: 1,
671 Branches: []zoekt.RepositoryBranch{
672 {Name: "main", Version: "v2"},
673 {Name: "release", Version: "v2"},
674 },
675 },
676 },
677 {
678 name: "update latest commit date (older -> newer)",
679 originalRepository: zoekt.Repository{
680 Name: "repo",
681 ID: 1,
682 Branches: []zoekt.RepositoryBranch{
683 {Name: "main", Version: "v1"},
684 },
685 LatestCommitDate: olderTime,
686 },
687 updatedRepository: zoekt.Repository{
688 Name: "repo",
689 ID: 1,
690 Branches: []zoekt.RepositoryBranch{
691 {Name: "main", Version: "v2"},
692 },
693 LatestCommitDate: newerTime,
694 },
695 },
696 {
697 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)",
698 originalRepository: zoekt.Repository{
699 Name: "repo",
700 ID: 1,
701 Branches: []zoekt.RepositoryBranch{
702 {Name: "main", Version: "v1"},
703 },
704 LatestCommitDate: newerTime,
705 },
706 updatedRepository: zoekt.Repository{
707 Name: "repo",
708 ID: 1,
709 Branches: []zoekt.RepositoryBranch{
710 {Name: "main", Version: "v2"},
711 },
712 LatestCommitDate: olderTime,
713 },
714 },
715 } {
716 test := test
717
718 t.Run(test.name, func(t *testing.T) {
719 indexDir := t.TempDir()
720
721 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) {
722 o.DisableCTags = true
723 })
724
725 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) {
726 o.IsDelta = true
727 o.DisableCTags = true
728 })
729
730 if len(shards) < 3 {
731 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", "))
732 }
733
734 for _, s := range shards {
735 repositories, _, err := ReadMetadataPathAlive(s)
736 if err != nil {
737 t.Fatalf("reading repository metadata from shard %q", s)
738 }
739
740 var foundRepository *zoekt.Repository
741 for _, r := range repositories {
742 if r.ID == test.updatedRepository.ID {
743 foundRepository = r
744 break
745 }
746 }
747
748 if foundRepository == nil {
749 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s)
750 }
751
752 diffOptions := []cmp.Option{
753 cmpopts.IgnoreUnexported(zoekt.Repository{}),
754 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
755 cmpopts.EquateEmpty(),
756 }
757
758 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" {
759 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff)
760 }
761 }
762 })
763 }
764}
765
766func TestFindRepositoryMetadata(t *testing.T) {
767 tests := []struct {
768 name string
769 normalShardRepositories []zoekt.Repository
770 compoundShardRepositories []zoekt.Repository
771 input *zoekt.Repository
772 expectedRepository *zoekt.Repository
773 expectedOk bool
774 }{
775 {
776 name: "repository in normal shards",
777 normalShardRepositories: []zoekt.Repository{
778 {Name: "repoA", ID: 1},
779 {Name: "repoB", ID: 2},
780 {Name: "repoC", ID: 3},
781 },
782 compoundShardRepositories: []zoekt.Repository{
783 {Name: "repoD", ID: 4},
784 {Name: "repoE", ID: 5},
785 {Name: "repoF", ID: 6},
786 },
787 input: &zoekt.Repository{Name: "repoB", ID: 2},
788 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2},
789 expectedOk: true,
790 },
791 {
792 name: "repository in compound shards",
793 normalShardRepositories: []zoekt.Repository{
794 {Name: "repoA", ID: 1},
795 {Name: "repoB", ID: 2},
796 {Name: "repoC", ID: 3},
797 },
798 compoundShardRepositories: []zoekt.Repository{
799 {Name: "repoD", ID: 4},
800 {Name: "repoE", ID: 5},
801 {Name: "repoF", ID: 6},
802 },
803 input: &zoekt.Repository{Name: "repoE", ID: 5},
804 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5},
805 expectedOk: true,
806 },
807 {
808 name: "repository not in any shard",
809 normalShardRepositories: []zoekt.Repository{
810 {Name: "repoA", ID: 1},
811 {Name: "repoB", ID: 2},
812 {Name: "repoC", ID: 3},
813 },
814 compoundShardRepositories: []zoekt.Repository{
815 {Name: "repoD", ID: 4},
816 {Name: "repoE", ID: 5},
817 {Name: "repoF", ID: 6},
818 },
819 input: &zoekt.Repository{Name: "notPresent", ID: 123},
820 expectedRepository: nil,
821 expectedOk: false,
822 },
823 }
824 for _, tt := range tests {
825 t.Run(tt.name, func(t *testing.T) {
826 // setup
827 indexDir := t.TempDir()
828
829 optFns := []func(o *Options){
830 // ctags aren't important for this test, and the equality checks
831 // for diffing repositories can break due to local configuration
832 func(o *Options) {
833 o.DisableCTags = true
834 },
835 }
836
837 for _, r := range tt.normalShardRepositories {
838 createTestShard(t, indexDir, r, 1, optFns...)
839 }
840
841 if len(tt.compoundShardRepositories) > 0 {
842 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...)
843 }
844
845 o := &Options{
846 IndexDir: indexDir,
847 RepositoryDescription: *tt.input,
848 }
849 o.SetDefaults()
850
851 // run test
852 got, _, gotOk, err := o.FindRepositoryMetadata()
853 if err != nil {
854 t.Errorf("received unexpected error: %v", err)
855 return
856 }
857
858 // check outcome
859 compareOptions := []cmp.Option{
860 cmpopts.IgnoreUnexported(zoekt.Repository{}),
861 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
862 cmpopts.EquateEmpty(),
863 }
864
865 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" {
866 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff)
867 }
868
869 if tt.expectedOk != gotOk {
870 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk)
871 }
872 })
873 }
874}
875
876func TestIsLowPriority(t *testing.T) {
877 cases := []string{
878 "builder_test.go",
879 "test/TestQuery.java",
880 "search/vendor/thirdparty.cc",
881 "search/node_modules/search/js",
882 "search.min.js",
883 "internal/search.js.map",
884 }
885
886 for _, tt := range cases {
887 t.Run(tt, func(t *testing.T) {
888 if !IsLowPriority(tt, nil) {
889 t.Errorf("expected file '%s' to be low priority", tt)
890 }
891 })
892 }
893
894 negativeCases := []string{
895 "builder.go",
896 "RoutesTrigger.java",
897 "search.js",
898 }
899
900 for _, tt := range negativeCases {
901 t.Run(tt, func(t *testing.T) {
902 if IsLowPriority(tt, nil) {
903 t.Errorf("did not expect file '%s' to be low priority", tt)
904 }
905 })
906 }
907
908 // Explicitly check that content is important by using the same filename but
909 // different content.
910 normal := "package mock\n\nvar Mock struct {}"
911 generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}"
912 if IsLowPriority("mock.go", []byte(normal)) {
913 t.Error("expected non-generated content to not be low priority")
914 }
915 if !IsLowPriority("mock.go", []byte(generated)) {
916 t.Error("expected generated content to be low priority")
917 }
918}
919
920func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string {
921 t.Helper()
922
923 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil {
924 t.Fatal(err)
925 }
926
927 o := Options{
928 IndexDir: indexDir,
929 RepositoryDescription: r,
930 ShardMax: 75, // create a new shard every 75 bytes
931 }
932 o.SetDefaults()
933
934 for _, fn := range optFns {
935 fn(&o)
936 }
937
938 b, err := NewBuilder(o)
939 if err != nil {
940 t.Fatalf("NewBuilder: %v", err)
941 }
942
943 if numShards == 0 {
944 // We have to make at least 1 shard.
945 numShards = 1
946 }
947
948 for i := 0; i < numShards; i++ {
949 // Create entries (file + contents) that are ~100 bytes each.
950 // This (along with our shardMax setting of 75 bytes) means that each shard
951 // will contain at most one of these.
952 fileName := strconv.Itoa(i)
953 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))}
954 for _, branch := range o.RepositoryDescription.Branches {
955 document.Branches = append(document.Branches, branch.Name)
956 }
957
958 err := b.Add(document)
959 if err != nil {
960 t.Fatalf("failed to add file %q to builder: %s", fileName, err)
961 }
962 }
963
964 if err := b.Finish(); err != nil {
965 t.Fatalf("Finish: %v", err)
966 }
967
968 return o.FindAllShards()
969}
970
971func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) {
972 t.Helper()
973
974 var shardNames []string
975
976 for _, r := range repositories {
977 // create an isolated scratch space to store normal shards for this repository
978 scratchDir := t.TempDir()
979
980 // create shards that'll be merged later
981 createTestShard(t, scratchDir, r, 1, optFns...)
982
983 // discover file names for all the normal shards we created
984 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse
985 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt"))
986 if err != nil {
987 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err)
988 }
989
990 shardNames = append(shardNames, shards...)
991 }
992
993 // load the normal shards that we created
994 var files []IndexFile
995 for _, shard := range shardNames {
996 f, err := os.Open(shard)
997 if err != nil {
998 t.Fatalf("opening shard file: %s", err)
999 }
1000 defer f.Close()
1001
1002 indexFile, err := NewIndexFile(f)
1003 if err != nil {
1004 t.Fatalf("creating index file: %s", err)
1005 }
1006 defer indexFile.Close()
1007
1008 files = append(files, indexFile)
1009 }
1010
1011 // merge all the simple shards into a compound shard
1012 tmpName, dstName, err := Merge(indexDir, files...)
1013 if err != nil {
1014 t.Fatalf("merging index files into compound shard: %s", err)
1015 }
1016 if err := os.Rename(tmpName, dstName); err != nil {
1017 t.Fatal(err)
1018 }
1019}
1020
1021func TestIgnoreSizeMax(t *testing.T) {
1022 for _, test := range []struct {
1023 name string
1024 largeFiles []string
1025 filePaths []string
1026 expected bool
1027 }{
1028 {
1029 name: "empty pattern does nothing",
1030 largeFiles: []string{""},
1031 filePaths: []string{"F0"},
1032 expected: false,
1033 },
1034 {
1035 name: "positive match allows",
1036 largeFiles: []string{"F0"},
1037 filePaths: []string{"F0"},
1038 expected: true,
1039 },
1040 {
1041 name: "positive and negative patterns allows",
1042 largeFiles: []string{"F?", "!F0"},
1043 filePaths: []string{"F1"},
1044 expected: true,
1045 },
1046 {
1047 name: "positive and negative patterns disallows",
1048 largeFiles: []string{"F?", "!F0"},
1049 filePaths: []string{"F0"},
1050 expected: false,
1051 },
1052 {
1053 name: "positive escaped pattern allows",
1054 largeFiles: []string{"\\!F?"},
1055 filePaths: []string{"!F0", "!F1"},
1056 expected: true,
1057 },
1058 {
1059 name: "postive escaped pattern does not disallow",
1060 largeFiles: []string{"F0", "\\!F?"},
1061 filePaths: []string{"F0", "!F0"},
1062 expected: true,
1063 },
1064 {
1065 name: "combined meta and literal interpretation disallows",
1066 largeFiles: []string{"*F*", "!!F*"},
1067 filePaths: []string{"!F0"},
1068 expected: false,
1069 },
1070 {
1071 name: "combined meta and literal interpretation allows",
1072 largeFiles: []string{"*F*", "!!F*"},
1073 filePaths: []string{"F0"},
1074 expected: true,
1075 },
1076 {
1077 name: "largeFiles order: positive match overrides previous negative match and allows",
1078 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1079 filePaths: []string{"F0"},
1080 expected: true,
1081 },
1082 {
1083 name: "largeFiles order: positive match overrides previous negative match and disallows",
1084 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1085 filePaths: []string{"F1"},
1086 expected: false,
1087 },
1088 {
1089 name: "largeFiles order: negative match overrides previous positive match and allows",
1090 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1091 filePaths: []string{"F1"},
1092 expected: true,
1093 },
1094 {
1095 name: "largeFiles order: negative match overrides previous positive match and disallows",
1096 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1097 filePaths: []string{"F0"},
1098 expected: false,
1099 },
1100 } {
1101 t.Run(test.name, func(t *testing.T) {
1102 o := Options{
1103 LargeFiles: test.largeFiles,
1104 }
1105
1106 for _, filePath := range test.filePaths {
1107 ignore := o.IgnoreSizeMax(filePath)
1108 if ignore != test.expected {
1109 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore)
1110 }
1111 }
1112 })
1113 }
1114}
1115
1116type filerankCase struct {
1117 name string
1118 docs []*Document
1119 want []int
1120}
1121
1122func testFileRankAspect(t *testing.T, c filerankCase) {
1123 var want []*Document
1124 for _, j := range c.want {
1125 want = append(want, c.docs[j])
1126 }
1127
1128 got := make([]*Document, len(c.docs))
1129 copy(got, c.docs)
1130 sortDocuments(got)
1131
1132 print := func(ds []*Document) string {
1133 r := ""
1134 for _, d := range ds {
1135 r += fmt.Sprintf("%v, ", d)
1136 }
1137 return r
1138 }
1139 if !reflect.DeepEqual(got, want) {
1140 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
1141 }
1142}
1143
1144func TestFileRank(t *testing.T) {
1145 for _, c := range []filerankCase{{
1146 name: "filename",
1147 docs: []*Document{
1148 {
1149 Name: "longlonglong",
1150 Content: []byte("bla"),
1151 },
1152 {
1153 Name: "short",
1154 Content: []byte("bla"),
1155 },
1156 },
1157 want: []int{1, 0},
1158 }, {
1159 name: "test",
1160 docs: []*Document{
1161 {
1162 Name: "foo_test.go",
1163 Content: []byte("bla"),
1164 },
1165 {
1166 Name: "longlonglong",
1167 Content: []byte("bla"),
1168 },
1169 },
1170 want: []int{1, 0},
1171 }, {
1172 name: "content",
1173 docs: []*Document{
1174 {
1175 Content: []byte("bla"),
1176 },
1177 {
1178 Content: []byte("blablablabla"),
1179 },
1180 {
1181 Content: []byte("blabla"),
1182 },
1183 },
1184 want: []int{0, 2, 1},
1185 }, {
1186 name: "skipped docs",
1187 docs: []*Document{
1188 {
1189 Name: "binary_file",
1190 SkipReason: "binary file",
1191 },
1192 {
1193 Name: "some_test.go",
1194 Content: []byte("bla"),
1195 },
1196 {
1197 Name: "large_file.go",
1198 SkipReason: "too large",
1199 },
1200 {
1201 Name: "file.go",
1202 Content: []byte("blabla"),
1203 },
1204 },
1205 want: []int{3, 1, 0, 2},
1206 }} {
1207 t.Run(c.name, func(t *testing.T) {
1208 testFileRankAspect(t, c)
1209 })
1210 }
1211}