fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5 "flag"
6 "fmt"
7 "io"
8 "log"
9 "os"
10 "path/filepath"
11 "reflect"
12 "strconv"
13 "strings"
14 "testing"
15 "time"
16
17 "github.com/google/go-cmp/cmp"
18 "github.com/google/go-cmp/cmp/cmpopts"
19 "github.com/sourcegraph/zoekt"
20)
21
// update is the -update test flag. When set, TestBuildv16 overwrites the golden
// shard on disk with the freshly built shard instead of comparing against it.
var update = flag.Bool("update", false, "update golden file")
23
24// ensure we don't regress on how we build v16
25func TestBuildv16(t *testing.T) {
26 dir := t.TempDir()
27
28 opts := Options{
29 IndexDir: dir,
30 RepositoryDescription: zoekt.Repository{
31 Name: "repo",
32 Source: "./testdata/repo/",
33 },
34 DisableCTags: true,
35 }
36 opts.SetDefaults()
37
38 b, err := NewBuilder(opts)
39 if err != nil {
40 t.Fatal(err)
41 }
42
43 for _, p := range []string{"main.go"} {
44 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p))
45 if err != nil {
46 t.Fatal(err)
47 }
48 if err := b.AddFile(p, blob); err != nil {
49 t.Fatal(err)
50 }
51 }
52
53 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt")
54
55 // fields indexTime and id depend on time. For this test, we copy the fields from
56 // the old shard.
57 _, wantMetadata, err := ReadMetadataPath(wantP)
58 if err != nil {
59 t.Fatal(err)
60 }
61 b.indexTime = wantMetadata.IndexTime
62 b.id = wantMetadata.ID
63
64 if err := b.Finish(); err != nil {
65 t.Fatal(err)
66 }
67
68 gotP := filepath.Join(dir, "repo_v16.00000.zoekt")
69
70 if *update {
71 data, err := os.ReadFile(gotP)
72 if err != nil {
73 t.Fatal(err)
74 }
75 err = os.WriteFile(wantP, data, 0o644)
76 if err != nil {
77 t.Fatal(err)
78 }
79 return
80 }
81
82 got, err := os.ReadFile(gotP)
83 if err != nil {
84 t.Fatal(err)
85 }
86 want, err := os.ReadFile(wantP)
87 if err != nil {
88 t.Fatal(err)
89 }
90
91 if d := cmp.Diff(want, got); d != "" {
92 t.Errorf("mismatch (-want +got):\n%s", d)
93 }
94}
95
96func TestFlags(t *testing.T) {
97 cases := []struct {
98 args []string
99 want Options
100 }{{
101 // Defaults
102 args: []string{},
103 want: Options{},
104 }, {
105 args: []string{"-index", "/tmp"},
106 want: Options{
107 IndexDir: "/tmp",
108 },
109 }, {
110 // single large file pattern
111 args: []string{"-large_file", "*.md"},
112 want: Options{
113 LargeFiles: []string{"*.md"},
114 },
115 }, {
116 // multiple large file pattern
117 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"},
118 want: Options{
119 LargeFiles: []string{"*.md", "*.yaml"},
120 },
121 }, {
122 // multiple large file pattern with negated pattern
123 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"},
124 want: Options{
125 LargeFiles: []string{"*.md", "!*.yaml"},
126 },
127 }, {
128 // multiple large file pattern with escaped character
129 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"},
130 want: Options{
131 LargeFiles: []string{"*.md", "\\!*.yaml"},
132 },
133 }}
134
135 ignored := []cmp.Option{
136 // depends on $PATH setting.
137 cmpopts.IgnoreFields(Options{}, "CTagsPath"),
138 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"),
139 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"),
140 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
141 }
142
143 for _, c := range cases {
144 c.want.SetDefaults()
145 // depends on $PATH setting.
146 c.want.CTagsPath = ""
147
148 got := Options{}
149 fs := flag.NewFlagSet("", flag.ContinueOnError)
150 got.Flags(fs)
151 if err := fs.Parse(c.args); err != nil {
152 t.Errorf("failed to parse args %v: %v", c.args, err)
153 } else if d := cmp.Diff(c.want, got, ignored...); d != "" {
154 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d)
155 }
156 }
157}
158
159func TestIncrementalSkipIndexing(t *testing.T) {
160 cases := []struct {
161 name string
162 want bool
163 opts Options
164 }{{
165 name: "v17-noop",
166 want: true,
167 opts: Options{
168 RepositoryDescription: zoekt.Repository{
169 Name: "repo17",
170 },
171 SizeMax: 2097152,
172 DisableCTags: true,
173 },
174 }, {
175 name: "v16-noop",
176 want: true,
177 opts: Options{
178 RepositoryDescription: zoekt.Repository{
179 Name: "repo",
180 },
181 SizeMax: 2097152,
182 DisableCTags: true,
183 },
184 }, {
185 name: "v17-id",
186 want: false,
187 opts: Options{
188 RepositoryDescription: zoekt.Repository{
189 Name: "repo17",
190 RawConfig: map[string]string{
191 "repoid": "123",
192 },
193 },
194 SizeMax: 2097152,
195 DisableCTags: true,
196 },
197 }, {
198 name: "doesnotexist",
199 want: false,
200 opts: Options{
201 RepositoryDescription: zoekt.Repository{
202 Name: "doesnotexist",
203 },
204 SizeMax: 2097152,
205 DisableCTags: true,
206 },
207 }}
208
209 for _, tc := range cases {
210 t.Run(tc.name, func(t *testing.T) {
211 tc.opts.IndexDir = "../testdata/shards"
212 t.Log(tc.opts.IndexState())
213 got := tc.opts.IncrementalSkipIndexing()
214 if got != tc.want {
215 t.Fatalf("want %v got %v", tc.want, got)
216 }
217 })
218 }
219}
220
// TestMain parses the test flags, silences the standard logger unless the tests
// run in verbose mode, and exits with the status returned by the test run.
func TestMain(m *testing.M) {
	flag.Parse()

	quiet := !testing.Verbose()
	if quiet {
		log.SetOutput(io.Discard)
	}

	code := m.Run()
	os.Exit(code)
}
228
229func TestDontCountContentOfSkippedFiles(t *testing.T) {
230 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{
231 Name: "foo",
232 }})
233 if err != nil {
234 t.Fatal(err)
235 }
236
237 // content with at least 100 bytes
238 binary := append([]byte("abc def \x00"), make([]byte, 100)...)
239 err = b.Add(Document{
240 Name: "f1",
241 Content: binary,
242 })
243 if err != nil {
244 t.Fatal(err)
245 }
246 if len(b.todo) != 1 || b.todo[0].SkipReason == "" {
247 t.Fatalf("document should have been skipped")
248 }
249 if b.todo[0].Content != nil {
250 t.Fatalf("document content should be empty")
251 }
252 if b.size >= 100 {
253 t.Fatalf("content of skipped documents should not count towards shard size thresold")
254 }
255}
256
257func TestPartialSuccess(t *testing.T) {
258 dir := t.TempDir()
259
260 opts := Options{
261 IndexDir: dir,
262 ShardMax: 1024,
263 SizeMax: 1 << 20,
264 Parallelism: 1,
265 }
266 opts.RepositoryDescription.Name = "repo"
267 opts.SetDefaults()
268
269 b, err := NewBuilder(opts)
270 if err != nil {
271 t.Fatalf("NewBuilder: %v", err)
272 }
273
274 for i := 0; i < 4; i++ {
275 nm := fmt.Sprintf("F%d", i)
276 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
277 }
278 b.buildError = fmt.Errorf("any error")
279
280 // No error checking.
281 _ = b.Finish()
282
283 // Finish cleans up temporary files.
284 if fs, err := filepath.Glob(dir + "/*"); err != nil {
285 t.Errorf("glob(%s): %v", dir, err)
286 } else if len(fs) != 0 {
287 t.Errorf("got shards %v, want []", fs)
288 }
289}
290
291func TestOptions_FindAllShards(t *testing.T) {
292 type simpleShard struct {
293 Repository zoekt.Repository
294 // NumShards is the number of shards that should be created that
295 // contain data for "Repository".
296 NumShards int
297 }
298
299 tests := []struct {
300 name string
301 simpleShards []simpleShard
302 compoundShards [][]zoekt.Repository
303 expectedShardCount int
304 expectedRepository zoekt.Repository
305 }{
306 {
307 name: "repository in normal shard",
308 simpleShards: []simpleShard{
309 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
310 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
311 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
312 },
313 expectedShardCount: 1,
314 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
315 },
316 {
317 name: "repository in compound shard",
318 compoundShards: [][]zoekt.Repository{
319 {
320 {Name: "repoA", ID: 1},
321 {Name: "repoB", ID: 2},
322 {Name: "repoC", ID: 3},
323 },
324 {
325 {Name: "repoD", ID: 4},
326 {Name: "repoE", ID: 5},
327 {Name: "repoF", ID: 6},
328 },
329 },
330 expectedShardCount: 1,
331 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
332 },
333 {
334 name: "repository split across multiple shards",
335 simpleShards: []simpleShard{
336 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
337 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2},
338 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
339 },
340 expectedShardCount: 2,
341 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
342 },
343 {
344 name: "unknown repository",
345 simpleShards: []simpleShard{
346 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
347 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
348 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
349 },
350 compoundShards: [][]zoekt.Repository{
351 {
352 {Name: "repoD", ID: 4},
353 {Name: "repoE", ID: 5},
354 {Name: "repoF", ID: 6},
355 },
356 },
357 expectedShardCount: 0,
358 },
359 {
360 name: "match on ID, not name (compound only)",
361 compoundShards: [][]zoekt.Repository{
362 {
363 {Name: "repoA", ID: 1},
364 {Name: "sameName", ID: 2},
365 {Name: "sameName", ID: 3},
366 },
367 {
368 {Name: "repoB", ID: 4},
369 {Name: "sameName", ID: 5},
370 {Name: "sameName", ID: 6},
371 },
372 },
373 expectedShardCount: 1,
374 expectedRepository: zoekt.Repository{Name: "sameName", ID: 5},
375 },
376 }
377 for _, tt := range tests {
378 t.Run(tt.name, func(t *testing.T) {
379 t.Parallel()
380
381 // prepare
382 indexDir := t.TempDir()
383
384 for _, s := range tt.simpleShards {
385 createTestShard(t, indexDir, s.Repository, s.NumShards)
386 }
387
388 for _, repositoryGroup := range tt.compoundShards {
389 createTestCompoundShard(t, indexDir, repositoryGroup)
390 }
391
392 o := &Options{
393 IndexDir: indexDir,
394 RepositoryDescription: tt.expectedRepository,
395 }
396 o.SetDefaults()
397
398 // run test
399 shards := o.FindAllShards()
400
401 // verify results
402 if len(shards) != tt.expectedShardCount {
403 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards))
404 }
405
406 if tt.expectedShardCount > 0 {
407 for _, s := range shards {
408 // all shards should contain the metadata for the desired repository
409 repos, _, err := ReadMetadataPathAlive(s)
410 if err != nil {
411 t.Fatalf("reading metadata from shard %q: %s", s, err)
412 }
413
414 foundRepository := false
415 for _, r := range repos {
416 if r.ID == tt.expectedRepository.ID {
417 foundRepository = true
418 break
419 }
420 }
421
422 if !foundRepository {
423 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID)
424 }
425 }
426 }
427 })
428 }
429}
430
431func TestBuilder_BranchNamesEqual(t *testing.T) {
432 for i, test := range []struct {
433 oldBranches []zoekt.RepositoryBranch
434 newBranches []zoekt.RepositoryBranch
435 expected bool
436 }{
437 {
438 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
439 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
440 expected: true,
441 },
442 {
443 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}},
444 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}},
445 expected: true,
446 },
447 {
448 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
449 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}},
450 expected: false,
451 },
452 {
453 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
454 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}},
455 expected: false,
456 },
457 {
458 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
459 newBranches: []zoekt.RepositoryBranch{},
460 expected: false,
461 },
462 {
463 oldBranches: []zoekt.RepositoryBranch{},
464 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
465 expected: false,
466 },
467 } {
468 t.Run(strconv.Itoa(i), func(t *testing.T) {
469 actual := BranchNamesEqual(test.oldBranches, test.newBranches)
470 if test.expected != actual {
471 t.Errorf("expected: %t, got: %t", test.expected, actual)
472 }
473 })
474 }
475}
476
477func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) {
478 indexDir := t.TempDir()
479
480 repository := zoekt.Repository{
481 Name: "repo",
482 ID: 1,
483 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}},
484 }
485 createTestShard(t, indexDir, repository, 2)
486
487 repositoryNewBranches := zoekt.Repository{
488 Name: "repo",
489 ID: 1,
490 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}},
491 }
492
493 o := Options{
494 IndexDir: indexDir,
495 RepositoryDescription: repositoryNewBranches,
496 IsDelta: true,
497 }
498 o.SetDefaults()
499
500 b, err := NewBuilder(o)
501 if err != nil {
502 t.Fatalf("NewBuilder: %v", err)
503 }
504
505 err = b.Finish()
506 if !errors.As(err, &deltaBranchSetError{}) {
507 t.Fatalf("expected error complaning about different branch names, got: %s", err)
508 }
509}
510
511func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) {
512 repository := zoekt.Repository{
513 Name: "repo",
514 ID: 1,
515 Branches: []zoekt.RepositoryBranch{{Name: "foo"}},
516 }
517
518 for _, test := range []struct {
519 name string
520 options func(options *Options)
521 }{
522 {
523 name: "update option CTagsPath to non default",
524 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" },
525 },
526 {
527 name: "update option DisableCTags to non default",
528 options: func(options *Options) { options.DisableCTags = true },
529 },
530 {
531 name: "update option SizeMax to non default",
532 options: func(options *Options) { options.SizeMax -= 10 },
533 },
534 {
535 name: "update option LargeFiles to non default",
536 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} },
537 },
538 } {
539 test := test
540
541 t.Run(test.name, func(t *testing.T) {
542 indexDir := t.TempDir()
543
544 // initially use default options
545 createTestShard(t, indexDir, repository, 2)
546
547 o := Options{
548 IndexDir: indexDir,
549 RepositoryDescription: repository,
550 IsDelta: true,
551 }
552 test.options(&o)
553
554 b, err := NewBuilder(o)
555 if err != nil {
556 t.Fatalf("NewBuilder: %v", err)
557 }
558
559 err = b.Finish()
560 if err == nil {
561 t.Fatalf("no error regarding index options mismatch")
562 }
563
564 var optionsMismatchError *deltaIndexOptionsMismatchError
565 if !errors.As(err, &optionsMismatchError) {
566 t.Fatalf("expected error complaining about index options mismatch, got: %s", err)
567 }
568 })
569 }
570}
571
572func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) {
573 olderTime := time.Unix(0, 0)
574 newerTime := time.Unix(10000, 0)
575
576 for _, test := range []struct {
577 name string
578 originalRepository zoekt.Repository
579 updatedRepository zoekt.Repository
580 }{
581 {
582 name: "update commit information",
583 originalRepository: zoekt.Repository{
584 Name: "repo",
585 ID: 1,
586 Branches: []zoekt.RepositoryBranch{
587 {Name: "main", Version: "v1"},
588 {Name: "release", Version: "v1"},
589 },
590 },
591 updatedRepository: zoekt.Repository{
592 Name: "repo",
593 ID: 1,
594 Branches: []zoekt.RepositoryBranch{
595 {Name: "main", Version: "v2"},
596 {Name: "release", Version: "v2"},
597 },
598 },
599 },
600 {
601 name: "update latest commit date (older -> newer)",
602 originalRepository: zoekt.Repository{
603 Name: "repo",
604 ID: 1,
605 Branches: []zoekt.RepositoryBranch{
606 {Name: "main", Version: "v1"},
607 },
608 LatestCommitDate: olderTime,
609 },
610 updatedRepository: zoekt.Repository{
611 Name: "repo",
612 ID: 1,
613 Branches: []zoekt.RepositoryBranch{
614 {Name: "main", Version: "v2"},
615 },
616 LatestCommitDate: newerTime,
617 },
618 },
619 {
620 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)",
621 originalRepository: zoekt.Repository{
622 Name: "repo",
623 ID: 1,
624 Branches: []zoekt.RepositoryBranch{
625 {Name: "main", Version: "v1"},
626 },
627 LatestCommitDate: newerTime,
628 },
629 updatedRepository: zoekt.Repository{
630 Name: "repo",
631 ID: 1,
632 Branches: []zoekt.RepositoryBranch{
633 {Name: "main", Version: "v2"},
634 },
635 LatestCommitDate: olderTime,
636 },
637 },
638 } {
639 test := test
640
641 t.Run(test.name, func(t *testing.T) {
642 indexDir := t.TempDir()
643
644 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) {
645 o.DisableCTags = true
646 })
647
648 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) {
649 o.IsDelta = true
650 o.DisableCTags = true
651 })
652
653 if len(shards) < 3 {
654 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", "))
655 }
656
657 for _, s := range shards {
658 repositories, _, err := ReadMetadataPathAlive(s)
659 if err != nil {
660 t.Fatalf("reading repository metadata from shard %q", s)
661 }
662
663 var foundRepository *zoekt.Repository
664 for _, r := range repositories {
665 if r.ID == test.updatedRepository.ID {
666 foundRepository = r
667 break
668 }
669 }
670
671 if foundRepository == nil {
672 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s)
673 }
674
675 diffOptions := []cmp.Option{
676 cmpopts.IgnoreUnexported(zoekt.Repository{}),
677 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
678 cmpopts.EquateEmpty(),
679 }
680
681 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" {
682 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff)
683 }
684 }
685 })
686 }
687}
688
689func TestFindRepositoryMetadata(t *testing.T) {
690 tests := []struct {
691 name string
692 normalShardRepositories []zoekt.Repository
693 compoundShardRepositories []zoekt.Repository
694 input *zoekt.Repository
695 expectedRepository *zoekt.Repository
696 expectedOk bool
697 }{
698 {
699 name: "repository in normal shards",
700 normalShardRepositories: []zoekt.Repository{
701 {Name: "repoA", ID: 1},
702 {Name: "repoB", ID: 2},
703 {Name: "repoC", ID: 3},
704 },
705 compoundShardRepositories: []zoekt.Repository{
706 {Name: "repoD", ID: 4},
707 {Name: "repoE", ID: 5},
708 {Name: "repoF", ID: 6},
709 },
710 input: &zoekt.Repository{Name: "repoB", ID: 2},
711 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2},
712 expectedOk: true,
713 },
714 {
715 name: "repository in compound shards",
716 normalShardRepositories: []zoekt.Repository{
717 {Name: "repoA", ID: 1},
718 {Name: "repoB", ID: 2},
719 {Name: "repoC", ID: 3},
720 },
721 compoundShardRepositories: []zoekt.Repository{
722 {Name: "repoD", ID: 4},
723 {Name: "repoE", ID: 5},
724 {Name: "repoF", ID: 6},
725 },
726 input: &zoekt.Repository{Name: "repoE", ID: 5},
727 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5},
728 expectedOk: true,
729 },
730 {
731 name: "repository not in any shard",
732 normalShardRepositories: []zoekt.Repository{
733 {Name: "repoA", ID: 1},
734 {Name: "repoB", ID: 2},
735 {Name: "repoC", ID: 3},
736 },
737 compoundShardRepositories: []zoekt.Repository{
738 {Name: "repoD", ID: 4},
739 {Name: "repoE", ID: 5},
740 {Name: "repoF", ID: 6},
741 },
742 input: &zoekt.Repository{Name: "notPresent", ID: 123},
743 expectedRepository: nil,
744 expectedOk: false,
745 },
746 }
747 for _, tt := range tests {
748 t.Run(tt.name, func(t *testing.T) {
749 // setup
750 indexDir := t.TempDir()
751
752 optFns := []func(o *Options){
753 // ctags aren't important for this test, and the equality checks
754 // for diffing repositories can break due to local configuration
755 func(o *Options) {
756 o.DisableCTags = true
757 },
758 }
759
760 for _, r := range tt.normalShardRepositories {
761 createTestShard(t, indexDir, r, 1, optFns...)
762 }
763
764 if len(tt.compoundShardRepositories) > 0 {
765 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...)
766 }
767
768 o := &Options{
769 IndexDir: indexDir,
770 RepositoryDescription: *tt.input,
771 }
772 o.SetDefaults()
773
774 // run test
775 got, _, gotOk, err := o.FindRepositoryMetadata()
776 if err != nil {
777 t.Errorf("received unexpected error: %v", err)
778 return
779 }
780
781 // check outcome
782 compareOptions := []cmp.Option{
783 cmpopts.IgnoreUnexported(zoekt.Repository{}),
784 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
785 cmpopts.EquateEmpty(),
786 }
787
788 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" {
789 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff)
790 }
791
792 if tt.expectedOk != gotOk {
793 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk)
794 }
795 })
796 }
797}
798
799func TestIsLowPriority(t *testing.T) {
800 cases := []string{
801 "builder_test.go",
802 "test/TestQuery.java",
803 "search/vendor/thirdparty.cc",
804 "search/node_modules/search/js",
805 "search.min.js",
806 "internal/search.js.map",
807 }
808
809 for _, tt := range cases {
810 t.Run(tt, func(t *testing.T) {
811 if !IsLowPriority(tt, nil) {
812 t.Errorf("expected file '%s' to be low priority", tt)
813 }
814 })
815 }
816
817 negativeCases := []string{
818 "builder.go",
819 "RoutesTrigger.java",
820 "search.js",
821 }
822
823 for _, tt := range negativeCases {
824 t.Run(tt, func(t *testing.T) {
825 if IsLowPriority(tt, nil) {
826 t.Errorf("did not expect file '%s' to be low priority", tt)
827 }
828 })
829 }
830
831 // Explicitly check that content is important by using the same filename but
832 // different content.
833 normal := "package mock\n\nvar Mock struct {}"
834 generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}"
835 if IsLowPriority("mock.go", []byte(normal)) {
836 t.Error("expected non-generated content to not be low priority")
837 }
838 if !IsLowPriority("mock.go", []byte(generated)) {
839 t.Error("expected generated content to be low priority")
840 }
841}
842
843func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string {
844 t.Helper()
845
846 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil {
847 t.Fatal(err)
848 }
849
850 o := Options{
851 IndexDir: indexDir,
852 RepositoryDescription: r,
853 ShardMax: 75, // create a new shard every 75 bytes
854 }
855 o.SetDefaults()
856
857 for _, fn := range optFns {
858 fn(&o)
859 }
860
861 b, err := NewBuilder(o)
862 if err != nil {
863 t.Fatalf("NewBuilder: %v", err)
864 }
865
866 if numShards == 0 {
867 // We have to make at least 1 shard.
868 numShards = 1
869 }
870
871 for i := 0; i < numShards; i++ {
872 // Create entries (file + contents) that are ~100 bytes each.
873 // This (along with our shardMax setting of 75 bytes) means that each shard
874 // will contain at most one of these.
875 fileName := strconv.Itoa(i)
876 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))}
877 for _, branch := range o.RepositoryDescription.Branches {
878 document.Branches = append(document.Branches, branch.Name)
879 }
880
881 err := b.Add(document)
882 if err != nil {
883 t.Fatalf("failed to add file %q to builder: %s", fileName, err)
884 }
885 }
886
887 if err := b.Finish(); err != nil {
888 t.Fatalf("Finish: %v", err)
889 }
890
891 return o.FindAllShards()
892}
893
894func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) {
895 t.Helper()
896
897 var shardNames []string
898
899 for _, r := range repositories {
900 // create an isolated scratch space to store normal shards for this repository
901 scratchDir := t.TempDir()
902
903 // create shards that'll be merged later
904 createTestShard(t, scratchDir, r, 1, optFns...)
905
906 // discover file names for all the normal shards we created
907 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse
908 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt"))
909 if err != nil {
910 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err)
911 }
912
913 shardNames = append(shardNames, shards...)
914 }
915
916 // load the normal shards that we created
917 var files []IndexFile
918 for _, shard := range shardNames {
919 f, err := os.Open(shard)
920 if err != nil {
921 t.Fatalf("opening shard file: %s", err)
922 }
923 defer f.Close()
924
925 indexFile, err := NewIndexFile(f)
926 if err != nil {
927 t.Fatalf("creating index file: %s", err)
928 }
929 defer indexFile.Close()
930
931 files = append(files, indexFile)
932 }
933
934 // merge all the simple shards into a compound shard
935 tmpName, dstName, err := Merge(indexDir, files...)
936 if err != nil {
937 t.Fatalf("merging index files into compound shard: %s", err)
938 }
939 if err := os.Rename(tmpName, dstName); err != nil {
940 t.Fatal(err)
941 }
942}
943
944func TestIgnoreSizeMax(t *testing.T) {
945 for _, test := range []struct {
946 name string
947 largeFiles []string
948 filePaths []string
949 expected bool
950 }{
951 {
952 name: "empty pattern does nothing",
953 largeFiles: []string{""},
954 filePaths: []string{"F0"},
955 expected: false,
956 },
957 {
958 name: "positive match allows",
959 largeFiles: []string{"F0"},
960 filePaths: []string{"F0"},
961 expected: true,
962 },
963 {
964 name: "positive and negative patterns allows",
965 largeFiles: []string{"F?", "!F0"},
966 filePaths: []string{"F1"},
967 expected: true,
968 },
969 {
970 name: "positive and negative patterns disallows",
971 largeFiles: []string{"F?", "!F0"},
972 filePaths: []string{"F0"},
973 expected: false,
974 },
975 {
976 name: "positive escaped pattern allows",
977 largeFiles: []string{"\\!F?"},
978 filePaths: []string{"!F0", "!F1"},
979 expected: true,
980 },
981 {
982 name: "postive escaped pattern does not disallow",
983 largeFiles: []string{"F0", "\\!F?"},
984 filePaths: []string{"F0", "!F0"},
985 expected: true,
986 },
987 {
988 name: "combined meta and literal interpretation disallows",
989 largeFiles: []string{"*F*", "!!F*"},
990 filePaths: []string{"!F0"},
991 expected: false,
992 },
993 {
994 name: "combined meta and literal interpretation allows",
995 largeFiles: []string{"*F*", "!!F*"},
996 filePaths: []string{"F0"},
997 expected: true,
998 },
999 {
1000 name: "largeFiles order: positive match overrides previous negative match and allows",
1001 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1002 filePaths: []string{"F0"},
1003 expected: true,
1004 },
1005 {
1006 name: "largeFiles order: positive match overrides previous negative match and disallows",
1007 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1008 filePaths: []string{"F1"},
1009 expected: false,
1010 },
1011 {
1012 name: "largeFiles order: negative match overrides previous positive match and allows",
1013 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1014 filePaths: []string{"F1"},
1015 expected: true,
1016 },
1017 {
1018 name: "largeFiles order: negative match overrides previous positive match and disallows",
1019 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1020 filePaths: []string{"F0"},
1021 expected: false,
1022 },
1023 } {
1024 t.Run(test.name, func(t *testing.T) {
1025 o := Options{
1026 LargeFiles: test.largeFiles,
1027 }
1028
1029 for _, filePath := range test.filePaths {
1030 ignore := o.IgnoreSizeMax(filePath)
1031 if ignore != test.expected {
1032 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore)
1033 }
1034 }
1035 })
1036 }
1037}
1038
// filerankCase describes one document-ordering scenario for TestFileRank.
type filerankCase struct {
	// name is used as the subtest name.
	name string
	// docs are the documents handed to sortDocuments.
	docs []*Document
	// want lists indices into docs in the order expected after sorting.
	want []int
}
1044
1045func testFileRankAspect(t *testing.T, c filerankCase) {
1046 var want []*Document
1047 for _, j := range c.want {
1048 want = append(want, c.docs[j])
1049 }
1050
1051 got := make([]*Document, len(c.docs))
1052 copy(got, c.docs)
1053 sortDocuments(got)
1054
1055 print := func(ds []*Document) string {
1056 r := ""
1057 for _, d := range ds {
1058 r += fmt.Sprintf("%v, ", d)
1059 }
1060 return r
1061 }
1062 if !reflect.DeepEqual(got, want) {
1063 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
1064 }
1065}
1066
1067func TestFileRank(t *testing.T) {
1068 for _, c := range []filerankCase{{
1069 name: "filename",
1070 docs: []*Document{
1071 {
1072 Name: "longlonglong",
1073 Content: []byte("bla"),
1074 },
1075 {
1076 Name: "short",
1077 Content: []byte("bla"),
1078 },
1079 },
1080 want: []int{1, 0},
1081 }, {
1082 name: "test",
1083 docs: []*Document{
1084 {
1085 Name: "foo_test.go",
1086 Content: []byte("bla"),
1087 },
1088 {
1089 Name: "longlonglong",
1090 Content: []byte("bla"),
1091 },
1092 },
1093 want: []int{1, 0},
1094 }, {
1095 name: "content",
1096 docs: []*Document{
1097 {
1098 Content: []byte("bla"),
1099 },
1100 {
1101 Content: []byte("blablablabla"),
1102 },
1103 {
1104 Content: []byte("blabla"),
1105 },
1106 },
1107 want: []int{0, 2, 1},
1108 }, {
1109 name: "skipped docs",
1110 docs: []*Document{
1111 {
1112 Name: "binary_file",
1113 SkipReason: "binary file",
1114 },
1115 {
1116 Name: "some_test.go",
1117 Content: []byte("bla"),
1118 },
1119 {
1120 Name: "large_file.go",
1121 SkipReason: "too large",
1122 },
1123 {
1124 Name: "file.go",
1125 Content: []byte("blabla"),
1126 },
1127 },
1128 want: []int{3, 1, 0, 2},
1129 }} {
1130 t.Run(c.name, func(t *testing.T) {
1131 testFileRankAspect(t, c)
1132 })
1133 }
1134}