fork of https://github.com/sourcegraph/zoekt
1package index
2
3import (
4 "errors"
5 "flag"
6 "fmt"
7 "io"
8 "log"
9 "os"
10 "path/filepath"
11 "reflect"
12 "strconv"
13 "strings"
14 "testing"
15 "time"
16
17 "github.com/google/go-cmp/cmp"
18 "github.com/google/go-cmp/cmp/cmpopts"
19 "github.com/prometheus/client_golang/prometheus/testutil"
20 "github.com/stretchr/testify/require"
21
22 "github.com/sourcegraph/zoekt"
23)
24
// update is the -update test flag. When it is set, TestBuildv16 overwrites its
// golden shard with the freshly built one instead of comparing against it.
var update = flag.Bool("update", false, "update golden file")
26
27// ensure we don't regress on how we build v16
28func TestBuildv16(t *testing.T) {
29 dir := t.TempDir()
30
31 opts := Options{
32 IndexDir: dir,
33 RepositoryDescription: zoekt.Repository{
34 Name: "repo",
35 Source: "./testdata/repo/",
36 },
37 DisableCTags: true,
38 }
39 opts.SetDefaults()
40
41 b, err := NewBuilder(opts)
42 if err != nil {
43 t.Fatal(err)
44 }
45
46 for _, p := range []string{"main.go"} {
47 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p))
48 if err != nil {
49 t.Fatal(err)
50 }
51 if err := b.AddFile(p, blob); err != nil {
52 t.Fatal(err)
53 }
54 }
55
56 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt")
57
58 // fields indexTime and id depend on time. For this test, we copy the fields from
59 // the old shard.
60 _, wantMetadata, err := ReadMetadataPath(wantP)
61 if err != nil {
62 t.Fatal(err)
63 }
64 b.indexTime = wantMetadata.IndexTime
65 b.id = wantMetadata.ID
66
67 if err := b.Finish(); err != nil {
68 t.Fatal(err)
69 }
70
71 gotP := filepath.Join(dir, "repo_v16.00000.zoekt")
72
73 if *update {
74 data, err := os.ReadFile(gotP)
75 if err != nil {
76 t.Fatal(err)
77 }
78 err = os.WriteFile(wantP, data, 0o644)
79 if err != nil {
80 t.Fatal(err)
81 }
82 return
83 }
84
85 got, err := os.ReadFile(gotP)
86 if err != nil {
87 t.Fatal(err)
88 }
89 want, err := os.ReadFile(wantP)
90 if err != nil {
91 t.Fatal(err)
92 }
93
94 if d := cmp.Diff(want, got); d != "" {
95 t.Errorf("mismatch (-want +got):\n%s", d)
96 }
97}
98
99func TestFlags(t *testing.T) {
100 cases := []struct {
101 args []string
102 want Options
103 }{{
104 // Defaults
105 args: []string{},
106 want: Options{},
107 }, {
108 args: []string{"-index", "/tmp"},
109 want: Options{
110 IndexDir: "/tmp",
111 },
112 }, {
113 // single large file pattern
114 args: []string{"-large_file", "*.md"},
115 want: Options{
116 LargeFiles: []string{"*.md"},
117 },
118 }, {
119 // multiple large file pattern
120 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"},
121 want: Options{
122 LargeFiles: []string{"*.md", "*.yaml"},
123 },
124 }, {
125 // multiple large file pattern with negated pattern
126 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"},
127 want: Options{
128 LargeFiles: []string{"*.md", "!*.yaml"},
129 },
130 }, {
131 // multiple large file pattern with escaped character
132 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"},
133 want: Options{
134 LargeFiles: []string{"*.md", "\\!*.yaml"},
135 },
136 }}
137
138 ignored := []cmp.Option{
139 // depends on $PATH setting.
140 cmpopts.IgnoreFields(Options{}, "CTagsPath"),
141 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"),
142 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"),
143 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
144 }
145
146 for _, c := range cases {
147 c.want.SetDefaults()
148 // depends on $PATH setting.
149 c.want.CTagsPath = ""
150
151 got := Options{}
152 fs := flag.NewFlagSet("", flag.ContinueOnError)
153 got.Flags(fs)
154 if err := fs.Parse(c.args); err != nil {
155 t.Errorf("failed to parse args %v: %v", c.args, err)
156 } else if d := cmp.Diff(c.want, got, ignored...); d != "" {
157 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d)
158 }
159 }
160}
161
162func TestIncrementalSkipIndexing(t *testing.T) {
163 cases := []struct {
164 name string
165 want bool
166 opts Options
167 }{{
168 name: "v17-noop",
169 want: true,
170 opts: Options{
171 RepositoryDescription: zoekt.Repository{
172 Name: "repo17",
173 },
174 SizeMax: 2097152,
175 DisableCTags: true,
176 },
177 }, {
178 name: "v16-noop",
179 want: true,
180 opts: Options{
181 RepositoryDescription: zoekt.Repository{
182 Name: "repo",
183 },
184 SizeMax: 2097152,
185 DisableCTags: true,
186 },
187 }, {
188 name: "v17-id",
189 want: false,
190 opts: Options{
191 RepositoryDescription: zoekt.Repository{
192 Name: "repo17",
193 RawConfig: map[string]string{
194 "repoid": "123",
195 },
196 },
197 SizeMax: 2097152,
198 DisableCTags: true,
199 },
200 }, {
201 name: "doesnotexist",
202 want: false,
203 opts: Options{
204 RepositoryDescription: zoekt.Repository{
205 Name: "doesnotexist",
206 },
207 SizeMax: 2097152,
208 DisableCTags: true,
209 },
210 }}
211
212 for _, tc := range cases {
213 t.Run(tc.name, func(t *testing.T) {
214 tc.opts.IndexDir = "../testdata/shards"
215 t.Log(tc.opts.IndexState())
216 got := tc.opts.IncrementalSkipIndexing()
217 if got != tc.want {
218 t.Fatalf("want %v got %v", tc.want, got)
219 }
220 })
221 }
222}
223
// TestMain is the entry point for this package's tests. It parses the command
// line flags, discards output of the standard logger unless tests run in
// verbose mode, and exits with the status code returned by m.Run.
func TestMain(m *testing.M) {
	flag.Parse()

	if verbose := testing.Verbose(); !verbose {
		log.SetOutput(io.Discard)
	}

	exitCode := m.Run()
	os.Exit(exitCode)
}
231
232func TestDontCountContentOfSkippedFiles(t *testing.T) {
233 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{
234 Name: "foo",
235 }})
236 if err != nil {
237 t.Fatal(err)
238 }
239
240 // content with at least 100 bytes
241 binary := append([]byte("abc def \x00"), make([]byte, 100)...)
242 err = b.Add(Document{
243 Name: "f1",
244 Content: binary,
245 })
246 if err != nil {
247 t.Fatal(err)
248 }
249 if len(b.todo) != 1 || b.todo[0].SkipReason == SkipReasonNone {
250 t.Fatalf("document should have been skipped")
251 }
252 if b.todo[0].Content != nil {
253 t.Fatalf("document content should be empty")
254 }
255 if b.size >= 100 {
256 t.Fatalf("content of skipped documents should not count towards shard size thresold")
257 }
258}
259
260func TestPartialSuccess(t *testing.T) {
261 dir := t.TempDir()
262
263 opts := Options{
264 IndexDir: dir,
265 ShardMax: 1024,
266 SizeMax: 1 << 20,
267 Parallelism: 1,
268 }
269 opts.RepositoryDescription.Name = "repo"
270 opts.SetDefaults()
271
272 b, err := NewBuilder(opts)
273 if err != nil {
274 t.Fatalf("NewBuilder: %v", err)
275 }
276
277 for i := range 4 {
278 nm := fmt.Sprintf("F%d", i)
279 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
280 }
281 b.buildError = fmt.Errorf("any error")
282
283 // No error checking.
284 _ = b.Finish()
285
286 // Finish cleans up temporary files.
287 if fs, err := filepath.Glob(dir + "/*"); err != nil {
288 t.Errorf("glob(%s): %v", dir, err)
289 } else if len(fs) != 0 {
290 t.Errorf("got shards %v, want []", fs)
291 }
292}
293
294// Tests that we skip looping over repos in compound shards when we know that
295// the repository we are looking for is not in the shard.
296func TestSkipCompoundShards(t *testing.T) {
297 metricCompoundShardLookups.Reset()
298
299 compoundShards := [][]zoekt.Repository{
300 {
301 {Name: "repoA", ID: 1},
302 {Name: "repoB", ID: 2},
303 {Name: "repoC", ID: 3},
304 },
305 {
306 {Name: "repoD", ID: 4},
307 {Name: "repoE", ID: 5},
308 {Name: "repoF", ID: 6},
309 {Name: "repoF", ID: 7},
310 {Name: "repoF", ID: 8},
311 },
312 }
313 var lookForRepoID uint32 = 99
314 wantSkippedCount := 2
315
316 indexDir := t.TempDir()
317 for _, repositoryGroup := range compoundShards {
318 createTestCompoundShard(t, indexDir, repositoryGroup)
319 }
320 o := &Options{
321 IndexDir: indexDir,
322 RepositoryDescription: zoekt.Repository{ID: lookForRepoID},
323 }
324
325 shard := o.findCompoundShard()
326 require.Empty(t, shard)
327
328 // Check if the "skipped" counter was incremented
329 skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped")))
330 require.Equal(t, wantSkippedCount, skippedCount)
331}
332
333// With optimization
334// BenchmarkFindCompoundShard-16 33505 36016 ns/op
335//
336// Without optimization
337// BenchmarkFindCompoundShard-16 76 15568589 ns/op
338func BenchmarkFindCompoundShard(b *testing.B) {
339 // Generate a large compound shard
340 const numRepos = 5000
341 repositories := make([]zoekt.Repository, numRepos)
342 for i := range numRepos {
343 repositories[i] = zoekt.Repository{
344 Name: fmt.Sprintf("repo%d", i+1),
345 ID: uint32(i + 1),
346 }
347 }
348 indexDir := b.TempDir()
349 createTestCompoundShard(b, indexDir, repositories)
350
351 // pick id that is not in the shard
352 var searchRepoID uint32 = numRepos + 1
353
354 b.ResetTimer()
355 for i := 0; i < b.N; i++ {
356 o := &Options{
357 IndexDir: indexDir,
358 RepositoryDescription: zoekt.Repository{ID: searchRepoID},
359 }
360
361 shard := o.findCompoundShard()
362 if shard != "" {
363 b.Fatal("expected empty result")
364 }
365 }
366}
367
368func TestOptions_FindAllShards(t *testing.T) {
369 type simpleShard struct {
370 Repository zoekt.Repository
371 // NumShards is the number of shards that should be created that
372 // contain data for "Repository".
373 NumShards int
374 }
375
376 tests := []struct {
377 name string
378 simpleShards []simpleShard
379 compoundShards [][]zoekt.Repository
380 expectedShardCount int
381 expectedRepository zoekt.Repository
382 }{
383 {
384 name: "repository in normal shard",
385 simpleShards: []simpleShard{
386 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
387 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
388 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
389 },
390 expectedShardCount: 1,
391 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
392 },
393 {
394 name: "repository in compound shard",
395 compoundShards: [][]zoekt.Repository{
396 {
397 {Name: "repoA", ID: 1},
398 {Name: "repoB", ID: 2},
399 {Name: "repoC", ID: 3},
400 },
401 {
402 {Name: "repoD", ID: 4},
403 {Name: "repoE", ID: 5},
404 {Name: "repoF", ID: 6},
405 },
406 },
407 expectedShardCount: 1,
408 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
409 },
410 {
411 name: "repository split across multiple shards",
412 simpleShards: []simpleShard{
413 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
414 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2},
415 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
416 },
417 expectedShardCount: 2,
418 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2},
419 },
420 {
421 name: "unknown repository",
422 simpleShards: []simpleShard{
423 {Repository: zoekt.Repository{Name: "repoA", ID: 1}},
424 {Repository: zoekt.Repository{Name: "repoB", ID: 2}},
425 {Repository: zoekt.Repository{Name: "repoC", ID: 3}},
426 },
427 compoundShards: [][]zoekt.Repository{
428 {
429 {Name: "repoD", ID: 4},
430 {Name: "repoE", ID: 5},
431 {Name: "repoF", ID: 6},
432 },
433 },
434 expectedShardCount: 0,
435 },
436 {
437 name: "match on ID, not name (compound only)",
438 compoundShards: [][]zoekt.Repository{
439 {
440 {Name: "repoA", ID: 1},
441 {Name: "repoB", ID: 2},
442 {Name: "repoC", ID: 3},
443 },
444 {
445 {Name: "repoD", ID: 4},
446 {Name: "repoE", ID: 5},
447 {Name: "repoF", ID: 6},
448 },
449 },
450 expectedShardCount: 1,
451 expectedRepository: zoekt.Repository{Name: "something-else", ID: 5},
452 },
453 }
454 for _, tt := range tests {
455 t.Run(tt.name, func(t *testing.T) {
456 t.Parallel()
457
458 // prepare
459 indexDir := t.TempDir()
460
461 for _, s := range tt.simpleShards {
462 createTestShard(t, indexDir, s.Repository, s.NumShards)
463 }
464
465 for _, repositoryGroup := range tt.compoundShards {
466 createTestCompoundShard(t, indexDir, repositoryGroup)
467 }
468
469 o := &Options{
470 IndexDir: indexDir,
471 RepositoryDescription: tt.expectedRepository,
472 }
473 o.SetDefaults()
474
475 // run test
476 shards := o.FindAllShards()
477
478 // verify results
479 if len(shards) != tt.expectedShardCount {
480 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards))
481 }
482
483 if tt.expectedShardCount > 0 {
484 for _, s := range shards {
485 // all shards should contain the metadata for the desired repository
486 repos, _, err := ReadMetadataPathAlive(s)
487 if err != nil {
488 t.Fatalf("reading metadata from shard %q: %s", s, err)
489 }
490
491 foundRepository := false
492 for _, r := range repos {
493 if r.ID == tt.expectedRepository.ID {
494 foundRepository = true
495 break
496 }
497 }
498
499 if !foundRepository {
500 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID)
501 }
502 }
503 }
504 })
505 }
506}
507
508func TestBuilder_BranchNamesEqual(t *testing.T) {
509 for i, test := range []struct {
510 oldBranches []zoekt.RepositoryBranch
511 newBranches []zoekt.RepositoryBranch
512 expected bool
513 }{
514 {
515 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
516 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}},
517 expected: true,
518 },
519 {
520 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}},
521 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}},
522 expected: true,
523 },
524 {
525 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
526 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}},
527 expected: false,
528 },
529 {
530 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
531 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}},
532 expected: false,
533 },
534 {
535 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
536 newBranches: []zoekt.RepositoryBranch{},
537 expected: false,
538 },
539 {
540 oldBranches: []zoekt.RepositoryBranch{},
541 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}},
542 expected: false,
543 },
544 } {
545 t.Run(strconv.Itoa(i), func(t *testing.T) {
546 actual := BranchNamesEqual(test.oldBranches, test.newBranches)
547 if test.expected != actual {
548 t.Errorf("expected: %t, got: %t", test.expected, actual)
549 }
550 })
551 }
552}
553
554func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) {
555 indexDir := t.TempDir()
556
557 repository := zoekt.Repository{
558 Name: "repo",
559 ID: 1,
560 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}},
561 }
562 createTestShard(t, indexDir, repository, 2)
563
564 repositoryNewBranches := zoekt.Repository{
565 Name: "repo",
566 ID: 1,
567 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}},
568 }
569
570 o := Options{
571 IndexDir: indexDir,
572 RepositoryDescription: repositoryNewBranches,
573 IsDelta: true,
574 }
575 o.SetDefaults()
576
577 b, err := NewBuilder(o)
578 if err != nil {
579 t.Fatalf("NewBuilder: %v", err)
580 }
581
582 err = b.Finish()
583 if !errors.As(err, &deltaBranchSetError{}) {
584 t.Fatalf("expected error complaning about different branch names, got: %s", err)
585 }
586}
587
588func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) {
589 repository := zoekt.Repository{
590 Name: "repo",
591 ID: 1,
592 Branches: []zoekt.RepositoryBranch{{Name: "foo"}},
593 }
594
595 for _, test := range []struct {
596 name string
597 options func(options *Options)
598 }{
599 {
600 name: "update option CTagsPath to non default",
601 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" },
602 },
603 {
604 name: "update option DisableCTags to non default",
605 options: func(options *Options) { options.DisableCTags = true },
606 },
607 {
608 name: "update option SizeMax to non default",
609 options: func(options *Options) { options.SizeMax -= 10 },
610 },
611 {
612 name: "update option LargeFiles to non default",
613 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} },
614 },
615 } {
616 test := test
617
618 t.Run(test.name, func(t *testing.T) {
619 indexDir := t.TempDir()
620
621 // initially use default options
622 createTestShard(t, indexDir, repository, 2)
623
624 o := Options{
625 IndexDir: indexDir,
626 RepositoryDescription: repository,
627 IsDelta: true,
628 }
629 test.options(&o)
630
631 b, err := NewBuilder(o)
632 if err != nil {
633 t.Fatalf("NewBuilder: %v", err)
634 }
635
636 err = b.Finish()
637 if err == nil {
638 t.Fatalf("no error regarding index options mismatch")
639 }
640
641 var optionsMismatchError *deltaIndexOptionsMismatchError
642 if !errors.As(err, &optionsMismatchError) {
643 t.Fatalf("expected error complaining about index options mismatch, got: %s", err)
644 }
645 })
646 }
647}
648
649func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) {
650 olderTime := time.Unix(0, 0)
651 newerTime := time.Unix(10000, 0)
652
653 for _, test := range []struct {
654 name string
655 originalRepository zoekt.Repository
656 updatedRepository zoekt.Repository
657 }{
658 {
659 name: "update commit information",
660 originalRepository: zoekt.Repository{
661 Name: "repo",
662 ID: 1,
663 Branches: []zoekt.RepositoryBranch{
664 {Name: "main", Version: "v1"},
665 {Name: "release", Version: "v1"},
666 },
667 },
668 updatedRepository: zoekt.Repository{
669 Name: "repo",
670 ID: 1,
671 Branches: []zoekt.RepositoryBranch{
672 {Name: "main", Version: "v2"},
673 {Name: "release", Version: "v2"},
674 },
675 },
676 },
677 {
678 name: "update latest commit date (older -> newer)",
679 originalRepository: zoekt.Repository{
680 Name: "repo",
681 ID: 1,
682 Branches: []zoekt.RepositoryBranch{
683 {Name: "main", Version: "v1"},
684 },
685 LatestCommitDate: olderTime,
686 },
687 updatedRepository: zoekt.Repository{
688 Name: "repo",
689 ID: 1,
690 Branches: []zoekt.RepositoryBranch{
691 {Name: "main", Version: "v2"},
692 },
693 LatestCommitDate: newerTime,
694 },
695 },
696 {
697 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)",
698 originalRepository: zoekt.Repository{
699 Name: "repo",
700 ID: 1,
701 Branches: []zoekt.RepositoryBranch{
702 {Name: "main", Version: "v1"},
703 },
704 LatestCommitDate: newerTime,
705 },
706 updatedRepository: zoekt.Repository{
707 Name: "repo",
708 ID: 1,
709 Branches: []zoekt.RepositoryBranch{
710 {Name: "main", Version: "v2"},
711 },
712 LatestCommitDate: olderTime,
713 },
714 },
715 } {
716 test := test
717
718 t.Run(test.name, func(t *testing.T) {
719 indexDir := t.TempDir()
720
721 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) {
722 o.DisableCTags = true
723 })
724
725 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) {
726 o.IsDelta = true
727 o.DisableCTags = true
728 })
729
730 if len(shards) < 3 {
731 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", "))
732 }
733
734 for _, s := range shards {
735 repositories, _, err := ReadMetadataPathAlive(s)
736 if err != nil {
737 t.Fatalf("reading repository metadata from shard %q", s)
738 }
739
740 var foundRepository *zoekt.Repository
741 for _, r := range repositories {
742 if r.ID == test.updatedRepository.ID {
743 foundRepository = r
744 break
745 }
746 }
747
748 if foundRepository == nil {
749 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s)
750 }
751
752 diffOptions := []cmp.Option{
753 cmpopts.IgnoreUnexported(zoekt.Repository{}),
754 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
755 cmpopts.EquateEmpty(),
756 }
757
758 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" {
759 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff)
760 }
761 }
762 })
763 }
764}
765
766func TestFindRepositoryMetadata(t *testing.T) {
767 tests := []struct {
768 name string
769 normalShardRepositories []zoekt.Repository
770 compoundShardRepositories []zoekt.Repository
771 input *zoekt.Repository
772 expectedRepository *zoekt.Repository
773 expectedOk bool
774 }{
775 {
776 name: "repository in normal shards",
777 normalShardRepositories: []zoekt.Repository{
778 {Name: "repoA", ID: 1},
779 {Name: "repoB", ID: 2},
780 {Name: "repoC", ID: 3},
781 },
782 compoundShardRepositories: []zoekt.Repository{
783 {Name: "repoD", ID: 4},
784 {Name: "repoE", ID: 5},
785 {Name: "repoF", ID: 6},
786 },
787 input: &zoekt.Repository{Name: "repoB", ID: 2},
788 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2},
789 expectedOk: true,
790 },
791 {
792 name: "repository in compound shards",
793 normalShardRepositories: []zoekt.Repository{
794 {Name: "repoA", ID: 1},
795 {Name: "repoB", ID: 2},
796 {Name: "repoC", ID: 3},
797 },
798 compoundShardRepositories: []zoekt.Repository{
799 {Name: "repoD", ID: 4},
800 {Name: "repoE", ID: 5},
801 {Name: "repoF", ID: 6},
802 },
803 input: &zoekt.Repository{Name: "repoE", ID: 5},
804 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5},
805 expectedOk: true,
806 },
807 {
808 name: "repository not in any shard",
809 normalShardRepositories: []zoekt.Repository{
810 {Name: "repoA", ID: 1},
811 {Name: "repoB", ID: 2},
812 {Name: "repoC", ID: 3},
813 },
814 compoundShardRepositories: []zoekt.Repository{
815 {Name: "repoD", ID: 4},
816 {Name: "repoE", ID: 5},
817 {Name: "repoF", ID: 6},
818 },
819 input: &zoekt.Repository{Name: "notPresent", ID: 123},
820 expectedRepository: nil,
821 expectedOk: false,
822 },
823 }
824 for _, tt := range tests {
825 t.Run(tt.name, func(t *testing.T) {
826 // setup
827 indexDir := t.TempDir()
828
829 optFns := []func(o *Options){
830 // ctags aren't important for this test, and the equality checks
831 // for diffing repositories can break due to local configuration
832 func(o *Options) {
833 o.DisableCTags = true
834 },
835 }
836
837 for _, r := range tt.normalShardRepositories {
838 createTestShard(t, indexDir, r, 1, optFns...)
839 }
840
841 if len(tt.compoundShardRepositories) > 0 {
842 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...)
843 }
844
845 o := &Options{
846 IndexDir: indexDir,
847 RepositoryDescription: *tt.input,
848 }
849 o.SetDefaults()
850
851 // run test
852 got, _, gotOk, err := o.FindRepositoryMetadata()
853 if err != nil {
854 t.Errorf("received unexpected error: %v", err)
855 return
856 }
857
858 // check outcome
859 compareOptions := []cmp.Option{
860 cmpopts.IgnoreUnexported(zoekt.Repository{}),
861 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"),
862 cmpopts.EquateEmpty(),
863 }
864
865 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" {
866 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff)
867 }
868
869 if tt.expectedOk != gotOk {
870 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk)
871 }
872 })
873 }
874}
875
876func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string {
877 t.Helper()
878
879 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil {
880 t.Fatal(err)
881 }
882
883 o := Options{
884 IndexDir: indexDir,
885 RepositoryDescription: r,
886 ShardMax: 75, // create a new shard every 75 bytes
887 }
888 o.SetDefaults()
889
890 for _, fn := range optFns {
891 fn(&o)
892 }
893
894 b, err := NewBuilder(o)
895 if err != nil {
896 t.Fatalf("NewBuilder: %v", err)
897 }
898
899 if numShards == 0 {
900 // We have to make at least 1 shard.
901 numShards = 1
902 }
903
904 for i := range numShards {
905 // Create entries (file + contents) that are ~100 bytes each.
906 // This (along with our shardMax setting of 75 bytes) means that each shard
907 // will contain at most one of these.
908 fileName := strconv.Itoa(i)
909 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))}
910 for _, branch := range o.RepositoryDescription.Branches {
911 document.Branches = append(document.Branches, branch.Name)
912 }
913
914 err := b.Add(document)
915 if err != nil {
916 t.Fatalf("failed to add file %q to builder: %s", fileName, err)
917 }
918 }
919
920 if err := b.Finish(); err != nil {
921 t.Fatalf("Finish: %v", err)
922 }
923
924 return o.FindAllShards()
925}
926
927func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) {
928 t.Helper()
929
930 var shardNames []string
931
932 for _, r := range repositories {
933 // create an isolated scratch space to store normal shards for this repository
934 scratchDir := t.TempDir()
935
936 // create shards that'll be merged later
937 createTestShard(t, scratchDir, r, 1, optFns...)
938
939 // discover file names for all the normal shards we created
940 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse
941 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt"))
942 if err != nil {
943 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err)
944 }
945
946 shardNames = append(shardNames, shards...)
947 }
948
949 // load the normal shards that we created
950 var files []IndexFile
951 for _, shard := range shardNames {
952 f, err := os.Open(shard)
953 if err != nil {
954 t.Fatalf("opening shard file: %s", err)
955 }
956 defer f.Close()
957
958 indexFile, err := NewIndexFile(f)
959 if err != nil {
960 t.Fatalf("creating index file: %s", err)
961 }
962 defer indexFile.Close()
963
964 files = append(files, indexFile)
965 }
966
967 // merge all the simple shards into a compound shard
968 tmpName, dstName, err := Merge(indexDir, files...)
969 if err != nil {
970 t.Fatalf("merging index files into compound shard: %s", err)
971 }
972 if err := os.Rename(tmpName, dstName); err != nil {
973 t.Fatal(err)
974 }
975}
976
977func TestIgnoreSizeMax(t *testing.T) {
978 for _, test := range []struct {
979 name string
980 largeFiles []string
981 filePaths []string
982 expected bool
983 }{
984 {
985 name: "empty pattern does nothing",
986 largeFiles: []string{""},
987 filePaths: []string{"F0"},
988 expected: false,
989 },
990 {
991 name: "positive match allows",
992 largeFiles: []string{"F0"},
993 filePaths: []string{"F0"},
994 expected: true,
995 },
996 {
997 name: "positive and negative patterns allows",
998 largeFiles: []string{"F?", "!F0"},
999 filePaths: []string{"F1"},
1000 expected: true,
1001 },
1002 {
1003 name: "positive and negative patterns disallows",
1004 largeFiles: []string{"F?", "!F0"},
1005 filePaths: []string{"F0"},
1006 expected: false,
1007 },
1008 {
1009 name: "positive escaped pattern allows",
1010 largeFiles: []string{"\\!F?"},
1011 filePaths: []string{"!F0", "!F1"},
1012 expected: true,
1013 },
1014 {
1015 name: "postive escaped pattern does not disallow",
1016 largeFiles: []string{"F0", "\\!F?"},
1017 filePaths: []string{"F0", "!F0"},
1018 expected: true,
1019 },
1020 {
1021 name: "combined meta and literal interpretation disallows",
1022 largeFiles: []string{"*F*", "!!F*"},
1023 filePaths: []string{"!F0"},
1024 expected: false,
1025 },
1026 {
1027 name: "combined meta and literal interpretation allows",
1028 largeFiles: []string{"*F*", "!!F*"},
1029 filePaths: []string{"F0"},
1030 expected: true,
1031 },
1032 {
1033 name: "largeFiles order: positive match overrides previous negative match and allows",
1034 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1035 filePaths: []string{"F0"},
1036 expected: true,
1037 },
1038 {
1039 name: "largeFiles order: positive match overrides previous negative match and disallows",
1040 largeFiles: []string{"F?", "!F0", "!F1", "F0"},
1041 filePaths: []string{"F1"},
1042 expected: false,
1043 },
1044 {
1045 name: "largeFiles order: negative match overrides previous positive match and allows",
1046 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1047 filePaths: []string{"F1"},
1048 expected: true,
1049 },
1050 {
1051 name: "largeFiles order: negative match overrides previous positive match and disallows",
1052 largeFiles: []string{"F?", "!?0", "F0", "!F0"},
1053 filePaths: []string{"F0"},
1054 expected: false,
1055 },
1056 } {
1057 t.Run(test.name, func(t *testing.T) {
1058 o := Options{
1059 LargeFiles: test.largeFiles,
1060 }
1061
1062 for _, filePath := range test.filePaths {
1063 ignore := o.IgnoreSizeMax(filePath)
1064 if ignore != test.expected {
1065 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore)
1066 }
1067 }
1068 })
1069 }
1070}
1071
// filerankCase describes one document-ordering scenario that is checked by
// testFileRankAspect.
type filerankCase struct {
	// name labels the scenario; TestFileRank uses it as the subtest name.
	name string
	// docs are the documents handed to sortDocuments, in their initial order.
	docs []*Document
	// want lists indices into docs in the order expected after sorting.
	want []int
}
1077
1078func testFileRankAspect(t *testing.T, c filerankCase) {
1079 var want []*Document
1080 for _, j := range c.want {
1081 want = append(want, c.docs[j])
1082 }
1083
1084 got := make([]*Document, len(c.docs))
1085 copy(got, c.docs)
1086 sortDocuments(got)
1087
1088 print := func(ds []*Document) string {
1089 r := ""
1090 for _, d := range ds {
1091 r += fmt.Sprintf("%v, ", d)
1092 }
1093 return r
1094 }
1095 if !reflect.DeepEqual(got, want) {
1096 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
1097 }
1098}
1099
1100func TestFileRank(t *testing.T) {
1101 for _, c := range []filerankCase{{
1102 name: "filename",
1103 docs: []*Document{
1104 {
1105 Name: "longlonglong",
1106 Content: []byte("bla"),
1107 },
1108 {
1109 Name: "short",
1110 Content: []byte("bla"),
1111 },
1112 },
1113 want: []int{1, 0},
1114 }, {
1115 name: "test",
1116 docs: []*Document{
1117 {
1118 Name: "foo_test.go",
1119 Content: []byte("bla"),
1120 },
1121 {
1122 Name: "longlonglong",
1123 Content: []byte("bla"),
1124 },
1125 },
1126 want: []int{1, 0},
1127 }, {
1128 name: "content",
1129 docs: []*Document{
1130 {
1131 Content: []byte("bla"),
1132 },
1133 {
1134 Content: []byte("blablablabla"),
1135 },
1136 {
1137 Content: []byte("blabla"),
1138 },
1139 },
1140 want: []int{0, 2, 1},
1141 }, {
1142 name: "skipped docs",
1143 docs: []*Document{
1144 {
1145 Name: "binary_file",
1146 SkipReason: SkipReasonBinary,
1147 },
1148 {
1149 Name: "some_test.go",
1150 Content: []byte("bla"),
1151 },
1152 {
1153 Name: "large_file.go",
1154 SkipReason: SkipReasonTooLarge,
1155 },
1156 {
1157 Name: "file.go",
1158 Content: []byte("blabla"),
1159 },
1160 },
1161 want: []int{3, 1, 0, 2},
1162 }} {
1163 t.Run(c.name, func(t *testing.T) {
1164 testFileRankAspect(t, c)
1165 })
1166 }
1167}