fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package index 2 3import ( 4 "errors" 5 "flag" 6 "fmt" 7 "io" 8 "log" 9 "os" 10 "path/filepath" 11 "reflect" 12 "strconv" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 "github.com/google/go-cmp/cmp/cmpopts" 19 "github.com/sourcegraph/zoekt" 20) 21 22var update = flag.Bool("update", false, "update golden file") 23 24// ensure we don't regress on how we build v16 25func TestBuildv16(t *testing.T) { 26 dir := t.TempDir() 27 28 opts := Options{ 29 IndexDir: dir, 30 RepositoryDescription: zoekt.Repository{ 31 Name: "repo", 32 Source: "./testdata/repo/", 33 }, 34 DisableCTags: true, 35 } 36 opts.SetDefaults() 37 38 b, err := NewBuilder(opts) 39 if err != nil { 40 t.Fatal(err) 41 } 42 43 for _, p := range []string{"main.go"} { 44 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p)) 45 if err != nil { 46 t.Fatal(err) 47 } 48 if err := b.AddFile(p, blob); err != nil { 49 t.Fatal(err) 50 } 51 } 52 53 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt") 54 55 // fields indexTime and id depend on time. For this test, we copy the fields from 56 // the old shard. 57 _, wantMetadata, err := ReadMetadataPath(wantP) 58 if err != nil { 59 t.Fatal(err) 60 } 61 b.indexTime = wantMetadata.IndexTime 62 b.id = wantMetadata.ID 63 64 if err := b.Finish(); err != nil { 65 t.Fatal(err) 66 } 67 68 gotP := filepath.Join(dir, "repo_v16.00000.zoekt") 69 70 if *update { 71 data, err := os.ReadFile(gotP) 72 if err != nil { 73 t.Fatal(err) 74 } 75 err = os.WriteFile(wantP, data, 0o644) 76 if err != nil { 77 t.Fatal(err) 78 } 79 return 80 } 81 82 got, err := os.ReadFile(gotP) 83 if err != nil { 84 t.Fatal(err) 85 } 86 want, err := os.ReadFile(wantP) 87 if err != nil { 88 t.Fatal(err) 89 } 90 91 if d := cmp.Diff(want, got); d != "" { 92 t.Errorf("mismatch (-want +got):\n%s", d) 93 } 94} 95 96func TestFlags(t *testing.T) { 97 cases := []struct { 98 args []string 99 want Options 100 }{{ 101 // Defaults 102 args: []string{}, 103 want: Options{}, 104 }, { 105 args: []string{"-index", "/tmp"}, 106 want: Options{ 107 IndexDir: "/tmp", 108 }, 109 }, { 110 // single large file pattern 111 args: []string{"-large_file", "*.md"}, 112 want: Options{ 113 LargeFiles: []string{"*.md"}, 114 }, 115 }, { 116 // multiple large file pattern 117 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"}, 118 want: Options{ 119 LargeFiles: []string{"*.md", "*.yaml"}, 120 }, 121 }, { 122 // multiple large file pattern with negated pattern 123 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"}, 124 want: Options{ 125 LargeFiles: []string{"*.md", "!*.yaml"}, 126 }, 127 }, { 128 // multiple large file pattern with escaped character 129 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"}, 130 want: Options{ 131 LargeFiles: []string{"*.md", "\\!*.yaml"}, 132 }, 133 }} 134 135 ignored := []cmp.Option{ 136 // depends on $PATH setting. 137 cmpopts.IgnoreFields(Options{}, "CTagsPath"), 138 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"), 139 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"), 140 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 141 } 142 143 for _, c := range cases { 144 c.want.SetDefaults() 145 // depends on $PATH setting. 146 c.want.CTagsPath = "" 147 148 got := Options{} 149 fs := flag.NewFlagSet("", flag.ContinueOnError) 150 got.Flags(fs) 151 if err := fs.Parse(c.args); err != nil { 152 t.Errorf("failed to parse args %v: %v", c.args, err) 153 } else if d := cmp.Diff(c.want, got, ignored...); d != "" { 154 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d) 155 } 156 } 157} 158 159func TestIncrementalSkipIndexing(t *testing.T) { 160 cases := []struct { 161 name string 162 want bool 163 opts Options 164 }{{ 165 name: "v17-noop", 166 want: true, 167 opts: Options{ 168 RepositoryDescription: zoekt.Repository{ 169 Name: "repo17", 170 }, 171 SizeMax: 2097152, 172 DisableCTags: true, 173 }, 174 }, { 175 name: "v16-noop", 176 want: true, 177 opts: Options{ 178 RepositoryDescription: zoekt.Repository{ 179 Name: "repo", 180 }, 181 SizeMax: 2097152, 182 DisableCTags: true, 183 }, 184 }, { 185 name: "v17-id", 186 want: false, 187 opts: Options{ 188 RepositoryDescription: zoekt.Repository{ 189 Name: "repo17", 190 RawConfig: map[string]string{ 191 "repoid": "123", 192 }, 193 }, 194 SizeMax: 2097152, 195 DisableCTags: true, 196 }, 197 }, { 198 name: "doesnotexist", 199 want: false, 200 opts: Options{ 201 RepositoryDescription: zoekt.Repository{ 202 Name: "doesnotexist", 203 }, 204 SizeMax: 2097152, 205 DisableCTags: true, 206 }, 207 }} 208 209 for _, tc := range cases { 210 t.Run(tc.name, func(t *testing.T) { 211 tc.opts.IndexDir = "../testdata/shards" 212 t.Log(tc.opts.IndexState()) 213 got := tc.opts.IncrementalSkipIndexing() 214 if got != tc.want { 215 t.Fatalf("want %v got %v", tc.want, got) 216 } 217 }) 218 } 219} 220 221func TestMain(m *testing.M) { 222 flag.Parse() 223 if !testing.Verbose() { 224 log.SetOutput(io.Discard) 225 } 226 os.Exit(m.Run()) 227} 228 229func TestDontCountContentOfSkippedFiles(t *testing.T) { 230 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{ 231 Name: "foo", 232 }}) 233 if err != nil { 234 t.Fatal(err) 235 } 236 237 // content with at least 100 bytes 238 binary := append([]byte("abc def \x00"), make([]byte, 100)...) 239 err = b.Add(Document{ 240 Name: "f1", 241 Content: binary, 242 }) 243 if err != nil { 244 t.Fatal(err) 245 } 246 if len(b.todo) != 1 || b.todo[0].SkipReason == "" { 247 t.Fatalf("document should have been skipped") 248 } 249 if b.todo[0].Content != nil { 250 t.Fatalf("document content should be empty") 251 } 252 if b.size >= 100 { 253 t.Fatalf("content of skipped documents should not count towards shard size thresold") 254 } 255} 256 257func TestPartialSuccess(t *testing.T) { 258 dir := t.TempDir() 259 260 opts := Options{ 261 IndexDir: dir, 262 ShardMax: 1024, 263 SizeMax: 1 << 20, 264 Parallelism: 1, 265 } 266 opts.RepositoryDescription.Name = "repo" 267 opts.SetDefaults() 268 269 b, err := NewBuilder(opts) 270 if err != nil { 271 t.Fatalf("NewBuilder: %v", err) 272 } 273 274 for i := 0; i < 4; i++ { 275 nm := fmt.Sprintf("F%d", i) 276 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 277 } 278 b.buildError = fmt.Errorf("any error") 279 280 // No error checking. 281 _ = b.Finish() 282 283 // Finish cleans up temporary files. 284 if fs, err := filepath.Glob(dir + "/*"); err != nil { 285 t.Errorf("glob(%s): %v", dir, err) 286 } else if len(fs) != 0 { 287 t.Errorf("got shards %v, want []", fs) 288 } 289} 290 291func TestOptions_FindAllShards(t *testing.T) { 292 type simpleShard struct { 293 Repository zoekt.Repository 294 // NumShards is the number of shards that should be created that 295 // contain data for "Repository". 296 NumShards int 297 } 298 299 tests := []struct { 300 name string 301 simpleShards []simpleShard 302 compoundShards [][]zoekt.Repository 303 expectedShardCount int 304 expectedRepository zoekt.Repository 305 }{ 306 { 307 name: "repository in normal shard", 308 simpleShards: []simpleShard{ 309 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 310 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 311 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 312 }, 313 expectedShardCount: 1, 314 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 315 }, 316 { 317 name: "repository in compound shard", 318 compoundShards: [][]zoekt.Repository{ 319 { 320 {Name: "repoA", ID: 1}, 321 {Name: "repoB", ID: 2}, 322 {Name: "repoC", ID: 3}, 323 }, 324 { 325 {Name: "repoD", ID: 4}, 326 {Name: "repoE", ID: 5}, 327 {Name: "repoF", ID: 6}, 328 }, 329 }, 330 expectedShardCount: 1, 331 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 332 }, 333 { 334 name: "repository split across multiple shards", 335 simpleShards: []simpleShard{ 336 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 337 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2}, 338 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 339 }, 340 expectedShardCount: 2, 341 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 342 }, 343 { 344 name: "unknown repository", 345 simpleShards: []simpleShard{ 346 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 347 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 348 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 349 }, 350 compoundShards: [][]zoekt.Repository{ 351 { 352 {Name: "repoD", ID: 4}, 353 {Name: "repoE", ID: 5}, 354 {Name: "repoF", ID: 6}, 355 }, 356 }, 357 expectedShardCount: 0, 358 }, 359 { 360 name: "match on ID, not name (compound only)", 361 compoundShards: [][]zoekt.Repository{ 362 { 363 {Name: "repoA", ID: 1}, 364 {Name: "sameName", ID: 2}, 365 {Name: "sameName", ID: 3}, 366 }, 367 { 368 {Name: "repoB", ID: 4}, 369 {Name: "sameName", ID: 5}, 370 {Name: "sameName", ID: 6}, 371 }, 372 }, 373 expectedShardCount: 1, 374 expectedRepository: zoekt.Repository{Name: "sameName", ID: 5}, 375 }, 376 } 377 for _, tt := range tests { 378 t.Run(tt.name, func(t *testing.T) { 379 t.Parallel() 380 381 // prepare 382 indexDir := t.TempDir() 383 384 for _, s := range tt.simpleShards { 385 createTestShard(t, indexDir, s.Repository, s.NumShards) 386 } 387 388 for _, repositoryGroup := range tt.compoundShards { 389 createTestCompoundShard(t, indexDir, repositoryGroup) 390 } 391 392 o := &Options{ 393 IndexDir: indexDir, 394 RepositoryDescription: tt.expectedRepository, 395 } 396 o.SetDefaults() 397 398 // run test 399 shards := o.FindAllShards() 400 401 // verify results 402 if len(shards) != tt.expectedShardCount { 403 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards)) 404 } 405 406 if tt.expectedShardCount > 0 { 407 for _, s := range shards { 408 // all shards should contain the metadata for the desired repository 409 repos, _, err := ReadMetadataPathAlive(s) 410 if err != nil { 411 t.Fatalf("reading metadata from shard %q: %s", s, err) 412 } 413 414 foundRepository := false 415 for _, r := range repos { 416 if r.ID == tt.expectedRepository.ID { 417 foundRepository = true 418 break 419 } 420 } 421 422 if !foundRepository { 423 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID) 424 } 425 } 426 } 427 }) 428 } 429} 430 431func TestBuilder_BranchNamesEqual(t *testing.T) { 432 for i, test := range []struct { 433 oldBranches []zoekt.RepositoryBranch 434 newBranches []zoekt.RepositoryBranch 435 expected bool 436 }{ 437 { 438 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 439 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 440 expected: true, 441 }, 442 { 443 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}}, 444 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}}, 445 expected: true, 446 }, 447 { 448 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 449 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}}, 450 expected: false, 451 }, 452 { 453 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 454 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}}, 455 expected: false, 456 }, 457 { 458 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 459 newBranches: []zoekt.RepositoryBranch{}, 460 expected: false, 461 }, 462 { 463 oldBranches: []zoekt.RepositoryBranch{}, 464 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 465 expected: false, 466 }, 467 } { 468 t.Run(strconv.Itoa(i), func(t *testing.T) { 469 actual := BranchNamesEqual(test.oldBranches, test.newBranches) 470 if test.expected != actual { 471 t.Errorf("expected: %t, got: %t", test.expected, actual) 472 } 473 }) 474 } 475} 476 477func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) { 478 indexDir := t.TempDir() 479 480 repository := zoekt.Repository{ 481 Name: "repo", 482 ID: 1, 483 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}}, 484 } 485 createTestShard(t, indexDir, repository, 2) 486 487 repositoryNewBranches := zoekt.Repository{ 488 Name: "repo", 489 ID: 1, 490 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}}, 491 } 492 493 o := Options{ 494 IndexDir: indexDir, 495 RepositoryDescription: repositoryNewBranches, 496 IsDelta: true, 497 } 498 o.SetDefaults() 499 500 b, err := NewBuilder(o) 501 if err != nil { 502 t.Fatalf("NewBuilder: %v", err) 503 } 504 505 err = b.Finish() 506 if !errors.As(err, &deltaBranchSetError{}) { 507 t.Fatalf("expected error complaning about different branch names, got: %s", err) 508 } 509} 510 511func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) { 512 repository := zoekt.Repository{ 513 Name: "repo", 514 ID: 1, 515 Branches: []zoekt.RepositoryBranch{{Name: "foo"}}, 516 } 517 518 for _, test := range []struct { 519 name string 520 options func(options *Options) 521 }{ 522 { 523 name: "update option CTagsPath to non default", 524 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" }, 525 }, 526 { 527 name: "update option DisableCTags to non default", 528 options: func(options *Options) { options.DisableCTags = true }, 529 }, 530 { 531 name: "update option SizeMax to non default", 532 options: func(options *Options) { options.SizeMax -= 10 }, 533 }, 534 { 535 name: "update option LargeFiles to non default", 536 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} }, 537 }, 538 } { 539 test := test 540 541 t.Run(test.name, func(t *testing.T) { 542 indexDir := t.TempDir() 543 544 // initially use default options 545 createTestShard(t, indexDir, repository, 2) 546 547 o := Options{ 548 IndexDir: indexDir, 549 RepositoryDescription: repository, 550 IsDelta: true, 551 } 552 test.options(&o) 553 554 b, err := NewBuilder(o) 555 if err != nil { 556 t.Fatalf("NewBuilder: %v", err) 557 } 558 559 err = b.Finish() 560 if err == nil { 561 t.Fatalf("no error regarding index options mismatch") 562 } 563 564 var optionsMismatchError *deltaIndexOptionsMismatchError 565 if !errors.As(err, &optionsMismatchError) { 566 t.Fatalf("expected error complaining about index options mismatch, got: %s", err) 567 } 568 }) 569 } 570} 571 572func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { 573 olderTime := time.Unix(0, 0) 574 newerTime := time.Unix(10000, 0) 575 576 for _, test := range []struct { 577 name string 578 originalRepository zoekt.Repository 579 updatedRepository zoekt.Repository 580 }{ 581 { 582 name: "update commit information", 583 originalRepository: zoekt.Repository{ 584 Name: "repo", 585 ID: 1, 586 Branches: []zoekt.RepositoryBranch{ 587 {Name: "main", Version: "v1"}, 588 {Name: "release", Version: "v1"}, 589 }, 590 }, 591 updatedRepository: zoekt.Repository{ 592 Name: "repo", 593 ID: 1, 594 Branches: []zoekt.RepositoryBranch{ 595 {Name: "main", Version: "v2"}, 596 {Name: "release", Version: "v2"}, 597 }, 598 }, 599 }, 600 { 601 name: "update latest commit date (older -> newer)", 602 originalRepository: zoekt.Repository{ 603 Name: "repo", 604 ID: 1, 605 Branches: []zoekt.RepositoryBranch{ 606 {Name: "main", Version: "v1"}, 607 }, 608 LatestCommitDate: olderTime, 609 }, 610 updatedRepository: zoekt.Repository{ 611 Name: "repo", 612 ID: 1, 613 Branches: []zoekt.RepositoryBranch{ 614 {Name: "main", Version: "v2"}, 615 }, 616 LatestCommitDate: newerTime, 617 }, 618 }, 619 { 620 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)", 621 originalRepository: zoekt.Repository{ 622 Name: "repo", 623 ID: 1, 624 Branches: []zoekt.RepositoryBranch{ 625 {Name: "main", Version: "v1"}, 626 }, 627 LatestCommitDate: newerTime, 628 }, 629 updatedRepository: zoekt.Repository{ 630 Name: "repo", 631 ID: 1, 632 Branches: []zoekt.RepositoryBranch{ 633 {Name: "main", Version: "v2"}, 634 }, 635 LatestCommitDate: olderTime, 636 }, 637 }, 638 } { 639 test := test 640 641 t.Run(test.name, func(t *testing.T) { 642 indexDir := t.TempDir() 643 644 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) { 645 o.DisableCTags = true 646 }) 647 648 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) { 649 o.IsDelta = true 650 o.DisableCTags = true 651 }) 652 653 if len(shards) < 3 { 654 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", ")) 655 } 656 657 for _, s := range shards { 658 repositories, _, err := ReadMetadataPathAlive(s) 659 if err != nil { 660 t.Fatalf("reading repository metadata from shard %q", s) 661 } 662 663 var foundRepository *zoekt.Repository 664 for _, r := range repositories { 665 if r.ID == test.updatedRepository.ID { 666 foundRepository = r 667 break 668 } 669 } 670 671 if foundRepository == nil { 672 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s) 673 } 674 675 diffOptions := []cmp.Option{ 676 cmpopts.IgnoreUnexported(zoekt.Repository{}), 677 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 678 cmpopts.EquateEmpty(), 679 } 680 681 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" { 682 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff) 683 } 684 } 685 }) 686 } 687} 688 689func TestFindRepositoryMetadata(t *testing.T) { 690 tests := []struct { 691 name string 692 normalShardRepositories []zoekt.Repository 693 compoundShardRepositories []zoekt.Repository 694 input *zoekt.Repository 695 expectedRepository *zoekt.Repository 696 expectedOk bool 697 }{ 698 { 699 name: "repository in normal shards", 700 normalShardRepositories: []zoekt.Repository{ 701 {Name: "repoA", ID: 1}, 702 {Name: "repoB", ID: 2}, 703 {Name: "repoC", ID: 3}, 704 }, 705 compoundShardRepositories: []zoekt.Repository{ 706 {Name: "repoD", ID: 4}, 707 {Name: "repoE", ID: 5}, 708 {Name: "repoF", ID: 6}, 709 }, 710 input: &zoekt.Repository{Name: "repoB", ID: 2}, 711 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2}, 712 expectedOk: true, 713 }, 714 { 715 name: "repository in compound shards", 716 normalShardRepositories: []zoekt.Repository{ 717 {Name: "repoA", ID: 1}, 718 {Name: "repoB", ID: 2}, 719 {Name: "repoC", ID: 3}, 720 }, 721 compoundShardRepositories: []zoekt.Repository{ 722 {Name: "repoD", ID: 4}, 723 {Name: "repoE", ID: 5}, 724 {Name: "repoF", ID: 6}, 725 }, 726 input: &zoekt.Repository{Name: "repoE", ID: 5}, 727 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5}, 728 expectedOk: true, 729 }, 730 { 731 name: "repository not in any shard", 732 normalShardRepositories: []zoekt.Repository{ 733 {Name: "repoA", ID: 1}, 734 {Name: "repoB", ID: 2}, 735 {Name: "repoC", ID: 3}, 736 }, 737 compoundShardRepositories: []zoekt.Repository{ 738 {Name: "repoD", ID: 4}, 739 {Name: "repoE", ID: 5}, 740 {Name: "repoF", ID: 6}, 741 }, 742 input: &zoekt.Repository{Name: "notPresent", ID: 123}, 743 expectedRepository: nil, 744 expectedOk: false, 745 }, 746 } 747 for _, tt := range tests { 748 t.Run(tt.name, func(t *testing.T) { 749 // setup 750 indexDir := t.TempDir() 751 752 optFns := []func(o *Options){ 753 // ctags aren't important for this test, and the equality checks 754 // for diffing repositories can break due to local configuration 755 func(o *Options) { 756 o.DisableCTags = true 757 }, 758 } 759 760 for _, r := range tt.normalShardRepositories { 761 createTestShard(t, indexDir, r, 1, optFns...) 762 } 763 764 if len(tt.compoundShardRepositories) > 0 { 765 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...) 766 } 767 768 o := &Options{ 769 IndexDir: indexDir, 770 RepositoryDescription: *tt.input, 771 } 772 o.SetDefaults() 773 774 // run test 775 got, _, gotOk, err := o.FindRepositoryMetadata() 776 if err != nil { 777 t.Errorf("received unexpected error: %v", err) 778 return 779 } 780 781 // check outcome 782 compareOptions := []cmp.Option{ 783 cmpopts.IgnoreUnexported(zoekt.Repository{}), 784 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 785 cmpopts.EquateEmpty(), 786 } 787 788 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" { 789 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff) 790 } 791 792 if tt.expectedOk != gotOk { 793 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk) 794 } 795 }) 796 } 797} 798 799func TestIsLowPriority(t *testing.T) { 800 cases := []string{ 801 "builder_test.go", 802 "test/TestQuery.java", 803 "search/vendor/thirdparty.cc", 804 "search/node_modules/search/js", 805 "search.min.js", 806 "internal/search.js.map", 807 } 808 809 for _, tt := range cases { 810 t.Run(tt, func(t *testing.T) { 811 if !IsLowPriority(tt, nil) { 812 t.Errorf("expected file '%s' to be low priority", tt) 813 } 814 }) 815 } 816 817 negativeCases := []string{ 818 "builder.go", 819 "RoutesTrigger.java", 820 "search.js", 821 } 822 823 for _, tt := range negativeCases { 824 t.Run(tt, func(t *testing.T) { 825 if IsLowPriority(tt, nil) { 826 t.Errorf("did not expect file '%s' to be low priority", tt) 827 } 828 }) 829 } 830 831 // Explicitly check that content is important by using the same filename but 832 // different content. 833 normal := "package mock\n\nvar Mock struct {}" 834 generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}" 835 if IsLowPriority("mock.go", []byte(normal)) { 836 t.Error("expected non-generated content to not be low priority") 837 } 838 if !IsLowPriority("mock.go", []byte(generated)) { 839 t.Error("expected generated content to be low priority") 840 } 841} 842 843func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { 844 t.Helper() 845 846 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil { 847 t.Fatal(err) 848 } 849 850 o := Options{ 851 IndexDir: indexDir, 852 RepositoryDescription: r, 853 ShardMax: 75, // create a new shard every 75 bytes 854 } 855 o.SetDefaults() 856 857 for _, fn := range optFns { 858 fn(&o) 859 } 860 861 b, err := NewBuilder(o) 862 if err != nil { 863 t.Fatalf("NewBuilder: %v", err) 864 } 865 866 if numShards == 0 { 867 // We have to make at least 1 shard. 868 numShards = 1 869 } 870 871 for i := 0; i < numShards; i++ { 872 // Create entries (file + contents) that are ~100 bytes each. 873 // This (along with our shardMax setting of 75 bytes) means that each shard 874 // will contain at most one of these. 875 fileName := strconv.Itoa(i) 876 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} 877 for _, branch := range o.RepositoryDescription.Branches { 878 document.Branches = append(document.Branches, branch.Name) 879 } 880 881 err := b.Add(document) 882 if err != nil { 883 t.Fatalf("failed to add file %q to builder: %s", fileName, err) 884 } 885 } 886 887 if err := b.Finish(); err != nil { 888 t.Fatalf("Finish: %v", err) 889 } 890 891 return o.FindAllShards() 892} 893 894func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) { 895 t.Helper() 896 897 var shardNames []string 898 899 for _, r := range repositories { 900 // create an isolated scratch space to store normal shards for this repository 901 scratchDir := t.TempDir() 902 903 // create shards that'll be merged later 904 createTestShard(t, scratchDir, r, 1, optFns...) 905 906 // discover file names for all the normal shards we created 907 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse 908 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt")) 909 if err != nil { 910 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err) 911 } 912 913 shardNames = append(shardNames, shards...) 914 } 915 916 // load the normal shards that we created 917 var files []IndexFile 918 for _, shard := range shardNames { 919 f, err := os.Open(shard) 920 if err != nil { 921 t.Fatalf("opening shard file: %s", err) 922 } 923 defer f.Close() 924 925 indexFile, err := NewIndexFile(f) 926 if err != nil { 927 t.Fatalf("creating index file: %s", err) 928 } 929 defer indexFile.Close() 930 931 files = append(files, indexFile) 932 } 933 934 // merge all the simple shards into a compound shard 935 tmpName, dstName, err := Merge(indexDir, files...) 936 if err != nil { 937 t.Fatalf("merging index files into compound shard: %s", err) 938 } 939 if err := os.Rename(tmpName, dstName); err != nil { 940 t.Fatal(err) 941 } 942} 943 944func TestIgnoreSizeMax(t *testing.T) { 945 for _, test := range []struct { 946 name string 947 largeFiles []string 948 filePaths []string 949 expected bool 950 }{ 951 { 952 name: "empty pattern does nothing", 953 largeFiles: []string{""}, 954 filePaths: []string{"F0"}, 955 expected: false, 956 }, 957 { 958 name: "positive match allows", 959 largeFiles: []string{"F0"}, 960 filePaths: []string{"F0"}, 961 expected: true, 962 }, 963 { 964 name: "positive and negative patterns allows", 965 largeFiles: []string{"F?", "!F0"}, 966 filePaths: []string{"F1"}, 967 expected: true, 968 }, 969 { 970 name: "positive and negative patterns disallows", 971 largeFiles: []string{"F?", "!F0"}, 972 filePaths: []string{"F0"}, 973 expected: false, 974 }, 975 { 976 name: "positive escaped pattern allows", 977 largeFiles: []string{"\\!F?"}, 978 filePaths: []string{"!F0", "!F1"}, 979 expected: true, 980 }, 981 { 982 name: "postive escaped pattern does not disallow", 983 largeFiles: []string{"F0", "\\!F?"}, 984 filePaths: []string{"F0", "!F0"}, 985 expected: true, 986 }, 987 { 988 name: "combined meta and literal interpretation disallows", 989 largeFiles: []string{"*F*", "!!F*"}, 990 filePaths: []string{"!F0"}, 991 expected: false, 992 }, 993 { 994 name: "combined meta and literal interpretation allows", 995 largeFiles: []string{"*F*", "!!F*"}, 996 filePaths: []string{"F0"}, 997 expected: true, 998 }, 999 { 1000 name: "largeFiles order: positive match overrides previous negative match and allows", 1001 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1002 filePaths: []string{"F0"}, 1003 expected: true, 1004 }, 1005 { 1006 name: "largeFiles order: positive match overrides previous negative match and disallows", 1007 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1008 filePaths: []string{"F1"}, 1009 expected: false, 1010 }, 1011 { 1012 name: "largeFiles order: negative match overrides previous positive match and allows", 1013 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1014 filePaths: []string{"F1"}, 1015 expected: true, 1016 }, 1017 { 1018 name: "largeFiles order: negative match overrides previous positive match and disallows", 1019 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1020 filePaths: []string{"F0"}, 1021 expected: false, 1022 }, 1023 } { 1024 t.Run(test.name, func(t *testing.T) { 1025 o := Options{ 1026 LargeFiles: test.largeFiles, 1027 } 1028 1029 for _, filePath := range test.filePaths { 1030 ignore := o.IgnoreSizeMax(filePath) 1031 if ignore != test.expected { 1032 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore) 1033 } 1034 } 1035 }) 1036 } 1037} 1038 1039type filerankCase struct { 1040 name string 1041 docs []*Document 1042 want []int 1043} 1044 1045func testFileRankAspect(t *testing.T, c filerankCase) { 1046 var want []*Document 1047 for _, j := range c.want { 1048 want = append(want, c.docs[j]) 1049 } 1050 1051 got := make([]*Document, len(c.docs)) 1052 copy(got, c.docs) 1053 sortDocuments(got) 1054 1055 print := func(ds []*Document) string { 1056 r := "" 1057 for _, d := range ds { 1058 r += fmt.Sprintf("%v, ", d) 1059 } 1060 return r 1061 } 1062 if !reflect.DeepEqual(got, want) { 1063 t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 1064 } 1065} 1066 1067func TestFileRank(t *testing.T) { 1068 for _, c := range []filerankCase{{ 1069 name: "filename", 1070 docs: []*Document{ 1071 { 1072 Name: "longlonglong", 1073 Content: []byte("bla"), 1074 }, 1075 { 1076 Name: "short", 1077 Content: []byte("bla"), 1078 }, 1079 }, 1080 want: []int{1, 0}, 1081 }, { 1082 name: "test", 1083 docs: []*Document{ 1084 { 1085 Name: "foo_test.go", 1086 Content: []byte("bla"), 1087 }, 1088 { 1089 Name: "longlonglong", 1090 Content: []byte("bla"), 1091 }, 1092 }, 1093 want: []int{1, 0}, 1094 }, { 1095 name: "content", 1096 docs: []*Document{ 1097 { 1098 Content: []byte("bla"), 1099 }, 1100 { 1101 Content: []byte("blablablabla"), 1102 }, 1103 { 1104 Content: []byte("blabla"), 1105 }, 1106 }, 1107 want: []int{0, 2, 1}, 1108 }, { 1109 name: "skipped docs", 1110 docs: []*Document{ 1111 { 1112 Name: "binary_file", 1113 SkipReason: "binary file", 1114 }, 1115 { 1116 Name: "some_test.go", 1117 Content: []byte("bla"), 1118 }, 1119 { 1120 Name: "large_file.go", 1121 SkipReason: "too large", 1122 }, 1123 { 1124 Name: "file.go", 1125 Content: []byte("blabla"), 1126 }, 1127 }, 1128 want: []int{3, 1, 0, 2}, 1129 }} { 1130 t.Run(c.name, func(t *testing.T) { 1131 testFileRankAspect(t, c) 1132 }) 1133 } 1134}