fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

zoekt-git-index: add support for delta shards (#310)

+1294 -172
+4
api.go
··· 245 245 Version string 246 246 } 247 247 248 + func (r RepositoryBranch) String() string { 249 + return fmt.Sprintf("%s@%s", r.Name, r.Version) 250 + } 251 + 248 252 // Repository holds repository metadata. 249 253 type Repository struct { 250 254 // Sourcergaph's repository ID
+72 -29
build/builder.go
··· 99 99 // last run. 100 100 IsDelta bool 101 101 102 - // ChangedOrRemovedFiles is a list of file paths that have been changed or removed 102 + // changedOrRemovedFiles is a list of file paths that have been changed or removed 103 103 // since the last indexing job for this repository. These files will be tombstoned 104 104 // in the older shards for this repository. 105 - ChangedOrRemovedFiles []string 105 + changedOrRemovedFiles []string 106 106 } 107 107 108 108 // HashOptions creates a hash of the options that affect an index. ··· 222 222 223 223 // a sortable 20 chars long id. 224 224 id string 225 + 226 + finishCalled bool 225 227 } 226 228 227 229 type finishedShard struct { ··· 294 296 type IndexState string 295 297 296 298 const ( 297 - IndexStateMissing IndexState = "missing" 298 - IndexStateCorrupt IndexState = "corrupt" 299 - IndexStateVersion IndexState = "version-mismatch" 300 - IndexStateOption IndexState = "option-mismatch" 301 - IndexStateMeta IndexState = "meta-mismatch" 302 - IndexStateContent IndexState = "content-mismatch" 303 - IndexStateBranchSet IndexState = "branch-set-mismatch" 304 - IndexStateBranchVersion IndexState = "branch-version-mismatch" 305 - IndexStateEqual IndexState = "equal" 299 + IndexStateMissing IndexState = "missing" 300 + IndexStateCorrupt IndexState = "corrupt" 301 + IndexStateVersion IndexState = "version-mismatch" 302 + IndexStateOption IndexState = "option-mismatch" 303 + IndexStateMeta IndexState = "meta-mismatch" 304 + IndexStateContent IndexState = "content-mismatch" 305 + IndexStateEqual IndexState = "equal" 306 306 ) 307 307 308 308 var readVersions = []struct { ··· 361 361 return IndexStateOption, fn 362 362 } 363 363 364 - if o.IsDelta { // TODO: Get rid of this guard once the delta shard behavior is the default 365 - state := compareBranches(repo.Branches, o.RepositoryDescription.Branches) 366 - if state != IndexStateEqual { 367 - return state, fn 368 - } 369 - } else if !reflect.DeepEqual(repo.Branches, 
o.RepositoryDescription.Branches) { 364 + if !reflect.DeepEqual(repo.Branches, o.RepositoryDescription.Branches) { 370 365 return IndexStateContent, fn 371 366 } 372 367 ··· 383 378 return IndexStateEqual, fn 384 379 } 385 380 381 + // FindRepositoryMetadata returns the index metadata for the repository 382 + // specified in the options. 'ok' is false if the repository's metadata 383 + // couldn't be found or if an error occurred. 384 + func (o *Options) FindRepositoryMetadata() (repository *zoekt.Repository, ok bool, err error) { 385 + shard := o.findShard() 386 + if shard == "" { 387 + return nil, false, nil 388 + } 389 + 390 + repositories, _, err := zoekt.ReadMetadataPathAlive(shard) 391 + if err != nil { 392 + return nil, false, fmt.Errorf("reading metadata for shard %q: %w", shard, err) 393 + } 394 + 395 + ID := o.RepositoryDescription.ID 396 + for _, r := range repositories { 397 + // compound shards contain multiple repositories, so we 398 + // have to pick only the one we're looking for 399 + if r.ID == ID { 400 + return r, true, nil 401 + } 402 + } 403 + 404 + // If we're here, then we're somehow in a state where we found a matching 405 + // shard that's missing the repository metadata we're looking for. This 406 + // should never happen. 407 + name := o.RepositoryDescription.Name 408 + return nil, false, fmt.Errorf("matching shard %q doesn't contain metadata for repo id %d (%q)", shard, ID, name) 409 + } 410 + 386 411 func (o *Options) findShard() string { 387 412 for _, v := range readVersions { 388 413 fn := o.shardNameVersion(v.IndexFormatVersion, 0) ··· 516 541 } 517 542 518 543 func (b *Builder) Add(doc zoekt.Document) error { 544 + if b.finishCalled { 545 + return nil 546 + } 547 + 519 548 allowLargeFile := b.opts.IgnoreSizeMax(doc.Name) 520 549 521 550 // Adjust trigramMax for allowed large files so we don't exclude them. 
··· 550 579 return nil 551 580 } 552 581 582 + // MarkFileAsChangedOrRemoved indicates that the file specified by the given path 583 + // has been changed or removed since the last indexing job for this repository. 584 + // 585 + // If this build is a delta build, these files will be tombstoned in the older shards for this repository. 586 + func (b *Builder) MarkFileAsChangedOrRemoved(path string) { 587 + b.opts.changedOrRemovedFiles = append(b.opts.changedOrRemovedFiles, path) 588 + } 589 + 553 590 // Finish creates a last shard from the buffered documents, and clears 554 591 // stale shards from previous runs. This should always be called, also 555 592 // in failure cases, to ensure cleanup. 593 + // 594 + // It is safe to call Finish() multiple times. 556 595 func (b *Builder) Finish() error { 596 + if b.finishCalled { 597 + return b.buildError 598 + } 599 + 600 + b.finishCalled = true 601 + 557 602 b.flush() 558 603 b.building.Wait() 559 604 ··· 596 641 return fmt.Errorf("shard %q doesn't contain repository ID %d (%q)", shard, b.opts.RepositoryDescription.ID, b.opts.RepositoryDescription.Name) 597 642 } 598 643 599 - if len(b.opts.ChangedOrRemovedFiles) > 0 && repository.FileTombstones == nil { 600 - repository.FileTombstones = make(map[string]struct{}, len(b.opts.ChangedOrRemovedFiles)) 644 + if len(b.opts.changedOrRemovedFiles) > 0 && repository.FileTombstones == nil { 645 + repository.FileTombstones = make(map[string]struct{}, len(b.opts.changedOrRemovedFiles)) 601 646 } 602 647 603 - for _, f := range b.opts.ChangedOrRemovedFiles { 648 + for _, f := range b.opts.changedOrRemovedFiles { 604 649 repository.FileTombstones[f] = struct{}{} 605 650 } 606 651 607 - if compareBranches(repository.Branches, b.opts.RepositoryDescription.Branches) == IndexStateBranchSet { 608 - // NOTE: Should we be handling IndexStateBranchVersion and IndexStateCorrupt here too? 
652 + if !BranchNamesEqual(repository.Branches, b.opts.RepositoryDescription.Branches) { 609 653 return deltaBranchSetError{ 610 654 shardName: shard, 611 655 old: repository.Branches, ··· 689 733 return b.buildError 690 734 } 691 735 692 - func compareBranches(a, b []zoekt.RepositoryBranch) IndexState { 736 + // BranchNamesEqual compares the given zoekt.RepositoryBranch slices, and returns true 737 + // iff both slices specify the same set of branch names in the same order. 738 + func BranchNamesEqual(a, b []zoekt.RepositoryBranch) bool { 693 739 if len(a) != len(b) { 694 - return IndexStateBranchSet 740 + return false 695 741 } 696 742 697 743 for i := range a { 698 744 x, y := a[i], b[i] 699 745 if x.Name != y.Name { 700 - return IndexStateBranchSet 701 - } 702 - if x.Version != y.Version { 703 - return IndexStateBranchVersion 746 + return false 704 747 } 705 748 } 706 749 707 - return IndexStateEqual 750 + return true 708 751 } 709 752 710 753 func (b *Builder) flush() error {
+140 -53
build/builder_test.go
··· 120 120 ignored := []cmp.Option{ 121 121 // depends on $PATH setting. 122 122 cmpopts.IgnoreFields(Options{}, "CTags"), 123 + cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"), 123 124 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 124 125 } 125 126 ··· 322 323 } 323 324 for _, tt := range tests { 324 325 t.Run(tt.name, func(t *testing.T) { 326 + t.Parallel() 327 + 325 328 // prepare 326 329 indexDir := t.TempDir() 327 330 ··· 372 375 } 373 376 } 374 377 375 - func TestBuilder_DeltaShardsIndexState(t *testing.T) { 378 + func TestBuilder_BranchNamesEqual(t *testing.T) { 376 379 for i, test := range []struct { 377 - oldBranches []zoekt.RepositoryBranch 378 - newBranches []zoekt.RepositoryBranch 379 - expectedState IndexState 380 + oldBranches []zoekt.RepositoryBranch 381 + newBranches []zoekt.RepositoryBranch 382 + expected bool 380 383 }{ 381 384 { 382 - oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 383 - newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}}, 384 - expectedState: IndexStateBranchVersion, 385 + oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 386 + newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 387 + expected: true, 385 388 }, 386 389 { 387 - oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 388 - newBranches: []zoekt.RepositoryBranch{ 389 - {Name: "main", Version: "v2"}, 390 - {Name: "release", Version: "v1"}, 391 - }, 392 - expectedState: IndexStateBranchSet, 390 + oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}}, 391 + newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}}, 392 + expected: true, 393 393 }, 394 394 { 395 - oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 396 - newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: 
"v1"}}, 397 - expectedState: IndexStateBranchSet, 395 + oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 396 + newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}}, 397 + expected: false, 398 398 }, 399 399 { 400 - oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 401 - newBranches: []zoekt.RepositoryBranch{}, 402 - expectedState: IndexStateBranchSet, 400 + oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 401 + newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}}, 402 + expected: false, 403 403 }, 404 404 { 405 - oldBranches: []zoekt.RepositoryBranch{}, 406 - newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 407 - expectedState: IndexStateBranchSet, 405 + oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 406 + newBranches: []zoekt.RepositoryBranch{}, 407 + expected: false, 408 408 }, 409 409 { 410 - oldBranches: []zoekt.RepositoryBranch{}, 411 - newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 412 - expectedState: IndexStateBranchSet, 410 + oldBranches: []zoekt.RepositoryBranch{}, 411 + newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 412 + expected: false, 413 413 }, 414 414 } { 415 415 t.Run(strconv.Itoa(i), func(t *testing.T) { 416 - indexDir := t.TempDir() 417 - 418 - repositoryV1 := zoekt.Repository{ 419 - Name: "repo", 420 - ID: 1, 421 - Branches: test.oldBranches, 422 - } 423 - 424 - createTestShard(t, indexDir, repositoryV1, 2) 425 - 426 - repositoryV2 := zoekt.Repository{ 427 - Name: "repo", 428 - ID: 1, 429 - Branches: test.newBranches, 430 - } 431 - 432 - o := Options{ 433 - IndexDir: indexDir, 434 - RepositoryDescription: repositoryV2, 435 - IsDelta: true, 436 - } 437 - o.SetDefaults() 438 - 439 - state, _ := o.IndexState() 440 - if diff := cmp.Diff(test.expectedState, state); diff != "" { 441 - t.Errorf("unexpected diff in index state (-want 
+got):\n%s", diff) 416 + actual := BranchNamesEqual(test.oldBranches, test.newBranches) 417 + if test.expected != actual { 418 + t.Errorf("expected: %t, got: %t", test.expected, actual) 442 419 } 443 420 }) 444 421 } ··· 542 519 } 543 520 } 544 521 522 + func TestFindRepositoryMetadata(t *testing.T) { 523 + tests := []struct { 524 + name string 525 + normalShardRepositories []zoekt.Repository 526 + compoundShardRepositories []zoekt.Repository 527 + input *zoekt.Repository 528 + expectedRepository *zoekt.Repository 529 + expectedOk bool 530 + }{ 531 + { 532 + name: "repository in normal shards", 533 + normalShardRepositories: []zoekt.Repository{ 534 + {Name: "repoA", ID: 1}, 535 + {Name: "repoB", ID: 2}, 536 + {Name: "repoC", ID: 3}, 537 + }, 538 + compoundShardRepositories: []zoekt.Repository{ 539 + {Name: "repoD", ID: 4}, 540 + {Name: "repoE", ID: 5}, 541 + {Name: "repoF", ID: 6}, 542 + }, 543 + input: &zoekt.Repository{Name: "repoB", ID: 2}, 544 + expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2}, 545 + expectedOk: true, 546 + }, 547 + { 548 + name: "repository in compound shards", 549 + normalShardRepositories: []zoekt.Repository{ 550 + {Name: "repoA", ID: 1}, 551 + {Name: "repoB", ID: 2}, 552 + {Name: "repoC", ID: 3}, 553 + }, 554 + compoundShardRepositories: []zoekt.Repository{ 555 + {Name: "repoD", ID: 4}, 556 + {Name: "repoE", ID: 5}, 557 + {Name: "repoF", ID: 6}, 558 + }, 559 + input: &zoekt.Repository{Name: "repoE", ID: 5}, 560 + expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5}, 561 + expectedOk: true, 562 + }, 563 + { 564 + name: "repository not in any shard", 565 + normalShardRepositories: []zoekt.Repository{ 566 + {Name: "repoA", ID: 1}, 567 + {Name: "repoB", ID: 2}, 568 + {Name: "repoC", ID: 3}, 569 + }, 570 + compoundShardRepositories: []zoekt.Repository{ 571 + {Name: "repoD", ID: 4}, 572 + {Name: "repoE", ID: 5}, 573 + {Name: "repoF", ID: 6}, 574 + }, 575 + input: &zoekt.Repository{Name: "notPresent", ID: 123}, 576 + 
expectedRepository: nil, 577 + expectedOk: false, 578 + }, 579 + } 580 + for _, tt := range tests { 581 + t.Run(tt.name, func(t *testing.T) { 582 + // setup 583 + indexDir := t.TempDir() 584 + 585 + optFns := []func(o *Options){ 586 + // ctags aren't important for this test, and the equality checks 587 + // for diffing repositories can break due to local configuration 588 + func(o *Options) { 589 + o.DisableCTags = true 590 + }, 591 + } 592 + 593 + for _, r := range tt.normalShardRepositories { 594 + createTestShard(t, indexDir, r, 1, optFns...) 595 + } 596 + 597 + if len(tt.compoundShardRepositories) > 0 { 598 + createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...) 599 + } 600 + 601 + o := &Options{ 602 + IndexDir: indexDir, 603 + RepositoryDescription: *tt.input, 604 + } 605 + o.SetDefaults() 606 + 607 + // run test 608 + got, gotOk, err := o.FindRepositoryMetadata() 609 + if err != nil { 610 + t.Errorf("received unexpected error: %v", err) 611 + return 612 + } 613 + 614 + // check outcome 615 + compareOptions := []cmp.Option{ 616 + cmpopts.IgnoreUnexported(zoekt.Repository{}), 617 + cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 618 + cmpopts.EquateEmpty(), 619 + } 620 + 621 + if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" { 622 + t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff) 623 + } 624 + 625 + if tt.expectedOk != gotOk { 626 + t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk) 627 + } 628 + }) 629 + } 630 + } 631 + 545 632 func createTestShard(t *testing.T, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { 546 633 t.Helper() 547 634 ··· 593 680 return o.FindAllShards() 594 681 } 595 682 596 - func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository) { 683 + func createTestCompoundShard(t *testing.T, indexDir string, repositories []zoekt.Repository, 
optFns ...func(options *Options)) { 597 684 t.Helper() 598 685 599 686 var shardNames []string ··· 603 690 scratchDir := t.TempDir() 604 691 605 692 // create shards that'll be merged later 606 - createTestShard(t, scratchDir, r, 1) 693 + createTestShard(t, scratchDir, r, 1, optFns...) 607 694 608 695 // discover file names for all the normal shards we created 609 696 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse
+20 -4
build/e2e_test.go
··· 25 25 "path/filepath" 26 26 "reflect" 27 27 "runtime" 28 + "sort" 28 29 "strings" 29 30 "testing" 30 31 "time" ··· 633 634 documents: []zoekt.Document{fooAtMainV2}, 634 635 optFn: func(t *testing.T, o *Options) { 635 636 o.IsDelta = true 636 - o.ChangedOrRemovedFiles = []string{"foo.go"} 637 + o.changedOrRemovedFiles = []string{"foo.go"} 637 638 }, 638 639 query: "common", 639 640 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2}, ··· 643 644 documents: []zoekt.Document{barAtMainV2}, 644 645 optFn: func(t *testing.T, o *Options) { 645 646 o.IsDelta = true 646 - o.ChangedOrRemovedFiles = []string{"bar.go"} 647 + o.changedOrRemovedFiles = []string{"bar.go"} 647 648 }, 648 649 query: "common", 649 650 expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2}, ··· 665 666 documents: nil, 666 667 optFn: func(t *testing.T, o *Options) { 667 668 o.IsDelta = true 668 - o.ChangedOrRemovedFiles = []string{"foo.go"} 669 + o.changedOrRemovedFiles = []string{"foo.go"} 669 670 }, 670 671 query: "common", 671 672 expectedDocuments: []zoekt.Document{barAtMain}, ··· 687 688 documents: nil, 688 689 optFn: func(t *testing.T, o *Options) { 689 690 o.IsDelta = true 690 - o.ChangedOrRemovedFiles = []string{"foo.go"} 691 + o.changedOrRemovedFiles = []string{"foo.go"} 691 692 }, 692 693 query: "common", 693 694 expectedDocuments: []zoekt.Document{barAtMain}, ··· 715 716 repository.Branches = append(repository.Branches, zoekt.RepositoryBranch{Name: b}) 716 717 } 717 718 719 + sort.Slice(repository.Branches, func(i, j int) bool { 720 + a, b := repository.Branches[i], repository.Branches[j] 721 + 722 + return a.Name < b.Name 723 + }) 724 + 718 725 buildOpts := Options{ 719 726 IndexDir: indexDir, 720 727 RepositoryDescription: repository, ··· 734 741 err := b.Add(d) 735 742 if err != nil { 736 743 t.Fatalf("step %q: adding document %q to builder: %s", step.name, d.Name, err) 744 + } 745 + } 746 + 747 + // Call b.Finish() multiple times to ensure that it is idempotent 748 + 
for i := 0; i < 3; i++ { 749 + 750 + err = b.Finish() 751 + if err != nil { 752 + t.Fatalf("step %q: finishing builder (call #%d): %s", step.name, i, err) 737 753 } 738 754 } 739 755
+3 -1
cmd/zoekt-git-index/main.go
··· 36 36 repoCacheDir := flag.String("repo_cache", "", "directory holding bare git repos, named by URL. "+ 37 37 "this is used to find repositories for submodules. "+ 38 38 "It also affects name if the indexed repository is under this directory.") 39 + isDelta := flag.Bool("delta", false, "whether we should use delta build") 39 40 flag.Parse() 40 41 41 42 // Tune GOMAXPROCS to match Linux container CPU quota. ··· 49 50 *repoCacheDir = dir 50 51 } 51 52 opts := cmd.OptionsFromFlags() 53 + opts.IsDelta = *isDelta 52 54 53 55 var branches []string 54 56 if *branchesStr != "" { ··· 88 90 } 89 91 90 92 if err := gitindex.IndexGitRepo(gitOpts); err != nil { 91 - log.Printf("indexGitRepo(%s): %v", dir, err) 93 + log.Printf("indexGitRepo(%s, delta=%t): %v", dir, gitOpts.BuildOptions.IsDelta, err) 92 94 exitStatus = 1 93 95 } 94 96 }
+100 -20
cmd/zoekt-sourcegraph-indexserver/index.go
··· 72 72 73 73 // FileLimit is the maximum size of a file 74 74 FileLimit int 75 + 76 + // UseDelta is true if we want to use the new delta indexer. This should 77 + // only be true for repositories we explicitly enable. 78 + UseDelta bool 75 79 } 76 80 77 81 // BuildOptions returns a build.Options represented by indexArgs. Note: it ··· 100 104 LargeFiles: o.LargeFiles, 101 105 CTagsMustSucceed: o.Symbols, 102 106 DisableCTags: !o.Symbols, 107 + IsDelta: o.UseDelta, 103 108 } 104 109 } 105 110 ··· 122 127 return s 123 128 } 124 129 125 - func gitIndex(o *indexArgs, runCmd func(*exec.Cmd) error) error { 130 + type gitIndexConfig struct { 131 + // runCmd is the function that's used to execute all external commands (such as calls to "git" or "zoekt-git-index") 132 + // that gitIndex may construct. 133 + runCmd func(*exec.Cmd) error 134 + 135 + // findRepositoryMetadata is the function that returns the repository metadata for the 136 + // repository specified in args. 'ok' is false if the repository's metadata 137 + // couldn't be found or if an error occurred. 138 + // 139 + // The primary purpose of this configuration option is to be able to provide a stub 140 + // implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata(). 
141 + findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, ok bool, err error) 142 + } 143 + 144 + func gitIndex(c gitIndexConfig, o *indexArgs) error { 126 145 if len(o.Branches) == 0 { 127 146 return errors.New("zoekt-git-index requires 1 or more branches") 128 147 } 129 148 149 + if c.runCmd == nil { 150 + return errors.New("runCmd in provided configuration was nil - a function must be provided") 151 + } 152 + runCmd := c.runCmd 153 + 154 + if c.findRepositoryMetadata == nil { 155 + return errors.New("findRepositoryMetadata in provided configuration was nil - a function must be provided") 156 + } 157 + findRepositoryMetadata := c.findRepositoryMetadata 158 + 130 159 buildOptions := o.BuildOptions() 131 160 132 161 // An index should never take longer than an hour. ··· 156 185 return err 157 186 } 158 187 159 - fetchStart := time.Now() 188 + var fetchDuration time.Duration 189 + successfullyFetchedCommitsCount := 0 190 + allFetchesSucceeded := true 160 191 161 - // We shallow fetch each commit specified in zoekt.Branches. This requires 162 - // the server to have configured both uploadpack.allowAnySHA1InWant and 163 - // uploadpack.allowFilter. (See gitservice.go in the Sourcegraph repository) 164 - fetchArgs := []string{"-C", gitDir, "-c", "protocol.version=2", "fetch", "--depth=1", o.CloneURL} 165 - var commits []string 166 - for _, b := range o.Branches { 167 - commits = append(commits, b.Version) 168 - } 169 - fetchArgs = append(fetchArgs, commits...) 192 + defer func() { 193 + success := strconv.FormatBool(allFetchesSucceeded) 194 + name := repoNameForMetric(o.Name) 195 + metricFetchDuration.WithLabelValues(success, name).Observe(fetchDuration.Seconds()) 196 + }() 170 197 171 - cmd = exec.CommandContext(ctx, "git", fetchArgs...) 172 - cmd.Stdin = &bytes.Buffer{} 198 + var fetch = func(branches []zoekt.RepositoryBranch) error { 199 + // We shallow fetch each commit specified in zoekt.Branches. 
This requires 200 + // the server to have configured both uploadpack.allowAnySHA1InWant and 201 + // uploadpack.allowFilter. (See gitservice.go in the Sourcegraph repository) 202 + fetchArgs := []string{"-C", gitDir, "-c", "protocol.version=2", "fetch", "--depth=1", o.CloneURL} 173 203 174 - err = runCmd(cmd) 175 - fetchDuration := time.Since(fetchStart) 204 + var commits []string 205 + for _, b := range branches { 206 + commits = append(commits, b.Version) 207 + } 208 + 209 + fetchArgs = append(fetchArgs, commits...) 210 + 211 + cmd = exec.CommandContext(ctx, "git", fetchArgs...) 212 + cmd.Stdin = &bytes.Buffer{} 213 + 214 + start := time.Now() 215 + err := runCmd(cmd) 216 + fetchDuration += time.Since(start) 217 + 218 + if err != nil { 219 + allFetchesSucceeded = false 220 + return err 221 + } 222 + 223 + successfullyFetchedCommitsCount += len(commits) 224 + return nil 225 + } 226 + 227 + err = fetch(o.Branches) 176 228 if err != nil { 177 - metricFetchDuration.WithLabelValues("false", repoNameForMetric(o.Name)).Observe(fetchDuration.Seconds()) 178 229 return err 179 230 } 180 231 181 - metricFetchDuration.WithLabelValues("true", repoNameForMetric(o.Name)).Observe(fetchDuration.Seconds()) 182 - debug.Printf("fetched git data for %q (%d commit(s)) in %s", o.Name, len(commits), fetchDuration) 232 + if o.UseDelta { 233 + // Try fetching prior commits for delta builds 234 + // If we're unable to fetch prior commits, we continue anyway 235 + // knowing that zoekt-git-index will fall back to a "full" normal build 236 + existingRepository, found, err := findRepositoryMetadata(o) 237 + if err != nil { 238 + return fmt.Errorf("delta build: failed to get repository metadata: %w", err) 239 + } 240 + 241 + if found && len(existingRepository.Branches) > 0 { 242 + err := fetch(existingRepository.Branches) 243 + if err != nil { 244 + var bs []string 245 + for _, b := range existingRepository.Branches { 246 + bs = append(bs, b.String()) 247 + } 248 + 249 + formattedBranches := 
strings.Join(bs, ", ") 250 + name := buildOptions.RepositoryDescription.Name 251 + id := buildOptions.RepositoryDescription.ID 252 + 253 + log.Printf("delta build: failed to prepare delta build for %q (ID %d): failed to fetch prior commits (%s): %s", name, id, formattedBranches, err) 254 + } 255 + } 256 + } 257 + 258 + debug.Printf("successfully fetched git data for %q (%d commit(s)) in %s", o.Name, successfullyFetchedCommitsCount, fetchDuration) 183 259 184 260 // We then create the relevant refs for each fetched commit. 185 261 for _, b := range o.Branches { ··· 194 270 } 195 271 } 196 272 197 - // create git config with options 273 + // create git configuration with options 198 274 type configKV struct{ Key, Value string } 199 275 config := []configKV{{ 200 276 // zoekt.name is used by zoekt-git-index to set the repository name. ··· 208 284 return config[i].Key < config[j].Key 209 285 }) 210 286 211 - // write config to repo 287 + // write git configuration to repo 212 288 for _, kv := range config { 213 289 cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "zoekt."+kv.Key, kv.Value) 214 290 cmd.Stdin = &bytes.Buffer{} ··· 233 309 branches = append(branches, b.Name) 234 310 } 235 311 args = append(args, "-branches", strings.Join(branches, ",")) 312 + 313 + if o.UseDelta { 314 + args = append(args, "-delta") 315 + } 236 316 237 317 args = append(args, buildOptions.Args()...) 238 318 args = append(args, gitDir)
+66 -4
cmd/zoekt-sourcegraph-indexserver/index_test.go
··· 120 120 121 121 func TestIndex(t *testing.T) { 122 122 cases := []struct { 123 - name string 124 - args indexArgs 125 - want []string 123 + name string 124 + args indexArgs 125 + mockRepositoryMetadata *zoekt.Repository 126 + want []string 126 127 }{{ 127 128 name: "minimal", 128 129 args: indexArgs{ ··· 199 200 "-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 200 201 "$TMPDIR/test%2Frepo.git", 201 202 }, 203 + }, { 204 + name: "delta", 205 + args: indexArgs{ 206 + Incremental: true, 207 + IndexDir: "/data/index", 208 + Parallelism: 4, 209 + FileLimit: 123, 210 + UseDelta: true, 211 + IndexOptions: IndexOptions{ 212 + RepoID: 0, 213 + Name: "test/repo", 214 + CloneURL: "http://api.test/.internal/git/test/repo", 215 + LargeFiles: []string{"foo", "bar"}, 216 + Symbols: true, 217 + Branches: []zoekt.RepositoryBranch{ 218 + {Name: "HEAD", Version: "deadbeef"}, 219 + {Name: "dev", Version: "feebdaed"}, 220 + {Name: "release", Version: "12345678"}, 221 + }, 222 + }, 223 + }, 224 + mockRepositoryMetadata: &zoekt.Repository{ 225 + ID: 0, 226 + Name: "test/repo", 227 + Branches: []zoekt.RepositoryBranch{ 228 + {Name: "HEAD", Version: "oldhead"}, 229 + {Name: "dev", Version: "olddev"}, 230 + {Name: "release", Version: "oldrelease"}, 231 + }, 232 + }, 233 + want: []string{ 234 + "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", 235 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678", 236 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 http://api.test/.internal/git/test/repo oldhead olddev oldrelease", 237 + "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 238 + "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", 239 + "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678", 240 + "git -C $TMPDIR/test%2Frepo.git config 
zoekt.archived 0", 241 + "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", 242 + "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", 243 + "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", 244 + "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", 245 + "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", 246 + "zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " + 247 + "-delta -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 248 + "$TMPDIR/test%2Frepo.git", 249 + }, 202 250 }} 203 251 204 252 for _, tc := range cases { 205 253 t.Run(tc.name, func(t *testing.T) { 254 + 206 255 var got []string 207 256 runCmd := func(c *exec.Cmd) error { 208 257 cmd := strings.Join(c.Args, " ") ··· 211 260 return nil 212 261 } 213 262 214 - if err := gitIndex(&tc.args, runCmd); err != nil { 263 + findRepositoryMetadata := func(args *indexArgs) (repository *zoekt.Repository, ok bool, err error) { 264 + if tc.mockRepositoryMetadata == nil { 265 + return args.BuildOptions().FindRepositoryMetadata() 266 + } 267 + 268 + return tc.mockRepositoryMetadata, true, nil 269 + } 270 + 271 + c := gitIndexConfig{ 272 + runCmd: runCmd, 273 + findRepositoryMetadata: findRepositoryMetadata, 274 + } 275 + 276 + if err := gitIndex(c, &tc.args); err != nil { 215 277 t.Fatal(err) 216 278 } 217 279 if !cmp.Equal(got, tc.want) {
+63 -26
cmd/zoekt-sourcegraph-indexserver/main.go
··· 165 165 166 166 // If true, shard merging is enabled. 167 167 shardMerging bool 168 + 169 + // deltaBuildRepositoriesAllowList is an allowlist for repositories that we 170 + // use delta-builds for instead of normal builds 171 + deltaBuildRepositoriesAllowList map[string]struct{} 168 172 } 169 173 170 174 var debug = log.New(ioutil.Discard, "", log.LstdFlags) ··· 465 469 return indexStateEmpty, createEmptyShard(args) 466 470 } 467 471 472 + repositoryName := args.Name 473 + if _, ok := s.deltaBuildRepositoriesAllowList[repositoryName]; ok { 474 + repositoryID := args.BuildOptions().RepositoryDescription.ID 475 + debug.Printf("delta build: Server.Index: marking %q (ID %d) for delta build", repositoryName, repositoryID) 476 + 477 + args.UseDelta = true 478 + } 479 + 468 480 reason := "forced" 481 + 469 482 if args.Incremental { 470 483 bo := args.BuildOptions() 471 484 bo.SetDefaults() 472 485 incrementalState, fn := bo.IndexState() 473 486 reason = string(incrementalState) 474 487 metricIndexIncrementalIndexState.WithLabelValues(string(incrementalState)).Inc() 488 + 475 489 switch incrementalState { 476 490 case build.IndexStateEqual: 477 491 debug.Printf("%s index already up to date. 
Shard=%s", args.String(), fn) ··· 493 507 494 508 log.Printf("updating index %s reason=%s", args.String(), reason) 495 509 496 - runCmd := func(cmd *exec.Cmd) error { return s.loggedRun(tr, cmd) } 497 510 metricIndexingTotal.Inc() 498 - return indexStateSuccess, gitIndex(args, runCmd) 511 + c := gitIndexConfig{ 512 + runCmd: func(cmd *exec.Cmd) error { 513 + return s.loggedRun(tr, cmd) 514 + }, 515 + 516 + findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, ok bool, err error) { 517 + return args.BuildOptions().FindRepositoryMetadata() 518 + }, 519 + } 520 + 521 + return indexStateSuccess, gitIndex(c, args) 499 522 } 500 523 501 524 func (s *Server) indexArgs(opts IndexOptions) *indexArgs { ··· 722 745 return v 723 746 } 724 747 748 + func getEnvWithDefaultEmptySet(k string) map[string]struct{} { 749 + set := map[string]struct{}{} 750 + for _, v := range strings.Split(os.Getenv(k), ",") { 751 + v = strings.TrimSpace(v) 752 + if v != "" { 753 + set[v] = struct{}{} 754 + } 755 + } 756 + return set 757 + } 758 + 759 + func joinStringSet(set map[string]struct{}, sep string) string { 760 + var xs []string 761 + for x := range set { 762 + xs = append(xs, x) 763 + } 764 + 765 + return strings.Join(xs, sep) 766 + } 767 + 725 768 func setCompoundShardCounter(indexDir string) { 726 769 fns, err := filepath.Glob(filepath.Join(indexDir, "compound-*.zoekt")) 727 770 if err != nil { ··· 843 886 debug = log.New(os.Stderr, "", log.LstdFlags) 844 887 } 845 888 846 - indexingMetricsReposAllowlist := os.Getenv("INDEXING_METRICS_REPOS_ALLOWLIST") 847 - if indexingMetricsReposAllowlist != "" { 848 - var repos []string 849 - 850 - for _, r := range strings.Split(indexingMetricsReposAllowlist, ",") { 851 - r = strings.TrimSpace(r) 852 - if r != "" { 853 - repos = append(repos, r) 854 - } 855 - } 856 - 857 - for _, r := range repos { 858 - reposWithSeparateIndexingMetrics[r] = struct{}{} 859 - } 889 + reposWithSeparateIndexingMetrics = 
getEnvWithDefaultEmptySet("INDEXING_METRICS_REPOS_ALLOWLIST") 890 + if len(reposWithSeparateIndexingMetrics) > 0 { 891 + debug.Printf("capturing separate indexing metrics for: %s", joinStringSet(reposWithSeparateIndexingMetrics, ", ")) 892 + } 860 893 861 - debug.Printf("capturing separate indexing metrics for: %s", repos) 894 + deltaBuildRepositoriesAllowList := getEnvWithDefaultEmptySet("DELTA_BUILD_REPOS_ALLOWLIST") 895 + if len(deltaBuildRepositoriesAllowList) > 0 { 896 + debug.Printf("using delta shard builds for: %s", joinStringSet(deltaBuildRepositoriesAllowList, ", ")) 862 897 } 863 898 864 899 var sg Sourcegraph ··· 890 925 if cpuCount < 1 { 891 926 cpuCount = 1 892 927 } 928 + 893 929 return &Server{ 894 - Sourcegraph: sg, 895 - IndexDir: conf.index, 896 - Interval: conf.interval, 897 - VacuumInterval: conf.vacuumInterval, 898 - MergeInterval: conf.mergeInterval, 899 - CPUCount: cpuCount, 900 - TargetSizeBytes: conf.targetSize * 1024 * 1024, 901 - minSizeBytes: conf.minSize * 1024 * 1024, 902 - shardMerging: zoekt.ShardMergingEnabled(), 930 + Sourcegraph: sg, 931 + IndexDir: conf.index, 932 + Interval: conf.interval, 933 + VacuumInterval: conf.vacuumInterval, 934 + MergeInterval: conf.mergeInterval, 935 + CPUCount: cpuCount, 936 + TargetSizeBytes: conf.targetSize * 1024 * 1024, 937 + minSizeBytes: conf.minSize * 1024 * 1024, 938 + shardMerging: zoekt.ShardMergingEnabled(), 939 + deltaBuildRepositoriesAllowList: deltaBuildRepositoriesAllowList, 903 940 }, err 904 941 } 905 942
+295 -35
gitindex/index.go
··· 17 17 18 18 import ( 19 19 "bytes" 20 + "context" 21 + "errors" 20 22 "fmt" 21 23 "io" 22 24 "log" ··· 351 353 352 354 // IndexGitRepo indexes the git repository as specified by the options. 353 355 func IndexGitRepo(opts Options) error { 356 + return indexGitRepo(opts, gitIndexConfig{}) 357 + } 358 + 359 + // indexGitRepo indexes the git repository as specified by the options and the provided gitIndexConfig. 360 + func indexGitRepo(opts Options, config gitIndexConfig) error { 361 + prepareDeltaBuild := prepareDeltaBuild 362 + if config.prepareDeltaBuild != nil { 363 + prepareDeltaBuild = config.prepareDeltaBuild 364 + } 365 + 366 + prepareNormalBuild := prepareNormalBuild 367 + if config.prepareNormalBuild != nil { 368 + prepareNormalBuild = config.prepareNormalBuild 369 + } 370 + 354 371 // Set max thresholds, since we use them in this function. 355 372 opts.BuildOptions.SetDefaults() 356 373 if opts.RepoDir == "" { ··· 367 384 log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err) 368 385 } 369 386 370 - var repoCache *RepoCache 371 - if opts.Submodules { 372 - repoCache = NewRepoCache(opts.RepoCacheDir) 373 - } 374 - 375 - // branch => (path, sha1) => repo. 
376 - repos := map[fileKey]BlobLocation{} 377 - 378 - // fileKey => branches 379 - branchMap := map[fileKey][]string{} 380 - 381 - // Branch => Repo => SHA1 382 - branchVersions := map[string]map[string]plumbing.Hash{} 383 - 384 387 branches, err := expandBranches(repo, opts.Branches, opts.BranchPrefix) 385 388 if err != nil { 386 389 return fmt.Errorf("expandBranches: %w", err) ··· 403 406 if when := commit.Committer.When; when.After(opts.BuildOptions.RepositoryDescription.LatestCommitDate) { 404 407 opts.BuildOptions.RepositoryDescription.LatestCommitDate = when 405 408 } 409 + } 406 410 407 - tree, err := commit.Tree() 408 - if err != nil { 409 - return fmt.Errorf("commit.Tree: %w", err) 410 - } 411 + if opts.Incremental && opts.BuildOptions.IncrementalSkipIndexing() { 412 + return nil 413 + } 411 414 412 - ig, err := newIgnoreMatcher(tree) 413 - if err != nil { 414 - return fmt.Errorf("newIgnoreMatcher: %w", err) 415 - } 415 + // branch => (path, sha1) => repo. 416 + var repos map[fileKey]BlobLocation 416 417 417 - files, subVersions, err := TreeToFiles(repo, tree, opts.BuildOptions.RepositoryDescription.URL, repoCache) 418 + // fileKey => branches 419 + var branchMap map[fileKey][]string 420 + 421 + // Branch => Repo => SHA1 422 + var branchVersions map[string]map[string]plumbing.Hash 423 + 424 + // set of file paths that have been changed or deleted since 425 + // the last indexed commit 426 + // 427 + // These only have an effect on delta builds 428 + var changedOrRemovedFiles []string 429 + 430 + if opts.BuildOptions.IsDelta { 431 + repos, branchMap, branchVersions, changedOrRemovedFiles, err = prepareDeltaBuild(opts, repo) 418 432 if err != nil { 419 - return fmt.Errorf("TreeToFiles: %w", err) 433 + log.Printf("delta build: falling back to normal build since delta build failed, repository=%q, err=%s", opts.BuildOptions.RepositoryDescription.Name, err) 434 + opts.BuildOptions.IsDelta = false 420 435 } 421 - for k, v := range files { 422 - if 
ig.Match(k.Path) { 423 - continue 424 - } 425 - repos[k] = v 426 - branchMap[k] = append(branchMap[k], b) 427 - } 428 - 429 - branchVersions[b] = subVersions 430 436 } 431 437 432 - if opts.Incremental && opts.BuildOptions.IncrementalSkipIndexing() { 433 - return nil 438 + if !opts.BuildOptions.IsDelta { 439 + repos, branchMap, branchVersions, err = prepareNormalBuild(opts, repo) 440 + if err != nil { 441 + return fmt.Errorf("preparing normal build: %w", err) 442 + } 434 443 } 435 444 436 445 reposByPath := map[string]BlobLocation{} ··· 449 458 } 450 459 opts.BuildOptions.SubRepositories[path] = &tpl 451 460 } 461 + 452 462 for _, br := range opts.BuildOptions.RepositoryDescription.Branches { 453 463 for path, repo := range opts.BuildOptions.SubRepositories { 454 464 id := branchVersions[br.Name][path] ··· 466 476 // we don't need to check error, since we either already have an error, or 467 477 // we are returning the result of the first call to builder.Finish. 468 478 defer builder.Finish() // nolint:errcheck 479 + 480 + for _, f := range changedOrRemovedFiles { 481 + builder.MarkFileAsChangedOrRemoved(f) 482 + } 469 483 470 484 var names []string 471 485 fileKeys := map[string][]fileKey{} ··· 530 544 return nil, err 531 545 } 532 546 return ignore.ParseIgnoreFile(strings.NewReader(content)) 547 + } 548 + 549 + // prepareDeltaBuildFunc is a function that calculates the necessary metadata for preparing 550 + // a build.Builder instance for generating a delta build. 551 + type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) 552 + 553 + // prepareNormalBuildFunc is a function that calculates the necessary metadata for preparing 554 + // a build.Builder instance for generating a normal build.
555 + type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) 556 + 557 + type gitIndexConfig struct { 558 + // prepareDeltaBuild, if not nil, is the function that is used to calculate the metadata that will be used to 559 + // prepare the build.Builder instance for generating a delta build. 560 + // 561 + // If prepareDeltaBuild is nil, gitindex.prepareDeltaBuild will be used instead. 562 + prepareDeltaBuild prepareDeltaBuildFunc 563 + 564 + // prepareNormalBuild, if not nil, is the function that is used to calculate the metadata that will be used to 565 + // prepare the build.Builder instance for generating a normal build. 566 + // 567 + // If prepareNormalBuild is nil, gitindex.prepareNormalBuild will be used instead. 568 + prepareNormalBuild prepareNormalBuildFunc 569 + } 570 + 571 + func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { 572 + // discover what commits we indexed during our last build 573 + 574 + if options.Submodules { 575 + return nil, nil, nil, nil, fmt.Errorf("delta builds currently don't support submodule indexing") 576 + } 577 + 578 + existingRepository, ok, err := options.BuildOptions.FindRepositoryMetadata() 579 + if err != nil { 580 + return nil, nil, nil, nil, fmt.Errorf("failed to get repository metadata: %w", err) 581 + } 582 + 583 + if !ok { 584 + return nil, nil, nil, nil, fmt.Errorf("no existing shards found for repository") 585 + } 586 + 587 + // Check to see if the set of branch names is consistent with what we last indexed. 588 + // If it isn't consistent, then we can't proceed with a delta build (and the caller should fall back to a 589 + // normal one).
590 + 591 + if !build.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { 592 + var existingBranchNames []string 593 + for _, b := range existingRepository.Branches { 594 + existingBranchNames = append(existingBranchNames, b.Name) 595 + } 596 + 597 + var optionsBranchNames []string 598 + for _, b := range options.BuildOptions.RepositoryDescription.Branches { 599 + optionsBranchNames = append(optionsBranchNames, b.Name) 600 + } 601 + 602 + existingBranchList := strings.Join(existingBranchNames, ", ") 603 + optionsBranchList := strings.Join(optionsBranchNames, ", ") 604 + 605 + return nil, nil, nil, nil, fmt.Errorf("requested branch set in build options (%q) != branch set found on disk (%q) - branch set must be the same for delta shards", optionsBranchList, existingBranchList) 606 + } 607 + 608 + // branch => (path, sha1) => repo. 609 + repos = map[fileKey]BlobLocation{} 610 + 611 + // fileKey => branches 612 + branchMap = map[fileKey][]string{} 613 + 614 + // branch name -> git worktree at most current commit 615 + branchToCurrentTree := make(map[string]*object.Tree, len(options.Branches)) 616 + 617 + for _, b := range options.Branches { 618 + commit, err := getCommit(repository, options.BranchPrefix, b) 619 + if err != nil { 620 + return nil, nil, nil, nil, fmt.Errorf("getting last current commit for branch %q: %w", b, err) 621 + } 622 + 623 + tree, err := commit.Tree() 624 + if err != nil { 625 + return nil, nil, nil, nil, fmt.Errorf("getting current git tree for branch %q: %w", b, err) 626 + } 627 + 628 + branchToCurrentTree[b] = tree 629 + } 630 + 631 + rawURL := options.BuildOptions.RepositoryDescription.URL 632 + u, err := url.Parse(rawURL) 633 + if err != nil { 634 + return nil, nil, nil, nil, fmt.Errorf("parsing repository URL %q: %w", rawURL, err) 635 + } 636 + 637 + // TODO: Support repository submodules for delta builds 638 + // For this prototype, we are ignoring repository submodules, which means that we 
can use the same 639 + // blob location for all files 640 + hackSharedBlobLocation := BlobLocation{ 641 + Repo: repository, 642 + URL: u, 643 + } 644 + 645 + // loop over all branches, calculate the diff between our 646 + // last indexed commit and the current commit, and add files mentioned in the diff 647 + for _, branch := range existingRepository.Branches { 648 + lastIndexedCommit, err := getCommit(repository, "", branch.Version) 649 + if err != nil { 650 + return nil, nil, nil, nil, fmt.Errorf("getting last indexed commit for branch %q: %w", branch.Name, err) 651 + } 652 + 653 + lastIndexedTree, err := lastIndexedCommit.Tree() 654 + if err != nil { 655 + return nil, nil, nil, nil, fmt.Errorf("getting lasted indexed git tree for branch %q: %w", branch.Name, err) 656 + } 657 + 658 + changes, err := object.DiffTreeWithOptions(context.Background(), lastIndexedTree, branchToCurrentTree[branch.Name], &object.DiffTreeOptions{DetectRenames: false}) 659 + if err != nil { 660 + return nil, nil, nil, nil, fmt.Errorf("generating changeset for branch %q: %w", branch.Name, err) 661 + } 662 + 663 + for i, c := range changes { 664 + oldFile, newFile, err := c.Files() 665 + if err != nil { 666 + return nil, nil, nil, nil, fmt.Errorf("change #%d: getting files before and after change: %w", i, err) 667 + } 668 + 669 + if newFile != nil { 670 + // note: newFile.Name could be a path that isn't relative to the repository root - using the 671 + // change's Name field is the only way that @ggilmore saw to get the full path relative to the root 672 + newFileRelativeRootPath := c.To.Name 673 + 674 + // TODO@ggilmore: HACK - remove once ignore files are supported in delta builds 675 + if newFileRelativeRootPath == ignore.IgnoreFile { 676 + return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) 677 + } 678 + 679 + // either file is added or renamed, so we need to add the new version to the build 680 + file := fileKey{Path: 
newFileRelativeRootPath, ID: newFile.Hash} 681 + repos[file] = hackSharedBlobLocation 682 + branchMap[file] = append(branchMap[file], branch.Name) 683 + } 684 + 685 + if oldFile == nil { 686 + // file added - nothing more to do 687 + continue 688 + } 689 + 690 + // Note: oldFile.Name could be a path that isn't relative to the repository root - using the 691 + // change's "Name" field is the only way that ggilmore saw to get the full path relative to the root 692 + oldFileRelativeRootPath := c.From.Name 693 + 694 + if oldFileRelativeRootPath == ignore.IgnoreFile { 695 + return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile) 696 + } 697 + 698 + // The file is either modified or deleted. So, we need to add ALL versions 699 + // of the old file (across all branches) to the build. 700 + for b, currentTree := range branchToCurrentTree { 701 + f, err := currentTree.File(oldFileRelativeRootPath) 702 + if err != nil { 703 + // the file doesn't exist in this branch 704 + if errors.Is(err, object.ErrFileNotFound) { 705 + continue 706 + } 707 + 708 + return nil, nil, nil, nil, fmt.Errorf("getting hash for file %q in branch %q: %w", oldFile.Name, b, err) 709 + } 710 + 711 + file := fileKey{Path: oldFileRelativeRootPath, ID: f.ID()} 712 + repos[file] = hackSharedBlobLocation 713 + branchMap[file] = append(branchMap[file], b) 714 + } 715 + 716 + changedOrDeletedPaths = append(changedOrDeletedPaths, oldFileRelativeRootPath) 717 + } 718 + } 719 + 720 + // we need to de-duplicate the branch map before returning it - it's possible for the same 721 + // branch to have been added multiple times if a file has been modified across multiple commits 722 + 723 + for file, branches := range branchMap { 724 + sort.Strings(branches) 725 + branchMap[file] = uniq(branches) 726 + } 727 + 728 + // we also need to de-duplicate the list of changed or deleted file paths, it's also possible to have duplicates 729 + // for the same reasoning as above 
730 + 731 + sort.Strings(changedOrDeletedPaths) 732 + changedOrDeletedPaths = uniq(changedOrDeletedPaths) 733 + 734 + return repos, branchMap, nil, changedOrDeletedPaths, nil 735 + } 736 + 737 + func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) { 738 + var repoCache *RepoCache 739 + if options.Submodules { 740 + repoCache = NewRepoCache(options.RepoCacheDir) 741 + } 742 + 743 + // branch => (path, sha1) => repo. 744 + repos = map[fileKey]BlobLocation{} 745 + 746 + // fileKey => branches 747 + branchMap = map[fileKey][]string{} 748 + 749 + // Branch => Repo => SHA1 750 + branchVersions = map[string]map[string]plumbing.Hash{} 751 + 752 + branches, err := expandBranches(repository, options.Branches, options.BranchPrefix) 753 + if err != nil { 754 + return nil, nil, nil, fmt.Errorf("expandBranches: %w", err) 755 + } 756 + 757 + for _, b := range branches { 758 + commit, err := getCommit(repository, options.BranchPrefix, b) 759 + if err != nil { 760 + if options.AllowMissingBranch && err.Error() == "reference not found" { 761 + continue 762 + } 763 + 764 + return nil, nil, nil, fmt.Errorf("getCommit: %w", err) 765 + } 766 + 767 + tree, err := commit.Tree() 768 + if err != nil { 769 + return nil, nil, nil, fmt.Errorf("commit.Tree: %w", err) 770 + } 771 + 772 + ig, err := newIgnoreMatcher(tree) 773 + if err != nil { 774 + return nil, nil, nil, fmt.Errorf("newIgnoreMatcher: %w", err) 775 + } 776 + 777 + files, subVersions, err := TreeToFiles(repository, tree, options.BuildOptions.RepositoryDescription.URL, repoCache) 778 + if err != nil { 779 + return nil, nil, nil, fmt.Errorf("TreeToFiles: %w", err) 780 + } 781 + for k, v := range files { 782 + if ig.Match(k.Path) { 783 + continue 784 + } 785 + repos[k] = v 786 + branchMap[k] = append(branchMap[k], b) 787 + } 788 + 789 + branchVersions[b] = subVersions 790 + } 791 + 792 + 
return repos, branchMap, branchVersions, nil 533 793 } 534 794 535 795 func blobContents(blob *object.Blob) ([]byte, error) {
+531
gitindex/index_test.go
··· 15 15 package gitindex 16 16 17 17 import ( 18 + "bytes" 19 + "context" 20 + "fmt" 18 21 "io/ioutil" 19 22 "os" 20 23 "os/exec" 21 24 "path/filepath" 25 + "sort" 22 26 "testing" 23 27 28 + "github.com/go-git/go-git/v5" 29 + "github.com/go-git/go-git/v5/plumbing" 30 + "github.com/google/go-cmp/cmp" 31 + "github.com/google/go-cmp/cmp/cmpopts" 24 32 "github.com/google/zoekt" 25 33 "github.com/google/zoekt/build" 34 + "github.com/google/zoekt/ignore" 35 + "github.com/google/zoekt/query" 36 + "github.com/google/zoekt/shards" 26 37 ) 27 38 28 39 func TestIndexEmptyRepo(t *testing.T) { ··· 54 65 t.Fatalf("IndexGitRepo: %v", err) 55 66 } 56 67 } 68 + 69 + func TestIndexDeltaBasic(t *testing.T) { 70 + type branchToDocumentMap map[string][]zoekt.Document 71 + 72 + type step struct { 73 + name string 74 + addedDocuments branchToDocumentMap 75 + deletedDocuments branchToDocumentMap 76 + optFn func(t *testing.T, options *Options) 77 + 78 + expectedFallbackToNormalBuild bool 79 + expectedDocuments []zoekt.Document 80 + } 81 + 82 + helloWorld := zoekt.Document{Name: "hello_world.txt", Content: []byte("hello")} 83 + 84 + fruitV1 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} 85 + fruitV1InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} 86 + fruitV1WithNewName := zoekt.Document{Name: "new_fruit.txt", Content: fruitV1.Content} 87 + 88 + fruitV2 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("grapes")} 89 + fruitV2InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} 90 + 91 + fruitV3 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("oranges")} 92 + fruitV4 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("apples")} 93 + 94 + foo := zoekt.Document{Name: "foo.txt", Content: []byte("bar")} 95 + 96 + emptySourcegraphIgnore := zoekt.Document{Name: ignore.IgnoreFile} 97 + sourcegraphIgnoreWithContent := zoekt.Document{Name: ignore.IgnoreFile, Content: 
[]byte("good_content.txt")} 98 + 99 + for _, test := range []struct { 100 + name string 101 + branches []string 102 + steps []step 103 + }{ 104 + { 105 + name: "modification", 106 + branches: []string{"main"}, 107 + steps: []step{ 108 + { 109 + name: "setup", 110 + addedDocuments: branchToDocumentMap{ 111 + "main": []zoekt.Document{helloWorld, fruitV1}, 112 + }, 113 + 114 + expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 115 + }, 116 + { 117 + name: "add newer version of fruits", 118 + addedDocuments: branchToDocumentMap{ 119 + "main": []zoekt.Document{fruitV2}, 120 + }, 121 + optFn: func(t *testing.T, options *Options) { 122 + options.BuildOptions.IsDelta = true 123 + }, 124 + 125 + expectedDocuments: []zoekt.Document{helloWorld, fruitV2}, 126 + }, 127 + }, 128 + }, 129 + { 130 + name: "modification only inside nested folder", 131 + branches: []string{"main"}, 132 + steps: []step{ 133 + { 134 + name: "setup", 135 + addedDocuments: branchToDocumentMap{ 136 + "main": []zoekt.Document{foo, fruitV1InFolder}, 137 + }, 138 + 139 + expectedDocuments: []zoekt.Document{foo, fruitV1InFolder}, 140 + }, 141 + { 142 + name: "add newer version of fruits inside folder", 143 + addedDocuments: branchToDocumentMap{ 144 + "main": []zoekt.Document{fruitV2InFolder}, 145 + }, 146 + optFn: func(t *testing.T, options *Options) { 147 + options.BuildOptions.IsDelta = true 148 + }, 149 + 150 + expectedDocuments: []zoekt.Document{foo, fruitV2InFolder}, 151 + }, 152 + }, 153 + }, 154 + { 155 + name: "addition", 156 + branches: []string{"main"}, 157 + steps: []step{ 158 + { 159 + name: "setup", 160 + addedDocuments: branchToDocumentMap{ 161 + "main": []zoekt.Document{helloWorld, fruitV1}, 162 + }, 163 + 164 + expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 165 + }, 166 + { 167 + name: "add new file - foo", 168 + addedDocuments: branchToDocumentMap{ 169 + "main": []zoekt.Document{foo}, 170 + }, 171 + optFn: func(t *testing.T, options *Options) { 172 + 
options.BuildOptions.IsDelta = true 173 + }, 174 + 175 + expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, 176 + }, 177 + }, 178 + }, 179 + { 180 + name: "deletion", 181 + branches: []string{"main"}, 182 + steps: []step{ 183 + { 184 + name: "setup", 185 + addedDocuments: branchToDocumentMap{ 186 + "main": []zoekt.Document{helloWorld, fruitV1, foo}, 187 + }, 188 + 189 + expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, 190 + }, 191 + { 192 + name: "delete foo file", 193 + addedDocuments: nil, 194 + deletedDocuments: branchToDocumentMap{ 195 + "main": []zoekt.Document{foo}, 196 + }, 197 + 198 + optFn: func(t *testing.T, options *Options) { 199 + options.BuildOptions.IsDelta = true 200 + }, 201 + 202 + expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 203 + }, 204 + }, 205 + }, 206 + { 207 + name: "addition and deletion on only one branch", 208 + branches: []string{"main", "release", "dev"}, 209 + steps: []step{ 210 + { 211 + name: "setup", 212 + addedDocuments: branchToDocumentMap{ 213 + "main": []zoekt.Document{fruitV1}, 214 + "release": []zoekt.Document{fruitV2}, 215 + "dev": []zoekt.Document{fruitV3}, 216 + }, 217 + 218 + expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, 219 + }, 220 + { 221 + name: "replace fruits v3 with v4 on 'dev', delete fruits on 'main'", 222 + addedDocuments: branchToDocumentMap{ 223 + "dev": []zoekt.Document{fruitV4}, 224 + }, 225 + deletedDocuments: branchToDocumentMap{ 226 + "main": []zoekt.Document{fruitV1}, 227 + }, 228 + 229 + optFn: func(t *testing.T, options *Options) { 230 + options.BuildOptions.IsDelta = true 231 + }, 232 + 233 + expectedDocuments: []zoekt.Document{fruitV2, fruitV4}, 234 + }, 235 + }, 236 + }, 237 + { 238 + name: "rename", 239 + branches: []string{"main", "release"}, 240 + steps: []step{ 241 + { 242 + name: "setup", 243 + addedDocuments: branchToDocumentMap{ 244 + "main": []zoekt.Document{fruitV1}, 245 + "release": []zoekt.Document{fruitV2}, 246 + }, 247 + 
expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, 248 + }, 249 + { 250 + name: "rename fruits file on 'main' + ensure that unmodified fruits file on 'release' is still searchable", 251 + addedDocuments: branchToDocumentMap{ 252 + "main": []zoekt.Document{fruitV1WithNewName}, 253 + }, 254 + deletedDocuments: branchToDocumentMap{ 255 + "main": []zoekt.Document{fruitV1}, 256 + }, 257 + 258 + optFn: func(t *testing.T, options *Options) { 259 + options.BuildOptions.IsDelta = true 260 + }, 261 + 262 + expectedDocuments: []zoekt.Document{fruitV1WithNewName, fruitV2}, 263 + }, 264 + }, 265 + }, 266 + { 267 + name: "modification: update one branch with version of document from another branch (a.k.a. Keegan's test)", 268 + branches: []string{"main", "dev"}, 269 + steps: []step{ 270 + { 271 + name: "setup", 272 + addedDocuments: branchToDocumentMap{ 273 + "main": []zoekt.Document{fruitV1}, 274 + "dev": []zoekt.Document{fruitV2}, 275 + }, 276 + expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, 277 + }, 278 + { 279 + name: "switch main to dev's older version of fruits + bump dev's fruits to new version", 280 + addedDocuments: branchToDocumentMap{ 281 + "main": []zoekt.Document{fruitV2}, 282 + "dev": []zoekt.Document{fruitV3}, 283 + }, 284 + 285 + optFn: func(t *testing.T, options *Options) { 286 + options.BuildOptions.IsDelta = true 287 + }, 288 + 289 + expectedDocuments: []zoekt.Document{fruitV2, fruitV3}, 290 + }, 291 + }, 292 + }, 293 + { 294 + name: "no-op delta builds (reindexing the same commits)", 295 + branches: []string{"main", "dev"}, 296 + steps: []step{ 297 + { 298 + name: "setup", 299 + addedDocuments: branchToDocumentMap{ 300 + "main": []zoekt.Document{fruitV1, foo}, 301 + "dev": []zoekt.Document{helloWorld}, 302 + }, 303 + expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 304 + }, 305 + { 306 + name: "first no-op (normal build -> delta build)", 307 + optFn: func(t *testing.T, options *Options) { 308 + options.BuildOptions.IsDelta = true 309 
+ }, 310 + 311 + expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 312 + }, 313 + { 314 + name: "second no-op (delta build -> delta build)", 315 + optFn: func(t *testing.T, options *Options) { 316 + options.BuildOptions.IsDelta = true 317 + }, 318 + 319 + expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 320 + }, 321 + }, 322 + }, 323 + { 324 + name: "should fallback to normal build if no prior shards exist", 325 + branches: []string{"main"}, 326 + steps: []step{ 327 + { 328 + name: "attempt delta build on a repository that hasn't been indexed yet", 329 + addedDocuments: branchToDocumentMap{ 330 + "main": []zoekt.Document{helloWorld}, 331 + }, 332 + optFn: func(t *testing.T, options *Options) { 333 + options.BuildOptions.IsDelta = true 334 + }, 335 + 336 + expectedFallbackToNormalBuild: true, 337 + expectedDocuments: []zoekt.Document{helloWorld}, 338 + }, 339 + }, 340 + }, 341 + { 342 + name: "should fallback to normal build if the set of requested repository branches changes", 343 + branches: []string{"main", "release", "dev"}, 344 + steps: []step{ 345 + { 346 + name: "setup", 347 + addedDocuments: branchToDocumentMap{ 348 + "main": []zoekt.Document{fruitV1}, 349 + "release": []zoekt.Document{fruitV2}, 350 + "dev": []zoekt.Document{fruitV3}, 351 + }, 352 + 353 + expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, 354 + }, 355 + { 356 + name: "try delta build after dropping 'main' branch from index ", 357 + addedDocuments: branchToDocumentMap{ 358 + "release": []zoekt.Document{fruitV4}, 359 + }, 360 + optFn: func(t *testing.T, options *Options) { 361 + options.Branches = []string{"HEAD", "release", "dev"} // a bit of a hack to override it this way, but it gets the job done 362 + options.BuildOptions.IsDelta = true 363 + }, 364 + 365 + expectedFallbackToNormalBuild: true, 366 + expectedDocuments: []zoekt.Document{fruitV3, fruitV4}, 367 + }, 368 + }, 369 + }, 370 + { 371 + name: "should fallback to normal build if repository has 
unsupported Sourcegraph ignore file", 372 + branches: []string{"main"}, 373 + steps: []step{ 374 + { 375 + name: "setup", 376 + addedDocuments: branchToDocumentMap{ 377 + "main": []zoekt.Document{emptySourcegraphIgnore}, 378 + }, 379 + 380 + expectedDocuments: []zoekt.Document{emptySourcegraphIgnore}, 381 + }, 382 + { 383 + name: "attempt delta build after modifying ignore file", 384 + addedDocuments: branchToDocumentMap{ 385 + "main": []zoekt.Document{sourcegraphIgnoreWithContent}, 386 + }, 387 + optFn: func(t *testing.T, options *Options) { 388 + options.BuildOptions.IsDelta = true 389 + }, 390 + 391 + expectedFallbackToNormalBuild: true, 392 + expectedDocuments: []zoekt.Document{sourcegraphIgnoreWithContent}, 393 + }, 394 + }, 395 + }, 396 + } { 397 + test := test 398 + 399 + t.Run(test.name, func(t *testing.T) { 400 + t.Parallel() 401 + 402 + indexDir := t.TempDir() 403 + repositoryDir := t.TempDir() 404 + 405 + // setup: initialize the repository and all of its branches 406 + runScript(t, repositoryDir, "git init") 407 + runScript(t, repositoryDir, fmt.Sprintf("git config user.email %q", "you@example.com")) 408 + runScript(t, repositoryDir, fmt.Sprintf("git config user.name %q", "Your Name")) 409 + 410 + for _, b := range test.branches { 411 + runScript(t, repositoryDir, fmt.Sprintf("git checkout -b %q", b)) 412 + runScript(t, repositoryDir, fmt.Sprintf("git commit --allow-empty -m %q", "empty commit")) 413 + } 414 + 415 + for _, step := range test.steps { 416 + t.Run(step.name, func(t *testing.T) { 417 + for _, b := range test.branches { 418 + // setup: for each branch, process any document deletions / additions and commit those changes 419 + 420 + hadChange := false 421 + 422 + runScript(t, repositoryDir, fmt.Sprintf("git checkout %q", b)) 423 + 424 + for _, d := range step.deletedDocuments[b] { 425 + hadChange = true 426 + 427 + file := filepath.Join(repositoryDir, d.Name) 428 + 429 + err := os.Remove(file) 430 + if err != nil { 431 + t.Fatalf("deleting 
file %q: %s", d.Name, err) 432 + } 433 + 434 + runScript(t, repositoryDir, fmt.Sprintf("git add %q", file)) 435 + } 436 + 437 + for _, d := range step.addedDocuments[b] { 438 + hadChange = true 439 + 440 + file := filepath.Join(repositoryDir, d.Name) 441 + 442 + err := os.MkdirAll(filepath.Dir(file), 0755) 443 + if err != nil { 444 + t.Fatalf("ensuring that folders exist for file %q: %s", file, err) 445 + } 446 + 447 + err = os.WriteFile(file, d.Content, 0644) 448 + if err != nil { 449 + t.Fatalf("writing file %q: %s", d.Name, err) 450 + } 451 + 452 + runScript(t, repositoryDir, fmt.Sprintf("git add %q", file)) 453 + } 454 + 455 + if !hadChange { 456 + continue 457 + } 458 + 459 + runScript(t, repositoryDir, fmt.Sprintf("git commit -m %q", step.name)) 460 + } 461 + 462 + // setup: prepare indexOptions with given overrides 463 + buildOptions := build.Options{ 464 + IndexDir: indexDir, 465 + RepositoryDescription: zoekt.Repository{ 466 + Name: "repository", 467 + }, 468 + IsDelta: false, 469 + } 470 + buildOptions.SetDefaults() 471 + 472 + branches := append([]string{"HEAD"}, test.branches...) 
473 + 474 + options := Options{ 475 + RepoDir: filepath.Join(repositoryDir, ".git"), 476 + BuildOptions: buildOptions, 477 + Branches: branches, 478 + } 479 + 480 + if step.optFn != nil { 481 + step.optFn(t, &options) 482 + } 483 + 484 + // setup: prepare spy versions of prepare delta / normal build so that we can observe 485 + // whether they were called appropriately 486 + deltaBuildCalled := false 487 + prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) { 488 + deltaBuildCalled = true 489 + return prepareDeltaBuild(options, repository) 490 + } 491 + 492 + normalBuildCalled := false 493 + prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) { 494 + normalBuildCalled = true 495 + return prepareNormalBuild(options, repository) 496 + } 497 + 498 + // run test 499 + err := indexGitRepo(options, gitIndexConfig{ 500 + prepareDeltaBuild: prepareDeltaSpy, 501 + prepareNormalBuild: prepareNormalSpy, 502 + }) 503 + if err != nil { 504 + t.Fatalf("IndexGitRepo: %s", err) 505 + } 506 + 507 + if options.BuildOptions.IsDelta != deltaBuildCalled { 508 + // We should always try a delta build if we request it in the options. 509 + t.Fatalf("expected deltaBuildCalled to be %t, got %t", options.BuildOptions.IsDelta, deltaBuildCalled) 510 + } 511 + 512 + if options.BuildOptions.IsDelta && (step.expectedFallbackToNormalBuild != normalBuildCalled) { 513 + // We only check the normal spy on delta builds because it's only considered a "fallback" if we 514 + // asked for a delta build in the first place. 
515 + t.Fatalf("expected normalBuildCalled to be %t, got %t", step.expectedFallbackToNormalBuild, normalBuildCalled) 516 + } 517 + 518 + // examine outcome: load shards into a searcher instance and run a dummy search query 519 + // that returns every document contained in the shards 520 + // 521 + // then, compare returned set of documents with the expected set for the step and see if they agree 522 + 523 + ss, err := shards.NewDirectorySearcher(indexDir) 524 + if err != nil { 525 + t.Fatalf("NewDirectorySearcher(%s): %s", indexDir, err) 526 + } 527 + defer ss.Close() 528 + 529 + searchOpts := &zoekt.SearchOptions{Whole: true} 530 + result, err := ss.Search(context.Background(), &query.Const{Value: true}, searchOpts) 531 + if err != nil { 532 + t.Fatalf("Search: %s", err) 533 + } 534 + 535 + var receivedDocuments []zoekt.Document 536 + for _, f := range result.Files { 537 + receivedDocuments = append(receivedDocuments, zoekt.Document{ 538 + Name: f.FileName, 539 + Content: f.Content, 540 + }) 541 + } 542 + 543 + for _, docs := range [][]zoekt.Document{step.expectedDocuments, receivedDocuments} { 544 + sort.Slice(docs, func(i, j int) bool { 545 + a, b := docs[i], docs[j] 546 + 547 + // first compare names, then fallback to contents if the names are equal 548 + 549 + if a.Name < b.Name { 550 + return true 551 + } 552 + 553 + if a.Name > b.Name { 554 + return false 555 + } 556 + 557 + return bytes.Compare(a.Content, b.Content) < 0 558 + }) 559 + } 560 + 561 + compareOptions := []cmp.Option{ 562 + cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), 563 + cmpopts.EquateEmpty(), 564 + } 565 + 566 + if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, compareOptions...); diff != "" { 567 + t.Errorf("diff in received documents (-want +got):%s\n:", diff) 568 + } 569 + }) 570 + } 571 + }) 572 + } 573 + } 574 + 575 + func runScript(t *testing.T, cwd string, script string) { 576 + err := os.MkdirAll(cwd, 0755) 577 + if err != nil { 578 + t.Fatalf("ensuring path %q 
exists: %s", cwd, err) 579 + } 580 + 581 + cmd := exec.Command("sh", "-euxc", script) 582 + cmd.Dir = cwd 583 + 584 + if out, err := cmd.CombinedOutput(); err != nil { 585 + t.Fatalf("execution error: %v, output %s", err, out) 586 + } 587 + }