···9999 // last run.
100100 IsDelta bool
101101102102- // ChangedOrRemovedFiles is a list of file paths that have been changed or removed
102102+ // changedOrRemovedFiles is a list of file paths that have been changed or removed
103103 // since the last indexing job for this repository. These files will be tombstoned
104104 // in the older shards for this repository.
105105- ChangedOrRemovedFiles []string
105105+ changedOrRemovedFiles []string
106106}
107107108108// HashOptions creates a hash of the options that affect an index.
···222222223223 // a sortable 20 chars long id.
224224 id string
225225+226226+ finishCalled bool
225227}
226228227229type finishedShard struct {
···294296type IndexState string
295297296298const (
297297- IndexStateMissing IndexState = "missing"
298298- IndexStateCorrupt IndexState = "corrupt"
299299- IndexStateVersion IndexState = "version-mismatch"
300300- IndexStateOption IndexState = "option-mismatch"
301301- IndexStateMeta IndexState = "meta-mismatch"
302302- IndexStateContent IndexState = "content-mismatch"
303303- IndexStateBranchSet IndexState = "branch-set-mismatch"
304304- IndexStateBranchVersion IndexState = "branch-version-mismatch"
305305- IndexStateEqual IndexState = "equal"
299299+ IndexStateMissing IndexState = "missing"
300300+ IndexStateCorrupt IndexState = "corrupt"
301301+ IndexStateVersion IndexState = "version-mismatch"
302302+ IndexStateOption IndexState = "option-mismatch"
303303+ IndexStateMeta IndexState = "meta-mismatch"
304304+ IndexStateContent IndexState = "content-mismatch"
305305+ IndexStateEqual IndexState = "equal"
306306)
307307308308var readVersions = []struct {
···361361 return IndexStateOption, fn
362362 }
363363364364- if o.IsDelta { // TODO: Get rid of this guard once the delta shard behavior is the default
365365- state := compareBranches(repo.Branches, o.RepositoryDescription.Branches)
366366- if state != IndexStateEqual {
367367- return state, fn
368368- }
369369- } else if !reflect.DeepEqual(repo.Branches, o.RepositoryDescription.Branches) {
364364+ if !reflect.DeepEqual(repo.Branches, o.RepositoryDescription.Branches) {
370365 return IndexStateContent, fn
371366 }
372367···383378 return IndexStateEqual, fn
384379}
385380381381+// FindRepositoryMetadata returns the index metadata for the repository
382382+// specified in the options. 'ok' is false if the repository's metadata
383383+// couldn't be found or if an error occurred.
384384+func (o *Options) FindRepositoryMetadata() (repository *zoekt.Repository, ok bool, err error) {
385385+ shard := o.findShard()
386386+ if shard == "" {
387387+ return nil, false, nil
388388+ }
389389+390390+ repositories, _, err := zoekt.ReadMetadataPathAlive(shard)
391391+ if err != nil {
392392+ return nil, false, fmt.Errorf("reading metadata for shard %q: %w", shard, err)
393393+ }
394394+395395+ ID := o.RepositoryDescription.ID
396396+ for _, r := range repositories {
397397+ // compound shards contain multiple repositories, so we
398398+ // have to pick only the one we're looking for
399399+ if r.ID == ID {
400400+ return r, true, nil
401401+ }
402402+ }
403403+404404+ // If we're here, then we're somehow in a state where we found a matching
405405+ // shard that's missing the repository metadata we're looking for. This
406406+ // should never happen.
407407+ name := o.RepositoryDescription.Name
408408+ return nil, false, fmt.Errorf("matching shard %q doesn't contain metadata for repo id %d (%q)", shard, ID, name)
409409+}
410410+386411func (o *Options) findShard() string {
387412 for _, v := range readVersions {
388413 fn := o.shardNameVersion(v.IndexFormatVersion, 0)
···516541}
517542518543func (b *Builder) Add(doc zoekt.Document) error {
544544+ if b.finishCalled {
545545+ return nil
546546+ }
547547+519548 allowLargeFile := b.opts.IgnoreSizeMax(doc.Name)
520549521550 // Adjust trigramMax for allowed large files so we don't exclude them.
···550579 return nil
551580}
552581582582+// MarkFileAsChangedOrRemoved indicates that the file specified by the given path
583583+// has been changed or removed since the last indexing job for this repository.
584584+//
585585+// If this build is a delta build, these files will be tombstoned in the older shards for this repository.
586586+func (b *Builder) MarkFileAsChangedOrRemoved(path string) {
587587+ b.opts.changedOrRemovedFiles = append(b.opts.changedOrRemovedFiles, path)
588588+}
589589+553590// Finish creates a last shard from the buffered documents, and clears
554591// stale shards from previous runs. This should always be called, also
555592// in failure cases, to ensure cleanup.
593593+//
594594+// It is safe to call Finish() multiple times.
556595func (b *Builder) Finish() error {
596596+ if b.finishCalled {
597597+ return b.buildError
598598+ }
599599+600600+ b.finishCalled = true
601601+557602 b.flush()
558603 b.building.Wait()
559604···596641 return fmt.Errorf("shard %q doesn't contain repository ID %d (%q)", shard, b.opts.RepositoryDescription.ID, b.opts.RepositoryDescription.Name)
597642 }
598643599599- if len(b.opts.ChangedOrRemovedFiles) > 0 && repository.FileTombstones == nil {
600600- repository.FileTombstones = make(map[string]struct{}, len(b.opts.ChangedOrRemovedFiles))
644644+ if len(b.opts.changedOrRemovedFiles) > 0 && repository.FileTombstones == nil {
645645+ repository.FileTombstones = make(map[string]struct{}, len(b.opts.changedOrRemovedFiles))
601646 }
602647603603- for _, f := range b.opts.ChangedOrRemovedFiles {
648648+ for _, f := range b.opts.changedOrRemovedFiles {
604649 repository.FileTombstones[f] = struct{}{}
605650 }
606651607607- if compareBranches(repository.Branches, b.opts.RepositoryDescription.Branches) == IndexStateBranchSet {
608608- // NOTE: Should we be handling IndexStateBranchVersion and IndexStateCorrupt here too?
652652+ if !BranchNamesEqual(repository.Branches, b.opts.RepositoryDescription.Branches) {
609653 return deltaBranchSetError{
610654 shardName: shard,
611655 old: repository.Branches,
···689733 return b.buildError
690734}
691735692692-func compareBranches(a, b []zoekt.RepositoryBranch) IndexState {
736736+// BranchNamesEqual compares the given zoekt.RepositoryBranch slices, and returns true
737737+// iff both slices specify the same set of branch names in the same order.
738738+func BranchNamesEqual(a, b []zoekt.RepositoryBranch) bool {
693739 if len(a) != len(b) {
694694- return IndexStateBranchSet
740740+ return false
695741 }
696742697743 for i := range a {
698744 x, y := a[i], b[i]
699745 if x.Name != y.Name {
700700- return IndexStateBranchSet
701701- }
702702- if x.Version != y.Version {
703703- return IndexStateBranchVersion
746746+ return false
704747 }
705748 }
706749707707- return IndexStateEqual
750750+ return true
708751}
709752710753func (b *Builder) flush() error {
···3636 repoCacheDir := flag.String("repo_cache", "", "directory holding bare git repos, named by URL. "+
3737 "this is used to find repositories for submodules. "+
3838 "It also affects name if the indexed repository is under this directory.")
3939+ isDelta := flag.Bool("delta", false, "whether we should use delta build")
3940 flag.Parse()
40414142 // Tune GOMAXPROCS to match Linux container CPU quota.
···4950 *repoCacheDir = dir
5051 }
5152 opts := cmd.OptionsFromFlags()
5353+ opts.IsDelta = *isDelta
52545355 var branches []string
5456 if *branchesStr != "" {
···8890 }
89919092 if err := gitindex.IndexGitRepo(gitOpts); err != nil {
9191- log.Printf("indexGitRepo(%s): %v", dir, err)
9393+ log.Printf("indexGitRepo(%s, delta=%t): %v", dir, gitOpts.BuildOptions.IsDelta, err)
9294 exitStatus = 1
9395 }
9496 }
+100-20
cmd/zoekt-sourcegraph-indexserver/index.go
···72727373 // FileLimit is the maximum size of a file
7474 FileLimit int
7575+7676+ // UseDelta is true if we want to use the new delta indexer. This should
7777+ // only be true for repositories we explicitly enable.
7878+ UseDelta bool
7579}
76807781// BuildOptions returns a build.Options represented by indexArgs. Note: it
···100104 LargeFiles: o.LargeFiles,
101105 CTagsMustSucceed: o.Symbols,
102106 DisableCTags: !o.Symbols,
107107+ IsDelta: o.UseDelta,
103108 }
104109}
105110···122127 return s
123128}
124129125125-func gitIndex(o *indexArgs, runCmd func(*exec.Cmd) error) error {
130130+type gitIndexConfig struct {
131131+ // runCmd is the function that's used to execute all external commands (such as calls to "git" or "zoekt-git-index")
132132+ // that gitIndex may construct.
133133+ runCmd func(*exec.Cmd) error
134134+135135+ // findRepositoryMetadata is the function that returns the repository metadata for the
136136+ // repository specified in args. 'ok' is false if the repository's metadata
137137+ // couldn't be found or if an error occurred.
138138+ //
139139+ // The primary purpose of this configuration option is to be able to provide a stub
140140+ // implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata().
141141+ findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, ok bool, err error)
142142+}
143143+144144+func gitIndex(c gitIndexConfig, o *indexArgs) error {
126145 if len(o.Branches) == 0 {
127146 return errors.New("zoekt-git-index requires 1 or more branches")
128147 }
129148149149+ if c.runCmd == nil {
150150+ return errors.New("runCmd in provided configuration was nil - a function must be provided")
151151+ }
152152+ runCmd := c.runCmd
153153+154154+ if c.findRepositoryMetadata == nil {
155155+ return errors.New("findRepositoryMetadata in provided configuration was nil - a function must be provided")
156156+ }
157157+ findRepositoryMetadata := c.findRepositoryMetadata
158158+130159 buildOptions := o.BuildOptions()
131160132161 // An index should never take longer than an hour.
···156185 return err
157186 }
158187159159- fetchStart := time.Now()
188188+ var fetchDuration time.Duration
189189+ successfullyFetchedCommitsCount := 0
190190+ allFetchesSucceeded := true
160191161161- // We shallow fetch each commit specified in zoekt.Branches. This requires
162162- // the server to have configured both uploadpack.allowAnySHA1InWant and
163163- // uploadpack.allowFilter. (See gitservice.go in the Sourcegraph repository)
164164- fetchArgs := []string{"-C", gitDir, "-c", "protocol.version=2", "fetch", "--depth=1", o.CloneURL}
165165- var commits []string
166166- for _, b := range o.Branches {
167167- commits = append(commits, b.Version)
168168- }
169169- fetchArgs = append(fetchArgs, commits...)
192192+ defer func() {
193193+ success := strconv.FormatBool(allFetchesSucceeded)
194194+ name := repoNameForMetric(o.Name)
195195+ metricFetchDuration.WithLabelValues(success, name).Observe(fetchDuration.Seconds())
196196+ }()
170197171171- cmd = exec.CommandContext(ctx, "git", fetchArgs...)
172172- cmd.Stdin = &bytes.Buffer{}
198198+ var fetch = func(branches []zoekt.RepositoryBranch) error {
199199+ // We shallow fetch each commit specified in zoekt.Branches. This requires
200200+ // the server to have configured both uploadpack.allowAnySHA1InWant and
201201+ // uploadpack.allowFilter. (See gitservice.go in the Sourcegraph repository)
202202+ fetchArgs := []string{"-C", gitDir, "-c", "protocol.version=2", "fetch", "--depth=1", o.CloneURL}
173203174174- err = runCmd(cmd)
175175- fetchDuration := time.Since(fetchStart)
204204+ var commits []string
205205+ for _, b := range branches {
206206+ commits = append(commits, b.Version)
207207+ }
208208+209209+ fetchArgs = append(fetchArgs, commits...)
210210+211211+ cmd = exec.CommandContext(ctx, "git", fetchArgs...)
212212+ cmd.Stdin = &bytes.Buffer{}
213213+214214+ start := time.Now()
215215+ err := runCmd(cmd)
216216+ fetchDuration += time.Since(start)
217217+218218+ if err != nil {
219219+ allFetchesSucceeded = false
220220+ return err
221221+ }
222222+223223+ successfullyFetchedCommitsCount += len(commits)
224224+ return nil
225225+ }
226226+227227+ err = fetch(o.Branches)
176228 if err != nil {
177177- metricFetchDuration.WithLabelValues("false", repoNameForMetric(o.Name)).Observe(fetchDuration.Seconds())
178229 return err
179230 }
180231181181- metricFetchDuration.WithLabelValues("true", repoNameForMetric(o.Name)).Observe(fetchDuration.Seconds())
182182- debug.Printf("fetched git data for %q (%d commit(s)) in %s", o.Name, len(commits), fetchDuration)
232232+ if o.UseDelta {
233233+ // Try fetching prior commits for delta builds
234234+ // If we're unable to fetch prior commits, we continue anyway
235235+ // knowing that zoekt-git-index will fall back to a "full" normal build
236236+ existingRepository, found, err := findRepositoryMetadata(o)
237237+ if err != nil {
238238+ return fmt.Errorf("delta build: failed to get repository metadata: %w", err)
239239+ }
240240+241241+ if found && len(existingRepository.Branches) > 0 {
242242+ err := fetch(existingRepository.Branches)
243243+ if err != nil {
244244+ var bs []string
245245+ for _, b := range existingRepository.Branches {
246246+ bs = append(bs, b.String())
247247+ }
248248+249249+ formattedBranches := strings.Join(bs, ", ")
250250+ name := buildOptions.RepositoryDescription.Name
251251+ id := buildOptions.RepositoryDescription.ID
252252+253253+ log.Printf("delta build: failed to prepare delta build for %q (ID %d): failed to fetch prior commits (%s): %s", name, id, formattedBranches, err)
254254+ }
255255+ }
256256+ }
257257+258258+ debug.Printf("successfully fetched git data for %q (%d commit(s)) in %s", o.Name, successfullyFetchedCommitsCount, fetchDuration)
183259184260 // We then create the relevant refs for each fetched commit.
185261 for _, b := range o.Branches {
···194270 }
195271 }
196272197197- // create git config with options
273273+ // create git configuration with options
198274 type configKV struct{ Key, Value string }
199275 config := []configKV{{
200276 // zoekt.name is used by zoekt-git-index to set the repository name.
···208284 return config[i].Key < config[j].Key
209285 })
210286211211- // write config to repo
287287+ // write git configuration to repo
212288 for _, kv := range config {
213289 cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "zoekt."+kv.Key, kv.Value)
214290 cmd.Stdin = &bytes.Buffer{}
···233309 branches = append(branches, b.Name)
234310 }
235311 args = append(args, "-branches", strings.Join(branches, ","))
312312+313313+ if o.UseDelta {
314314+ args = append(args, "-delta")
315315+ }
236316237317 args = append(args, buildOptions.Args()...)
238318 args = append(args, gitDir)
···165165166166 // If true, shard merging is enabled.
167167 shardMerging bool
168168+169169+ // deltaBuildRepositoriesAllowList is an allowlist for repositories that we
170170+ // use delta-builds for instead of normal builds
171171+ deltaBuildRepositoriesAllowList map[string]struct{}
168172}
169173170174var debug = log.New(ioutil.Discard, "", log.LstdFlags)
···465469 return indexStateEmpty, createEmptyShard(args)
466470 }
467471472472+ repositoryName := args.Name
473473+ if _, ok := s.deltaBuildRepositoriesAllowList[repositoryName]; ok {
474474+ repositoryID := args.BuildOptions().RepositoryDescription.ID
475475+ debug.Printf("delta build: Server.Index: marking %q (ID %d) for delta build", repositoryName, repositoryID)
476476+477477+ args.UseDelta = true
478478+ }
479479+468480 reason := "forced"
481481+469482 if args.Incremental {
470483 bo := args.BuildOptions()
471484 bo.SetDefaults()
472485 incrementalState, fn := bo.IndexState()
473486 reason = string(incrementalState)
474487 metricIndexIncrementalIndexState.WithLabelValues(string(incrementalState)).Inc()
488488+475489 switch incrementalState {
476490 case build.IndexStateEqual:
477491 debug.Printf("%s index already up to date. Shard=%s", args.String(), fn)
···493507494508 log.Printf("updating index %s reason=%s", args.String(), reason)
495509496496- runCmd := func(cmd *exec.Cmd) error { return s.loggedRun(tr, cmd) }
497510 metricIndexingTotal.Inc()
498498- return indexStateSuccess, gitIndex(args, runCmd)
511511+ c := gitIndexConfig{
512512+ runCmd: func(cmd *exec.Cmd) error {
513513+ return s.loggedRun(tr, cmd)
514514+ },
515515+516516+ findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, ok bool, err error) {
517517+ return args.BuildOptions().FindRepositoryMetadata()
518518+ },
519519+ }
520520+521521+ return indexStateSuccess, gitIndex(c, args)
499522}
500523501524func (s *Server) indexArgs(opts IndexOptions) *indexArgs {
···722745 return v
723746}
// getEnvWithDefaultEmptySet reads the environment variable k as a
// comma-separated list and returns its entries as a set. Each entry has
// surrounding whitespace trimmed, and entries that are empty after trimming
// are dropped. The returned map is never nil: an unset or empty variable
// yields an empty set.
func getEnvWithDefaultEmptySet(k string) map[string]struct{} {
	entries := make(map[string]struct{})

	raw := os.Getenv(k)
	for _, field := range strings.Split(raw, ",") {
		if name := strings.TrimSpace(field); name != "" {
			entries[name] = struct{}{}
		}
	}

	return entries
}
// joinStringSet concatenates the members of set into a single string,
// placing sep between consecutive members. A nil or empty set yields "".
//
// The members are collected by ranging over the map, so their order in the
// result is unspecified and may differ between calls.
func joinStringSet(set map[string]struct{}, sep string) string {
	// The final length is known up front, so allocate the slice once instead
	// of growing it on every append.
	xs := make([]string, 0, len(set))
	for x := range set {
		xs = append(xs, x)
	}

	return strings.Join(xs, sep)
}
767767+725768func setCompoundShardCounter(indexDir string) {
726769 fns, err := filepath.Glob(filepath.Join(indexDir, "compound-*.zoekt"))
727770 if err != nil {
···843886 debug = log.New(os.Stderr, "", log.LstdFlags)
844887 }
845888846846- indexingMetricsReposAllowlist := os.Getenv("INDEXING_METRICS_REPOS_ALLOWLIST")
847847- if indexingMetricsReposAllowlist != "" {
848848- var repos []string
849849-850850- for _, r := range strings.Split(indexingMetricsReposAllowlist, ",") {
851851- r = strings.TrimSpace(r)
852852- if r != "" {
853853- repos = append(repos, r)
854854- }
855855- }
856856-857857- for _, r := range repos {
858858- reposWithSeparateIndexingMetrics[r] = struct{}{}
859859- }
889889+ reposWithSeparateIndexingMetrics = getEnvWithDefaultEmptySet("INDEXING_METRICS_REPOS_ALLOWLIST")
890890+ if len(reposWithSeparateIndexingMetrics) > 0 {
891891+ debug.Printf("capturing separate indexing metrics for: %s", joinStringSet(reposWithSeparateIndexingMetrics, ", "))
892892+ }
860893861861- debug.Printf("capturing separate indexing metrics for: %s", repos)
894894+ deltaBuildRepositoriesAllowList := getEnvWithDefaultEmptySet("DELTA_BUILD_REPOS_ALLOWLIST")
895895+ if len(deltaBuildRepositoriesAllowList) > 0 {
896896+ debug.Printf("using delta shard builds for: %s", joinStringSet(deltaBuildRepositoriesAllowList, ", "))
862897 }
863898864899 var sg Sourcegraph
···890925 if cpuCount < 1 {
891926 cpuCount = 1
892927 }
928928+893929 return &Server{
894894- Sourcegraph: sg,
895895- IndexDir: conf.index,
896896- Interval: conf.interval,
897897- VacuumInterval: conf.vacuumInterval,
898898- MergeInterval: conf.mergeInterval,
899899- CPUCount: cpuCount,
900900- TargetSizeBytes: conf.targetSize * 1024 * 1024,
901901- minSizeBytes: conf.minSize * 1024 * 1024,
902902- shardMerging: zoekt.ShardMergingEnabled(),
930930+ Sourcegraph: sg,
931931+ IndexDir: conf.index,
932932+ Interval: conf.interval,
933933+ VacuumInterval: conf.vacuumInterval,
934934+ MergeInterval: conf.mergeInterval,
935935+ CPUCount: cpuCount,
936936+ TargetSizeBytes: conf.targetSize * 1024 * 1024,
937937+ minSizeBytes: conf.minSize * 1024 * 1024,
938938+ shardMerging: zoekt.ShardMergingEnabled(),
939939+ deltaBuildRepositoriesAllowList: deltaBuildRepositoriesAllowList,
903940 }, err
904941}
905942
+295-35
gitindex/index.go
···17171818import (
1919 "bytes"
2020+ "context"
2121+ "errors"
2022 "fmt"
2123 "io"
2224 "log"
···351353352354// IndexGitRepo indexes the git repository as specified by the options.
353355func IndexGitRepo(opts Options) error {
356356+ return indexGitRepo(opts, gitIndexConfig{})
357357+}
358358+359359+// indexGitRepo indexes the git repository as specified by the options and the provided gitIndexConfig.
360360+func indexGitRepo(opts Options, config gitIndexConfig) error {
361361+ prepareDeltaBuild := prepareDeltaBuild
362362+ if config.prepareDeltaBuild != nil {
363363+ prepareDeltaBuild = config.prepareDeltaBuild
364364+ }
365365+366366+ prepareNormalBuild := prepareNormalBuild
367367+ if config.prepareNormalBuild != nil {
368368+ prepareNormalBuild = config.prepareNormalBuild
369369+ }
370370+354371 // Set max thresholds, since we use them in this function.
355372 opts.BuildOptions.SetDefaults()
356373 if opts.RepoDir == "" {
···367384 log.Printf("setTemplatesFromConfig(%s): %s", opts.RepoDir, err)
368385 }
369386370370- var repoCache *RepoCache
371371- if opts.Submodules {
372372- repoCache = NewRepoCache(opts.RepoCacheDir)
373373- }
374374-375375- // branch => (path, sha1) => repo.
376376- repos := map[fileKey]BlobLocation{}
377377-378378- // fileKey => branches
379379- branchMap := map[fileKey][]string{}
380380-381381- // Branch => Repo => SHA1
382382- branchVersions := map[string]map[string]plumbing.Hash{}
383383-384387 branches, err := expandBranches(repo, opts.Branches, opts.BranchPrefix)
385388 if err != nil {
386389 return fmt.Errorf("expandBranches: %w", err)
···403406 if when := commit.Committer.When; when.After(opts.BuildOptions.RepositoryDescription.LatestCommitDate) {
404407 opts.BuildOptions.RepositoryDescription.LatestCommitDate = when
405408 }
409409+ }
406410407407- tree, err := commit.Tree()
408408- if err != nil {
409409- return fmt.Errorf("commit.Tree: %w", err)
410410- }
411411+ if opts.Incremental && opts.BuildOptions.IncrementalSkipIndexing() {
412412+ return nil
413413+ }
411414412412- ig, err := newIgnoreMatcher(tree)
413413- if err != nil {
414414- return fmt.Errorf("newIgnoreMatcher: %w", err)
415415- }
415415+ // branch => (path, sha1) => repo.
416416+ var repos map[fileKey]BlobLocation
416417417417- files, subVersions, err := TreeToFiles(repo, tree, opts.BuildOptions.RepositoryDescription.URL, repoCache)
418418+ // fileKey => branches
419419+ var branchMap map[fileKey][]string
420420+421421+ // Branch => Repo => SHA1
422422+ var branchVersions map[string]map[string]plumbing.Hash
423423+424424+ // set of file paths that have been changed or deleted since
425425+ // the last indexed commit
426426+ //
427427+ // These only have an effect on delta builds
428428+ var changedOrRemovedFiles []string
429429+430430+ if opts.BuildOptions.IsDelta {
431431+ repos, branchMap, branchVersions, changedOrRemovedFiles, err = prepareDeltaBuild(opts, repo)
418432 if err != nil {
419419- return fmt.Errorf("TreeToFiles: %w", err)
433433+ log.Printf("delta build: falling back to normal build since delta build failed, repository=%q, err=%s", opts.BuildOptions.RepositoryDescription.Name, err)
434434+ opts.BuildOptions.IsDelta = false
420435 }
421421- for k, v := range files {
422422- if ig.Match(k.Path) {
423423- continue
424424- }
425425- repos[k] = v
426426- branchMap[k] = append(branchMap[k], b)
427427- }
428428-429429- branchVersions[b] = subVersions
430436 }
431437432432- if opts.Incremental && opts.BuildOptions.IncrementalSkipIndexing() {
433433- return nil
438438+ if !opts.BuildOptions.IsDelta {
439439+ repos, branchMap, branchVersions, err = prepareNormalBuild(opts, repo)
440440+ if err != nil {
441441+ return fmt.Errorf("preparing normal build: %w", err)
442442+ }
434443 }
435444436445 reposByPath := map[string]BlobLocation{}
···449458 }
450459 opts.BuildOptions.SubRepositories[path] = &tpl
451460 }
461461+452462 for _, br := range opts.BuildOptions.RepositoryDescription.Branches {
453463 for path, repo := range opts.BuildOptions.SubRepositories {
454464 id := branchVersions[br.Name][path]
···466476 // we don't need to check error, since we either already have an error, or
467477 // we returning the first call to builder.Finish.
468478 defer builder.Finish() // nolint:errcheck
479479+480480+ for _, f := range changedOrRemovedFiles {
481481+ builder.MarkFileAsChangedOrRemoved(f)
482482+ }
469483470484 var names []string
471485 fileKeys := map[string][]fileKey{}
···530544 return nil, err
531545 }
532546 return ignore.ParseIgnoreFile(strings.NewReader(content))
547547+}
548548+549549+// prepareDeltaBuildFunc is a function that calculates the necessary metadata for preparing
550550+// a build.Builder instance for generating a delta build.
551551+type prepareDeltaBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error)
552552+553553+// prepareNormalBuildFunc is a function that calculates the necessary metadata for preparing
554554+// a build.Builder instance for generating a normal build.
555555+type prepareNormalBuildFunc func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error)
556556+557557+type gitIndexConfig struct {
558558+ // prepareDeltaBuild, if not nil, is the function that is used to calculate the metadata that will be used to
559559+ // prepare the build.Builder instance for generating a delta build.
560560+ //
561561+ // If prepareDeltaBuild is nil, gitindex.prepareDeltaBuild will be used instead.
562562+ prepareDeltaBuild prepareDeltaBuildFunc
563563+564564+ // prepareNormalBuild, if not nil, is the function that is used to calculate the metadata that will be used to
565565+ // prepare the build.Builder instance for generating a normal build.
566566+ //
567567+ // If prepareNormalBuild is nil, gitindex.prepareNormalBuild will be used instead.
568568+ prepareNormalBuild prepareNormalBuildFunc
569569+}
570570+571571+func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
572572+ // discover what commits we indexed during our last build
573573+574574+ if options.Submodules {
575575+ return nil, nil, nil, nil, fmt.Errorf("delta builds currently don't support submodule indexing")
576576+ }
577577+578578+ existingRepository, ok, err := options.BuildOptions.FindRepositoryMetadata()
579579+ if err != nil {
580580+ return nil, nil, nil, nil, fmt.Errorf("failed to get repository metadata: %w", err)
581581+ }
582582+583583+ if !ok {
584584+ return nil, nil, nil, nil, fmt.Errorf("no existing shards found for repository")
585585+ }
586586+587587+ // Check to see if the set of branch names is consistent with what we last indexed.
588588+ // If it isn't consistent, that we can't proceed with a delta build (and the caller should fall back to a
589589+ // normal one).
590590+591591+ if !build.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) {
592592+ var existingBranchNames []string
593593+ for _, b := range existingRepository.Branches {
594594+ existingBranchNames = append(existingBranchNames, b.Name)
595595+ }
596596+597597+ var optionsBranchNames []string
598598+ for _, b := range options.BuildOptions.RepositoryDescription.Branches {
599599+ optionsBranchNames = append(optionsBranchNames, b.Name)
600600+ }
601601+602602+ existingBranchList := strings.Join(existingBranchNames, ", ")
603603+ optionsBranchList := strings.Join(optionsBranchNames, ", ")
604604+605605+ return nil, nil, nil, nil, fmt.Errorf("requested branch set in build options (%q) != branch set found on disk (%q) - branch set must be the same for delta shards", optionsBranchList, existingBranchList)
606606+ }
607607+608608+ // branch => (path, sha1) => repo.
609609+ repos = map[fileKey]BlobLocation{}
610610+611611+ // fileKey => branches
612612+ branchMap = map[fileKey][]string{}
613613+614614+ // branch name -> git worktree at most current commit
615615+ branchToCurrentTree := make(map[string]*object.Tree, len(options.Branches))
616616+617617+ for _, b := range options.Branches {
618618+ commit, err := getCommit(repository, options.BranchPrefix, b)
619619+ if err != nil {
620620+ return nil, nil, nil, nil, fmt.Errorf("getting last current commit for branch %q: %w", b, err)
621621+ }
622622+623623+ tree, err := commit.Tree()
624624+ if err != nil {
625625+ return nil, nil, nil, nil, fmt.Errorf("getting current git tree for branch %q: %w", b, err)
626626+ }
627627+628628+ branchToCurrentTree[b] = tree
629629+ }
630630+631631+ rawURL := options.BuildOptions.RepositoryDescription.URL
632632+ u, err := url.Parse(rawURL)
633633+ if err != nil {
634634+ return nil, nil, nil, nil, fmt.Errorf("parsing repository URL %q: %w", rawURL, err)
635635+ }
636636+637637+ // TODO: Support repository submodules for delta builds
638638+ // For this prototype, we are ignoring repository submodules, which means that we can use the same
639639+ // blob location for all files
640640+ hackSharedBlobLocation := BlobLocation{
641641+ Repo: repository,
642642+ URL: u,
643643+ }
644644+645645+ // loop over all branches, calculate the diff between our
646646+ // last indexed commit and the current commit, and add files mentioned in the diff
647647+ for _, branch := range existingRepository.Branches {
648648+ lastIndexedCommit, err := getCommit(repository, "", branch.Version)
649649+ if err != nil {
650650+ return nil, nil, nil, nil, fmt.Errorf("getting last indexed commit for branch %q: %w", branch.Name, err)
651651+ }
652652+653653+ lastIndexedTree, err := lastIndexedCommit.Tree()
654654+ if err != nil {
655655+ return nil, nil, nil, nil, fmt.Errorf("getting lasted indexed git tree for branch %q: %w", branch.Name, err)
656656+ }
657657+658658+ changes, err := object.DiffTreeWithOptions(context.Background(), lastIndexedTree, branchToCurrentTree[branch.Name], &object.DiffTreeOptions{DetectRenames: false})
659659+ if err != nil {
660660+ return nil, nil, nil, nil, fmt.Errorf("generating changeset for branch %q: %w", branch.Name, err)
661661+ }
662662+663663+ for i, c := range changes {
664664+ oldFile, newFile, err := c.Files()
665665+ if err != nil {
666666+ return nil, nil, nil, nil, fmt.Errorf("change #%d: getting files before and after change: %w", i, err)
667667+ }
668668+669669+ if newFile != nil {
670670+ // note: newFile.Name could be a path that isn't relative to the repository root - using the
671671+ // change's Name field is the only way that @ggilmore saw to get the full path relative to the root
672672+ newFileRelativeRootPath := c.To.Name
673673+674674+ // TODO@ggilmore: HACK - remove once ignore files are supported in delta builds
675675+ if newFileRelativeRootPath == ignore.IgnoreFile {
676676+ return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile)
677677+ }
678678+679679+ // either file is added or renamed, so we need to add the new version to the build
680680+ file := fileKey{Path: newFileRelativeRootPath, ID: newFile.Hash}
681681+ repos[file] = hackSharedBlobLocation
682682+ branchMap[file] = append(branchMap[file], branch.Name)
683683+ }
684684+685685+ if oldFile == nil {
686686+ // file added - nothing more to do
687687+ continue
688688+ }
689689+690690+ // Note: oldFile.Name could be a path that isn't relative to the repository root - using the
691691+ // change's "Name" field is the only way that ggilmore saw to get the full path relative to the root
692692+ oldFileRelativeRootPath := c.From.Name
693693+694694+ if oldFileRelativeRootPath == ignore.IgnoreFile {
695695+ return nil, nil, nil, nil, fmt.Errorf("%q file is not yet supported in delta builds", ignore.IgnoreFile)
696696+ }
697697+698698+ // The file is either modified or deleted. So, we need to add ALL versions
699699+ // of the old file (across all branches) to the build.
700700+ for b, currentTree := range branchToCurrentTree {
701701+ f, err := currentTree.File(oldFileRelativeRootPath)
702702+ if err != nil {
703703+ // the file doesn't exist in this branch
704704+ if errors.Is(err, object.ErrFileNotFound) {
705705+ continue
706706+ }
707707+708708+ return nil, nil, nil, nil, fmt.Errorf("getting hash for file %q in branch %q: %w", oldFile.Name, b, err)
709709+ }
710710+711711+ file := fileKey{Path: oldFileRelativeRootPath, ID: f.ID()}
712712+ repos[file] = hackSharedBlobLocation
713713+ branchMap[file] = append(branchMap[file], b)
714714+ }
715715+716716+ changedOrDeletedPaths = append(changedOrDeletedPaths, oldFileRelativeRootPath)
717717+ }
718718+ }
719719+720720+ // we need to de-duplicate the branch map before returning it - it's possible for the same
721721+ // branch to have been added multiple times if a file has been modified across multiple commits
722722+723723+ for file, branches := range branchMap {
724724+ sort.Strings(branches)
725725+ branchMap[file] = uniq(branches)
726726+ }
727727+728728+ // we also need to de-duplicate the list of changed or deleted file paths, it's also possible to have duplicates
729729+ // for the same reasoning as above
730730+731731+ sort.Strings(changedOrDeletedPaths)
732732+ changedOrDeletedPaths = uniq(changedOrDeletedPaths)
733733+734734+ return repos, branchMap, nil, changedOrDeletedPaths, nil
735735+}
736736+737737+func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchMap map[fileKey][]string, branchVersions map[string]map[string]plumbing.Hash, err error) {
738738+ var repoCache *RepoCache
739739+ if options.Submodules {
740740+ repoCache = NewRepoCache(options.RepoCacheDir)
741741+ }
742742+743743+ // branch => (path, sha1) => repo.
744744+ repos = map[fileKey]BlobLocation{}
745745+746746+ // fileKey => branches
747747+ branchMap = map[fileKey][]string{}
748748+749749+ // Branch => Repo => SHA1
750750+ branchVersions = map[string]map[string]plumbing.Hash{}
751751+752752+ branches, err := expandBranches(repository, options.Branches, options.BranchPrefix)
753753+ if err != nil {
754754+ return nil, nil, nil, fmt.Errorf("expandBranches: %w", err)
755755+ }
756756+757757+ for _, b := range branches {
758758+ commit, err := getCommit(repository, options.BranchPrefix, b)
759759+ if err != nil {
760760+ if options.AllowMissingBranch && err.Error() == "reference not found" {
761761+ continue
762762+ }
763763+764764+ return nil, nil, nil, fmt.Errorf("getCommit: %w", err)
765765+ }
766766+767767+ tree, err := commit.Tree()
768768+ if err != nil {
769769+ return nil, nil, nil, fmt.Errorf("commit.Tree: %w", err)
770770+ }
771771+772772+ ig, err := newIgnoreMatcher(tree)
773773+ if err != nil {
774774+ return nil, nil, nil, fmt.Errorf("newIgnoreMatcher: %w", err)
775775+ }
776776+777777+ files, subVersions, err := TreeToFiles(repository, tree, options.BuildOptions.RepositoryDescription.URL, repoCache)
778778+ if err != nil {
779779+ return nil, nil, nil, fmt.Errorf("TreeToFiles: %w", err)
780780+ }
781781+ for k, v := range files {
782782+ if ig.Match(k.Path) {
783783+ continue
784784+ }
785785+ repos[k] = v
786786+ branchMap[k] = append(branchMap[k], b)
787787+ }
788788+789789+ branchVersions[b] = subVersions
790790+ }
791791+792792+ return repos, branchMap, branchVersions, nil
533793}
534794535795func blobContents(blob *object.Blob) ([]byte, error) {