fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Debug: make indexing timeout configurable (#676)

On large repos, indexing might take quite a while and hit the indexing timeout.
This change helps debug these situations:
* Make the indexing timeout configurable through the `INDEXING_TIMEOUT` environment variable
* Add more info to progress logging: log the total number of files being
indexed, plus the file count per shard

+31 -9
+5 -2
build/builder.go
··· 1088 1088 return nil, err 1089 1089 } 1090 1090 1091 - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 1092 - float64(fi.Size())/float64(ib.ContentSize()+1)) 1091 + log.Printf("finished shard %s: %d index bytes (overhead %3.1f), %d files processed \n", 1092 + fn, 1093 + fi.Size(), 1094 + float64(fi.Size())/float64(ib.ContentSize()+1), 1095 + ib.NumFiles()) 1093 1096 1094 1097 return &finishedShard{f.Name(), fn}, nil 1095 1098 }
+5 -5
cmd/zoekt-sourcegraph-indexserver/index.go
··· 25 25 sglog "github.com/sourcegraph/log" 26 26 ) 27 27 28 - // indexTimeout defines how long the indexserver waits before 29 - // killing an indexing job. 30 - const indexTimeout = 1*time.Hour + 30*time.Minute // an index should never take longer than an hour and a half 28 + const defaultIndexingTimeout = 1*time.Hour + 30*time.Minute 31 29 32 30 // IndexOptions are the options that Sourcegraph can set via it's search 33 31 // configuration endpoint. ··· 163 161 // The primary purpose of this configuration option is to be able to provide a stub 164 162 // implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata(). 165 163 findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) 164 + 165 + // timeout defines how long the index server waits before killing an indexing job. 166 + timeout time.Duration 166 167 } 167 168 168 169 func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error { ··· 182 183 } 183 184 184 185 buildOptions := o.BuildOptions() 185 - 186 - ctx, cancel := context.WithTimeout(context.Background(), indexTimeout) 186 + ctx, cancel := context.WithTimeout(context.Background(), c.timeout) 187 187 defer cancel() 188 188 189 189 gitDir, err := tmpGitDir(o.Name)
+11 -1
cmd/zoekt-sourcegraph-indexserver/main.go
··· 82 82 Help: "A histogram of latencies for indexing a repository.", 83 83 Buckets: prometheus.ExponentialBucketsRange( 84 84 (100 * time.Millisecond).Seconds(), 85 - (40*time.Minute + indexTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo 85 + (40*time.Minute + defaultIndexingTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo 86 86 20), 87 87 }, []string{ 88 88 "state", // state is an indexState ··· 188 188 hostname string 189 189 190 190 mergeOpts mergeOpts 191 + 192 + // timeout defines how long the index server waits before killing an indexing job. 193 + timeout time.Duration 191 194 } 192 195 193 196 var debug = log.New(io.Discard, "", log.LstdFlags) ··· 585 588 findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) { 586 589 return args.BuildOptions().FindRepositoryMetadata() 587 590 }, 591 + timeout: s.timeout, 588 592 } 589 593 590 594 err = gitIndex(c, args, s.Sourcegraph, s.logger) ··· 1369 1373 debug.Printf("skipping generating symbols metadata for: %s", joinStringSet(reposShouldSkipSymbolsCalculation, ", ")) 1370 1374 } 1371 1375 1376 + indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout) 1377 + if indexingTimeout != defaultIndexingTimeout { 1378 + debug.Printf("using configured indexing timeout: %s", indexingTimeout) 1379 + } 1380 + 1372 1381 var sg Sourcegraph 1373 1382 if rootURL.IsAbs() { 1374 1383 var batchSize int ··· 1432 1441 minAgeDays: conf.minAgeDays, 1433 1442 maxPriority: conf.maxPriority, 1434 1443 }, 1444 + timeout: indexingTimeout, 1435 1445 }, err 1436 1446 } 1437 1447
+4
gitindex/index.go
··· 529 529 530 530 var names []string 531 531 fileKeys := map[string][]fileKey{} 532 + totalFiles := 0 533 + 532 534 for key := range repos { 533 535 n := key.FullPath() 534 536 fileKeys[n] = append(fileKeys[n], key) 535 537 names = append(names, n) 538 + totalFiles++ 536 539 } 537 540 538 541 sort.Strings(names) 539 542 names = uniq(names) 540 543 544 + log.Printf("attempting to index %d total files", totalFiles) 541 545 for _, name := range names { 542 546 keys := fileKeys[name] 543 547
+5
indexbuilder.go
··· 230 230 return b.contentPostings.endByte + b.namePostings.endByte 231 231 } 232 232 233 + // NumFiles returns the number of files added to this builder 234 + func (b *IndexBuilder) NumFiles() int { 235 + return len(b.contentStrings) 236 + } 237 + 233 238 // NewIndexBuilder creates a fresh IndexBuilder. The passed in 234 239 // Repository contains repo metadata, and may be set to nil. 235 240 func NewIndexBuilder(r *Repository) (*IndexBuilder, error) {
+1 -1
merge.go
··· 82 82 return err 83 83 } 84 84 85 - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 85 + log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 86 86 float64(fi.Size())/float64(ib.ContentSize()+1)) 87 87 88 88 return nil