fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

index: enable shard merging by default (#798)

This enables shard merging by default for zoekt-sourcegraph-indexserver.

Sourcegraph has been using shard merging in production for several years. We have recently confirmed significant performance improvements for queries which are bound by matchTree construction.

This change also removes the -merge_max_priority flag, because we have stopped using it.

Set SRC_DISABLE_SHARD_MERGING=true to disable shard merging.

Test plan:
Mostly CI. I also did some manual testing to confirm that shard merging is enabled by default for zoekt-sourcegraph-indexserver.

+37 -25
+10 -1
build/builder.go
··· 116 116 changedOrRemovedFiles []string 117 117 118 118 LanguageMap ctags.LanguageMap 119 + 120 + // ShardMerging is true if builder should respect compound shards. This is a 121 + // Sourcegraph specific option. 122 + ShardMerging bool 119 123 } 120 124 121 125 // HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building. ··· 194 198 195 199 // Sourcegraph specific 196 200 fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.") 201 + fs.BoolVar(&o.ShardMerging, "shard_merging", x.ShardMerging, "If set, builder will respect compound shards.") 197 202 } 198 203 199 204 // Args generates command line arguments for o. It is the "inverse" of Flags. ··· 231 236 // Sourcegraph specific 232 237 if o.DisableCTags { 233 238 args = append(args, "-disable_ctags") 239 + } 240 + 241 + if o.ShardMerging { 242 + args = append(args, "-shard_merging") 234 243 } 235 244 236 245 return args ··· 774 783 775 784 for p := range toDelete { 776 785 // Don't delete compound shards, set tombstones instead. 777 - if zoekt.ShardMergingEnabled() && strings.HasPrefix(filepath.Base(p), "compound-") { 786 + if b.opts.ShardMerging && strings.HasPrefix(filepath.Base(p), "compound-") { 778 787 if !strings.HasSuffix(p, ".zoekt") { 779 788 continue 780 789 }
+5
cmd/zoekt-sourcegraph-indexserver/index.go
··· 98 98 // DeltaShardNumberFallbackThreshold is an upper limit on the number of preexisting shards that can exist 99 99 // before attempting a delta build. 100 100 DeltaShardNumberFallbackThreshold uint64 101 + 102 + // ShardMerging is true if we want zoekt-git-index to respect compound shards. 103 + ShardMerging bool 101 104 } 102 105 103 106 // BuildOptions returns a build.Options represented by indexArgs. Note: it ··· 131 134 DocumentRanksVersion: o.DocumentRanksVersion, 132 135 133 136 LanguageMap: o.LanguageMap, 137 + 138 + ShardMerging: o.ShardMerging, 134 139 } 135 140 } 136 141
+22 -9
cmd/zoekt-sourcegraph-indexserver/main.go
··· 645 645 646 646 // 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22 647 647 FileLimit: 1 << 20, 648 + 649 + ShardMerging: s.shardMerging, 648 650 } 649 651 } 650 652 ··· 1065 1067 return strings.EqualFold(lvl, "dbug") || strings.EqualFold(lvl, "debug") 1066 1068 } 1067 1069 1070 + func getEnvWithDefaultBool(k string, defaultVal bool) bool { 1071 + v := os.Getenv(k) 1072 + if v == "" { 1073 + return defaultVal 1074 + } 1075 + b, err := strconv.ParseBool(v) 1076 + if err != nil { 1077 + log.Fatalf("error parsing ENV %s to int64: %s", k, err) 1078 + } 1079 + return b 1080 + } 1081 + 1068 1082 func getEnvWithDefaultInt64(k string, defaultVal int64) int64 { 1069 1083 v := os.Getenv(k) 1070 1084 if v == "" { ··· 1196 1210 blockProfileRate int 1197 1211 1198 1212 // config values related to shard merging 1199 - vacuumInterval time.Duration 1200 - mergeInterval time.Duration 1201 - targetSize int64 1202 - minSize int64 1203 - minAgeDays int 1204 - maxPriority float64 1213 + disableShardMerging bool 1214 + vacuumInterval time.Duration 1215 + mergeInterval time.Duration 1216 + targetSize int64 1217 + minSize int64 1218 + minAgeDays int 1205 1219 1206 1220 // config values related to backoff indexing repos with one or more consecutive failures 1207 1221 backoffDuration time.Duration ··· 1221 1235 fs.DurationVar(&rc.maxBackoffDuration, "max_backoff_duration", getEnvWithDefaultDuration("MAX_BACKOFF_DURATION", 120*time.Minute), "the maximum duration to backoff from enqueueing a repo for indexing. 
A negative value disables indexing backoff.") 1222 1236 1223 1237 // flags related to shard merging 1238 + fs.BoolVar(&rc.disableShardMerging, "shard_merging", getEnvWithDefaultBool("SRC_DISABLE_SHARD_MERGING", false), "disable shard merging") 1224 1239 fs.DurationVar(&rc.vacuumInterval, "vacuum_interval", getEnvWithDefaultDuration("SRC_VACUUM_INTERVAL", 24*time.Hour), "run vacuum this often") 1225 1240 fs.DurationVar(&rc.mergeInterval, "merge_interval", getEnvWithDefaultDuration("SRC_MERGE_INTERVAL", 8*time.Hour), "run merge this often") 1226 1241 fs.Int64Var(&rc.targetSize, "merge_target_size", getEnvWithDefaultInt64("SRC_MERGE_TARGET_SIZE", 2000), "the target size of compound shards in MiB") 1227 1242 fs.Int64Var(&rc.minSize, "merge_min_size", getEnvWithDefaultInt64("SRC_MERGE_MIN_SIZE", 1800), "the minimum size of a compound shard in MiB") 1228 1243 fs.IntVar(&rc.minAgeDays, "merge_min_age", getEnvWithDefaultInt("SRC_MERGE_MIN_AGE", 7), "the time since the last commit in days. Shards with newer commits are excluded from merging.") 1229 - fs.Float64Var(&rc.maxPriority, "merge_max_priority", getEnvWithDefaultFloat64("SRC_MERGE_MAX_PRIORITY", 100), "the maximum priority a shard can have to be considered for merging.") 1230 1244 } 1231 1245 1232 1246 func startServer(conf rootConfig) error { ··· 1428 1442 Interval: conf.interval, 1429 1443 CPUCount: cpuCount, 1430 1444 queue: *q, 1431 - shardMerging: zoekt.ShardMergingEnabled(), 1445 + shardMerging: !conf.disableShardMerging, 1432 1446 deltaBuildRepositoriesAllowList: deltaBuildRepositoriesAllowList, 1433 1447 deltaShardNumberFallbackThreshold: deltaShardNumberFallbackThreshold, 1434 1448 repositoriesSkipSymbolsCalculationAllowList: reposShouldSkipSymbolsCalculation, ··· 1439 1453 targetSizeBytes: conf.targetSize * 1024 * 1024, 1440 1454 minSizeBytes: conf.minSize * 1024 * 1024, 1441 1455 minAgeDays: conf.minAgeDays, 1442 - maxPriority: conf.maxPriority, 1443 1456 }, 1444 1457 timeout: indexingTimeout, 1445 1458 }, 
err
-7
cmd/zoekt-sourcegraph-indexserver/merge.go
··· 180 180 // merging. For example, a value of 7 means that only repos that have been 181 181 // inactive for 7 days will be considered for merging. 182 182 minAgeDays int 183 - 184 - // the MAX priority a shard can have to be considered for merging. 185 - maxPriority float64 186 183 } 187 184 188 185 // isExcluded returns true if a shard should not be merged, false otherwise. ··· 210 207 } 211 208 212 209 if repos[0].LatestCommitDate.After(time.Now().AddDate(0, 0, -opts.minAgeDays)) { 213 - return true 214 - } 215 - 216 - if priority, err := strconv.ParseFloat(repos[0].RawConfig["priority"], 64); err == nil && priority > opts.maxPriority { 217 210 return true 218 211 } 219 212
-8
tombstones.go
··· 5 5 "fmt" 6 6 "os" 7 7 "path/filepath" 8 - "strconv" 9 8 ) 10 - 11 - // ShardMergingEnabled returns true if SRC_ENABLE_SHARD_MERGING is set to true. 12 - func ShardMergingEnabled() bool { 13 - t := os.Getenv("SRC_ENABLE_SHARD_MERGING") 14 - enabled, _ := strconv.ParseBool(t) 15 - return enabled 16 - } 17 9 18 10 var mockRepos []*Repository 19 11