shards/shards.go at 7af9f842568cf8e164cc82561394aa2939f03348 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / shards / shards.go
at 7af9f842568cf8e164cc82561394aa2939f03348 38 kB View raw
Stefan Hengl tenant: introduce systemtenant (#863) 2y ago
   1// Copyright 2016 Google Inc. All rights reserved.
   2//
   3// Licensed under the Apache License, Version 2.0 (the "License");
   4// you may not use this file except in compliance with the License.
   5// You may obtain a copy of the License at
   6//
   7//    http://www.apache.org/licenses/LICENSE-2.0
   8//
   9// Unless required by applicable law or agreed to in writing, software
  10// distributed under the License is distributed on an "AS IS" BASIS,
  11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12// See the License for the specific language governing permissions and
  13// limitations under the License.
  14
  15package shards
  16
  17import (
  18	"context"
  19	"fmt"
  20	"log"
  21	"math"
  22	"os"
  23	"runtime"
  24	"runtime/debug"
  25	"slices"
  26	"sort"
  27	"strconv"
  28	"sync"
  29	"time"
  30
  31	"golang.org/x/sync/semaphore"
  32
  33	"github.com/prometheus/client_golang/prometheus"
  34	"github.com/prometheus/client_golang/prometheus/promauto"
  35	"go.uber.org/atomic"
  36
  37	"github.com/sourcegraph/zoekt"
  38	"github.com/sourcegraph/zoekt/internal/tenant/systemtenant"
  39	"github.com/sourcegraph/zoekt/query"
  40	"github.com/sourcegraph/zoekt/trace"
  41)
  42
// Prometheus metrics exported by this package. All of them are created
// through promauto at package initialization.
var (
	// Shard loading metrics. The two counters are incremented by loader.load
	// for every shard it attempts to load.
	metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_shards_loaded",
		Help: "The number of shards currently loaded",
	})
	metricShardsLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_shards_loaded_total",
		Help: "The total number of shards loaded",
	})
	metricShardsLoadFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_shards_load_failed_total",
		Help: "The total number of shard loads that failed",
	})

	// Search request metrics. metricSearchRunning, metricSearchFailedTotal and
	// metricSearchDuration are maintained by streamSearch;
	// metricSearchShardRunning is maintained by searchOneShard.
	metricSearchRunning = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_search_running",
		Help: "The number of concurrent search requests running",
	})
	metricSearchShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_search_shard_running",
		Help: "The number of concurrent search requests in a shard running",
	})
	metricSearchFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_failed_total",
		Help: "The total number of search requests that failed",
	})
	metricSearchDuration = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "zoekt_search_duration_seconds",
		Help:    "The duration a search request took in seconds",
		Buckets: prometheus.DefBuckets, // DefBuckets good for service timings
	})

	// A Counter per Stat. Name should match field in zoekt.Stats.
	// These are all updated together by observeMetrics.
	metricSearchContentBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_content_loaded_bytes_total",
		Help: "Total amount of I/O for reading contents",
	})
	metricSearchIndexBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_index_loaded_bytes_total",
		Help: "Total amount of I/O for reading from index",
	})
	metricSearchCrashesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_crashes_total",
		Help: "Total number of search shards that had a crash",
	})
	metricSearchFileCountTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_file_count_total",
		Help: "Total number of files containing a match",
	})
	metricSearchShardFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_shard_files_considered_total",
		Help: "Total number of files in shards that we considered",
	})
	metricSearchFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_files_considered_total",
		Help: "Total files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true",
	})
	metricSearchFilesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_files_loaded_total",
		Help: "Total files for which we loaded file content to verify substring matches",
	})
	metricSearchFilesSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_files_skipped_total",
		Help: "Total candidate files whose contents weren't examined because we gathered enough matches",
	})
	metricSearchShardsSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_shards_skipped_total",
		Help: "Total shards that we did not process because a query was canceled",
	})
	metricSearchMatchCountTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_match_count_total",
		Help: "Total number of non-overlapping matches",
	})
	metricSearchNgramMatchesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_ngram_matches_total",
		Help: "Total number of candidate matches as a result of searching ngrams",
	})
	metricSearchNgramLookupsTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_ngram_lookups_total",
		Help: "Total number of times we accessed an ngram in the index",
	})
	metricSearchRegexpsConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "zoekt_search_regexps_considered_total",
		Help: "Total number of times regexp was called on files that we evaluated",
	})

	// List request metrics. metricListRunning is maintained by
	// shardedSearcher.List and metricListShardRunning by listOneShard.
	metricListRunning = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_running",
		Help: "The number of concurrent list requests running",
	})
	metricListShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_shard_running",
		Help: "The number of concurrent list requests in a shard running",
	})
	metricShardsBatchReplaceDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "zoekt_shards_batch_replace_duration_seconds",
		Help:    "The time it takes to replace a batch of Searchers.",
		Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
	})

	// Gauges mirroring the RepoStats fields of the most recent List(true)
	// call, as described by each Help string.
	metricListAllRepos = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_repos",
		Help: "The last List(true) value for RepoStats.Repos. Repos is used for aggregrating the number of repositories.",
	})
	metricListAllShards = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_shards",
		Help: "The last List(true) value for RepoStats.Shards. Shards is the total number of search shards.",
	})
	metricListAllDocuments = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_documents",
		Help: "The last List(true) value for RepoStats.Documents. Documents holds the number of documents or files.",
	})
	metricListAllIndexBytes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_index_bytes",
		Help: "The last List(true) value for RepoStats.IndexBytes. IndexBytes is the amount of RAM used for index overhead.",
	})
	metricListAllContentBytes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_content_bytes",
		Help: "The last List(true) value for RepoStats.ContentBytes. ContentBytes is the amount of RAM used for raw content.",
	})
	metricListAllNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_new_lines_count",
		Help: "The last List(true) value for RepoStats.NewLinesCount.",
	})
	metricListAllDefaultBranchNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_default_branch_new_lines_count",
		Help: "The last List(true) value for RepoStats.DefaultBranchNewLinesCount.",
	})
	metricListAllOtherBranchesNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "zoekt_list_all_stats_other_branches_new_lines_count",
		Help: "The last List(true) value for RepoStats.OtherBranchesNewLinesCount.",
	})
)
 175
// rankedShard wraps the Searcher of a single shard together with the ranking
// information and repository metadata that the request path uses to order
// and filter shards.
type rankedShard struct {
	zoekt.Searcher

	priority float64 // maximum priority across all repos in the shard

	// We have out of band ranking on compound shards which can change even if
	// the shard file does not. So we compute a rank in getShards. We store
	// repos here to avoid the cost of List in the search request path.
	//
	// repos is nil only if that call failed. doSelectRepoSet treats a nil
	// repos as "contents unknown" and always searches such a shard.
	repos []*zoekt.Repository
}
 188
// loaded stores the state we compute when updating the state of shards from
// disk. Search, StreamSearch and List each read one such snapshot via
// getLoaded at the start of a request.
type loaded struct {
	// shards is the currently loaded shards sorted by decreasing rank and
	// should not be mutated.
	shards []*rankedShard

	// ready is true if sharded searcher has finished loading all initial
	// shards on startup. While it is false, requests report one crash in
	// their stats because results may be missing.
	ready bool
}
 200
// shardedSearcher searches over a set of shards, each identified by a string
// key.
type shardedSearcher struct {
	// Limit the number of parallel queries. Since searching is
	// CPU bound, we can't do better than #CPU queries in
	// parallel.  If we do so, we just create more memory
	// pressure.
	sched scheduler

	mu     sync.Mutex // protects writes to shards
	shards map[string]*rankedShard

	// NOTE(review): ready is presumably set by markReady and ranked presumably
	// caches the *loaded snapshot returned by getLoaded; both helpers are
	// defined outside this section — confirm there.
	ready  atomic.Bool
	ranked atomic.Value
}
 214
 215func newShardedSearcher(n int64) *shardedSearcher {
 216	ss := &shardedSearcher{
 217		shards: make(map[string]*rankedShard),
 218		sched:  newScheduler(n),
 219	}
 220	return ss
 221}
 222
// NewDirectorySearcher returns a searcher instance that loads all
// shards found in dir into memory. It blocks until the initial set of
// shards has been loaded and returns an error if that initial load fails.
func NewDirectorySearcher(dir string) (zoekt.Streamer, error) {
	return newDirectorySearcher(dir, true)
}
 228
// NewDirectorySearcherFast is like NewDirectorySearcher, but does not block
// on the initial loading of shards.
//
// This exists since in the case of zoekt-webserver we are happy with having
// partial availability since that is better than no availability on large
// instances. Requests served before loading has finished report a crash in
// their stats to signal that results may be incomplete.
func NewDirectorySearcherFast(dir string) (zoekt.Streamer, error) {
	return newDirectorySearcher(dir, false)
}
 238
 239func newDirectorySearcher(dir string, waitUntilReady bool) (zoekt.Streamer, error) {
 240	ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
 241	tl := &loader{
 242		ss: ss,
 243	}
 244	dw, err := newDirectoryWatcher(dir, tl)
 245	if err != nil {
 246		return nil, err
 247	}
 248
 249	if waitUntilReady {
 250		if err := dw.WaitUntilReady(); err != nil {
 251			return nil, err
 252		}
 253	}
 254
 255	ds := &directorySearcher{
 256		Streamer:         ss,
 257		directoryWatcher: dw,
 258	}
 259
 260	return &typeRepoSearcher{Streamer: ds}, nil
 261}
 262
// directorySearcher couples a Streamer with the DirectoryWatcher that feeds
// it shards, so that both can be shut down together via Close.
type directorySearcher struct {
	zoekt.Streamer

	directoryWatcher *DirectoryWatcher
}
 268
// Close stops the directory watcher and then closes the wrapped Streamer.
func (s *directorySearcher) Close() {
	// We need to Stop directoryWatcher first since it calls load/unload on
	// Searcher.
	s.directoryWatcher.Stop()
	s.Streamer.Close()
}
 275
// loader applies shard load and drop requests to a shardedSearcher. It is
// handed to newDirectoryWatcher by newDirectorySearcher.
type loader struct {
	ss *shardedSearcher
}
 279
 280func (tl *loader) load(keys ...string) {
 281	// This is called with all keys on startup, so once this function has
 282	// finished running shardedSearcher will be ready.
 283	defer tl.ss.markReady()
 284
 285	if len(keys) == 0 {
 286		// If there's nothing to load, we exit early here, but we want to mark
 287		// ourselves as ready.
 288		return
 289	}
 290
 291	var (
 292		mu           sync.Mutex     // synchronizes writes to the shards map
 293		wg           sync.WaitGroup // used to wait for all shards to load
 294		sem          = semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0)))
 295		loadedShards = make(map[string]zoekt.Searcher)
 296	)
 297
 298	publishLoaded := func() {
 299		mu.Lock()
 300		chunk := loadedShards
 301		loadedShards = make(map[string]zoekt.Searcher)
 302		mu.Unlock()
 303		tl.ss.replace(chunk)
 304	}
 305
 306	log.Printf("loading %d shard(s): %s", len(keys), humanTruncateList(keys, 5))
 307
 308	lastProgress := time.Now()
 309	for i, key := range keys {
 310		// If taking a while to start-up occasionally give a progress message
 311		if time.Since(lastProgress) > 5*time.Second {
 312			log.Printf("still need to load %d shards...", len(keys)-i)
 313			lastProgress = time.Now()
 314
 315			publishLoaded()
 316		}
 317
 318		_ = sem.Acquire(context.Background(), 1)
 319		wg.Add(1)
 320
 321		go func(key string) {
 322			defer sem.Release(1)
 323			defer wg.Done()
 324
 325			shard, err := loadShard(key)
 326			if err != nil {
 327				metricShardsLoadFailedTotal.Inc()
 328				log.Printf("reloading: %s, err %v ", key, err)
 329				return
 330			}
 331			metricShardsLoadedTotal.Inc()
 332
 333			mu.Lock()
 334			loadedShards[key] = shard
 335			mu.Unlock()
 336		}(key)
 337	}
 338
 339	wg.Wait()
 340
 341	publishLoaded()
 342}
 343
 344func (tl *loader) drop(keys ...string) {
 345	shards := make(map[string]zoekt.Searcher, len(keys))
 346	for _, key := range keys {
 347		shards[key] = nil
 348	}
 349	tl.ss.replace(shards)
 350}
 351
// String returns a fixed name identifying this searcher type.
func (ss *shardedSearcher) String() string {
	return "shardedSearcher"
}
 355
 356// Close closes references to open files. It may be called only once.
 357func (ss *shardedSearcher) Close() {
 358	ss.mu.Lock()
 359	shards := make(map[string]zoekt.Searcher, len(ss.shards))
 360	for k := range ss.shards {
 361		shards[k] = nil
 362	}
 363	ss.mu.Unlock()
 364
 365	ss.replace(shards)
 366}
 367
 368func selectRepoSet(shards []*rankedShard, q query.Q) ([]*rankedShard, query.Q) {
 369	and, ok := q.(*query.And)
 370	if ok {
 371		return doSelectRepoSet(shards, and)
 372	}
 373
 374	// We have queries which look like (reposet ...) and we want to do the same
 375	// optimizations. To simplify we just always wrap the query in And and then
 376	// on the return value call Simplify to unwrap. In particular this is
 377	// important for List calls.
 378	and = &query.And{Children: []query.Q{q}}
 379	shards, q = doSelectRepoSet(shards, and)
 380	return shards, query.Simplify(q)
 381}
 382
 383func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, query.Q) {
 384	// (and (reposet ...) (q))
 385	// (and true (q)) with a filtered shards
 386	// (and false) // noop
 387
 388	// (and (repobranches ...) (q))
 389	// (and (repobranches ...) (q))
 390
 391	// Note: we also support (and (repo ...) (q)) even though sourcegraph does
 392	// not generate those sorts of queries. This is to support manual testing.
 393
 394	hasReposForPredicate := func(pred func(repo *zoekt.Repository) bool) func(repos []*zoekt.Repository) (any, all bool) {
 395		return func(repos []*zoekt.Repository) (any, all bool) {
 396			any = false
 397			all = true
 398			for _, repo := range repos {
 399				b := pred(repo)
 400				any = any || b
 401				all = all && b
 402			}
 403			return any, all
 404		}
 405	}
 406
 407	for i, c := range and.Children {
 408		var setSize int
 409		var hasRepos func([]*zoekt.Repository) (bool, bool)
 410		switch setQuery := c.(type) {
 411		case *query.RepoSet:
 412			setSize = len(setQuery.Set)
 413			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 414				return setQuery.Set[repo.Name]
 415			})
 416		case *query.RepoIDs:
 417			setSize = int(setQuery.Repos.GetCardinality())
 418			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 419				return setQuery.Repos.Contains(repo.ID)
 420			})
 421		case *query.Repo:
 422			setSize = 0
 423			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 424				return setQuery.Regexp.MatchString(repo.Name)
 425			})
 426		case *query.BranchesRepos:
 427			for _, br := range setQuery.List {
 428				setSize += int(br.Repos.GetCardinality())
 429			}
 430
 431			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 432				for _, br := range setQuery.List {
 433					if br.Repos.Contains(repo.ID) {
 434						return true
 435					}
 436				}
 437				return false
 438			})
 439		default:
 440			continue
 441		}
 442
 443		// setSize may be larger than the number of shards we have. The size of
 444		// filtered is bounded by min(len(set), len(shards))
 445		if setSize > len(shards) {
 446			setSize = len(shards)
 447		}
 448
 449		filtered := make([]*rankedShard, 0, setSize)
 450		filteredAll := true
 451
 452		for _, s := range shards {
 453			if s.repos == nil {
 454				// repos is nil if we failed to List the shard. This shouldn't
 455				// happen, but if it does we don't know what is in it and must search
 456				// it without simplifying the query.
 457				filtered = append(filtered, s)
 458				filteredAll = false
 459			} else if any, all := hasRepos(s.repos); any {
 460				filtered = append(filtered, s)
 461				filteredAll = filteredAll && all
 462			}
 463		}
 464
 465		// We don't need to adjust the query since we are returning an empty set
 466		// of shards to search.
 467		if len(filtered) == 0 {
 468			return filtered, and
 469		}
 470
 471		// We can't simplify the query since we are searching shards which contain
 472		// repos we aren't supposed to search.
 473		if !filteredAll {
 474			return filtered, and
 475		}
 476
 477		// We don't want to mutate the original and, so we clone it before
 478		// mutating it.
 479		and = &query.And{Children: slices.Clone(and.Children)}
 480
 481		// This optimization allows us to avoid the work done by
 482		// indexData.simplify for each shard.
 483		//
 484		// For example if our query is (and (reposet foo bar) (content baz))
 485		// then at this point filtered is [foo bar] and q is the same. For each
 486		// shard indexData.simplify will simplify to (and true (content baz)) ->
 487		// (content baz). This work can be done now once, rather than per shard.
 488		switch c := c.(type) {
 489		case *query.RepoSet, *query.RepoIDs, *query.Repo:
 490			and.Children[i] = &query.Const{Value: true}
 491			return filtered, query.Simplify(and)
 492
 493		case *query.BranchesRepos:
 494			// We can only replace if all the repos want the same branches. We
 495			// simplify and just check that we are requesting 1 branch. The common
 496			// case is just asking for HEAD, so this should be effective.
 497			if len(c.List) != 1 {
 498				return filtered, and
 499			}
 500
 501			// Every repo wants the same branches, so we can replace RepoBranches
 502			// with a list of branch queries.
 503			and.Children[i] = &query.Branch{Pattern: c.List[0].Branch, Exact: true}
 504			return filtered, query.Simplify(and)
 505		}
 506
 507		// Stop after first RepoSet, otherwise we might append duplicate
 508		// shards to `filtered`
 509		return filtered, and
 510	}
 511
 512	return shards, and
 513}
 514
 515func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 516	tr, ctx := trace.New(ctx, "shardedSearcher.Search", "")
 517	defer func() {
 518		tr.Finish()
 519	}()
 520	ctx, cancel := context.WithCancel(ctx)
 521	defer cancel()
 522
 523	collectSender := newCollectSender(opts)
 524
 525	start := time.Now()
 526	proc, err := ss.sched.Acquire(ctx)
 527	if err != nil {
 528		return nil, err
 529	}
 530	defer proc.Release()
 531	tr.LazyPrintf("acquired process")
 532
 533	wait := time.Since(start)
 534	start = time.Now()
 535
 536	loaded := ss.getLoaded()
 537	done, err := streamSearch(ctx, proc, q, opts, loaded.shards, collectSender)
 538	defer done()
 539	if err != nil {
 540		return nil, err
 541	}
 542
 543	aggregate, ok := collectSender.Done()
 544	if !ok {
 545		aggregate = &zoekt.SearchResult{
 546			RepoURLs:      map[string]string{},
 547			LineFragments: map[string]string{},
 548		}
 549	}
 550
 551	copyFiles(aggregate)
 552
 553	if !loaded.ready {
 554		// We may have missed results due to not being fully loaded.
 555		aggregate.Stats.Crashes++
 556	}
 557
 558	aggregate.Stats.Wait = wait
 559	aggregate.Stats.Duration = time.Since(start)
 560
 561	return aggregate, nil
 562}
 563
 564func (ss *shardedSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoekt.SearchOptions, sender zoekt.Sender) (err error) {
 565	tr, ctx := trace.New(ctx, "shardedSearcher.StreamSearch", "")
 566	defer func() {
 567		if err != nil {
 568			tr.LazyPrintf("error: %v", err)
 569			tr.SetError(err)
 570		}
 571		tr.Finish()
 572	}()
 573
 574	start := time.Now()
 575	proc, err := ss.sched.Acquire(ctx)
 576	if err != nil {
 577		return err
 578	}
 579	defer proc.Release()
 580	tr.LazyPrintf("acquired process")
 581
 582	loaded := ss.getLoaded()
 583	shards := loaded.shards
 584
 585	maxPendingPriority := math.Inf(-1)
 586	if len(shards) > 0 {
 587		maxPendingPriority = shards[0].priority
 588	}
 589
 590	stillLoadingCrashes := 0
 591	if !loaded.ready {
 592		// We may have missed results due to not being fully loaded.
 593		stillLoadingCrashes++
 594	}
 595
 596	sender.Send(&zoekt.SearchResult{
 597		Stats: zoekt.Stats{
 598			Crashes: stillLoadingCrashes,
 599			Wait:    time.Since(start),
 600		},
 601		Progress: zoekt.Progress{
 602			MaxPendingPriority: maxPendingPriority,
 603		},
 604	})
 605
 606	// Matches flow from the shards up the stack in the following order:
 607	//
 608	// 1. Search shards
 609	// 2. flushCollectSender (aggregate)
 610	// 3. limitSender (limit)
 611	// 4. copyFileSender (copy)
 612	//
 613	// For streaming, the wrapping has to happen in the inverted order.
 614	sender = copyFileSender(sender)
 615
 616	if truncator, hasLimits := zoekt.NewDisplayTruncator(opts); hasLimits {
 617		var cancel context.CancelFunc
 618		ctx, cancel = context.WithCancel(ctx)
 619		defer cancel()
 620		sender = limitSender(cancel, sender, truncator)
 621	}
 622
 623	sender, flush := newFlushCollectSender(opts, sender)
 624
 625	done, err := streamSearch(ctx, proc, q, opts, shards, sender)
 626
 627	// Even though streaming is done, we may have results sitting in a buffer we
 628	// need to flush. So we need to send those before calling done.
 629	flush()
 630	done()
 631
 632	return err
 633}
 634
 635// streamSearch is an internal helper since both Search and StreamSearch are
 636// largely similar.
 637//
 638// done must always be called, even if err is non-nil. The SearchResults sent
 639// via sender contain references to the underlying mmap data that the garbage
 640// collector can't see. Calling done informs the garbage collector it is free
 641// to collect those shards. The caller must call copyFiles on any
 642// SearchResults it returns/streams out before calling done.
 643func streamSearch(ctx context.Context, proc *process, q query.Q, opts *zoekt.SearchOptions, shards []*rankedShard, sender zoekt.Sender) (done func(), err error) {
 644	tr, ctx := trace.New(ctx, "shardedSearcher.streamSearch", "")
 645	overallStart := time.Now()
 646	metricSearchRunning.Inc()
 647	defer func() {
 648		metricSearchRunning.Dec()
 649		metricSearchDuration.Observe(time.Since(overallStart).Seconds())
 650		if err != nil {
 651			metricSearchFailedTotal.Inc()
 652
 653			tr.LazyPrintf("error: %v", err)
 654			tr.SetError(err)
 655		}
 656		tr.Finish()
 657	}()
 658
 659	// Select the subset of shards that we will search over for the given query.
 660	{
 661		beforeLen := len(shards)
 662		beforeQ := q
 663		shards, q = selectRepoSet(shards, q)
 664		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
 665	}
 666
 667	if len(shards) == 0 {
 668		return func() {}, nil
 669	}
 670
 671	var cancel context.CancelFunc
 672	if opts.MaxWallTime == 0 {
 673		ctx, cancel = context.WithCancel(ctx)
 674	} else {
 675		ctx, cancel = context.WithTimeout(ctx, opts.MaxWallTime)
 676	}
 677
 678	defer cancel()
 679
 680	// We set the number of workers to GOMAXPROCS, or the number of shards,
 681	// whichever is smaller.
 682	workers := runtime.GOMAXPROCS(0)
 683	if workers > len(shards) {
 684		workers = len(shards)
 685	}
 686
 687	type result struct {
 688		priority float64
 689		*zoekt.SearchResult
 690		err error
 691	}
 692
 693	var (
 694		// buffered channels to continue searching when sending back results
 695		// takes a while / blocks. The maximum pending result set is workers * 2.
 696		results = make(chan *result, workers)
 697		search  = make(chan *rankedShard, workers)
 698		wg      sync.WaitGroup
 699	)
 700
 701	// Start workers that receive shards from the search channel, search them,
 702	// and send the results to the results channel. This process is repeated
 703	// until the search channel is closed.
 704	//
 705	// Note: Making "search" a buffered channel has the effect of limiting the number of parallel shard searches.
 706	// Since searching is mostly CPU bound, limiting parallel shard searches also reduces the peak working set.
 707	wg.Add(workers)
 708	for i := 0; i < workers; i++ {
 709		go func() {
 710			defer wg.Done()
 711			for s := range search {
 712				sr, err := searchOneShard(ctx, s, q, opts)
 713				r := &result{priority: s.priority, SearchResult: sr, err: err}
 714				results <- r
 715			}
 716		}()
 717	}
 718
 719	go func() {
 720		wg.Wait()
 721		close(results)
 722	}()
 723
 724	var (
 725		pending = make(prioritySlice, 0, workers)
 726		shard   = 0
 727		next    = shards[shard]
 728
 729		// We need a separate nil-able reference to the same channel so we can close(search) for the worker
 730		// go-routines to finish but also set work to nil in order for the select statement below to ignore
 731		// that case when we want to stop a search. This is needed because sending on a closed channel panics.
 732		work = search
 733	)
 734
 735	stop := func() {
 736		if work != nil {
 737			close(search)
 738			work = nil
 739			next = nil
 740		}
 741	}
 742
 743	// tracked so we can stop when we hit TotalMaxMatchCount
 744	var totalMatchCount int
 745
 746search:
 747	for {
 748		// At the top of each iteration, have the proc associated with this search yield its won "timeslice"
 749		// to possibly allow other searches to make progress
 750		_ = proc.Yield(ctx) // Note: we let searchOneShard handle context errors
 751
 752		select {
 753		case work <- next: // is there a worker available to search the next shard?
 754			pending.append(next.priority)
 755
 756			shard++
 757			if shard == len(shards) {
 758				stop()
 759			} else {
 760				next = shards[shard]
 761			}
 762		case r, ok := <-results: // is there a result to send back?
 763			if !ok {
 764				break search
 765			}
 766
 767			// delete this result's priority from pending before computing the new max pending priority
 768			pending.remove(r.priority)
 769
 770			if r.err != nil {
 771				// Set final error and stop searching new shards, but consume any pending
 772				// search results.
 773				stop()
 774				err = r.err
 775				continue
 776			}
 777
 778			// Update the match count statistics and stop searching new shards if we've
 779			// reached the limit set in the options.
 780			totalMatchCount += r.SearchResult.Stats.MatchCount
 781			if opts.TotalMaxMatchCount > 0 && totalMatchCount > opts.TotalMaxMatchCount {
 782				stop()
 783			}
 784
 785			observeMetrics(r.SearchResult)
 786
 787			r.Priority = r.priority
 788			r.MaxPendingPriority = pending.max()
 789
 790			sendByRepository(r.SearchResult, opts, sender) // send the result back to the client
 791		}
 792	}
 793
 794	return func() { runtime.KeepAlive(shards) }, err
 795}
 796
 797// sendByRepository splits a zoekt.SearchResult by repository and calls
 798// sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult
 799// to contain results with the same zoekt.SearchResult.Priority only.
 800//
 801// We split by repository instead of by priority because it is easier to set
 802// RepoURLs and LineFragments in zoekt.SearchResult.
 803func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) {
 804	if len(result.RepoURLs) <= 1 || len(result.Files) == 0 {
 805		zoekt.SortFiles(result.Files)
 806		sender.Send(result)
 807		return
 808	}
 809
 810	send := func(repoName string, a, b int, stats zoekt.Stats) {
 811		zoekt.SortFiles(result.Files[a:b])
 812		sender.Send(&zoekt.SearchResult{
 813			Stats: stats,
 814			Progress: zoekt.Progress{
 815				Priority:           result.Files[a].RepositoryPriority,
 816				MaxPendingPriority: result.MaxPendingPriority,
 817			},
 818			Files:         result.Files[a:b],
 819			RepoURLs:      map[string]string{repoName: result.RepoURLs[repoName]},
 820			LineFragments: map[string]string{repoName: result.LineFragments[repoName]},
 821		})
 822	}
 823
 824	var startIndex, endIndex int
 825	curRepoID := result.Files[0].RepositoryID
 826	curRepoName := result.Files[0].Repository
 827
 828	fm := zoekt.FileMatch{}
 829	for endIndex, fm = range result.Files {
 830		if curRepoID != fm.RepositoryID {
 831			// Stats must stay aggregate-able, hence we sent the aggregate stats with the
 832			// last event.
 833			send(curRepoName, startIndex, endIndex, zoekt.Stats{})
 834
 835			startIndex = endIndex
 836			curRepoID = fm.RepositoryID
 837			curRepoName = fm.Repository
 838		}
 839	}
 840
 841	send(curRepoName, startIndex, endIndex+1, result.Stats)
 842}
 843
 844func observeMetrics(sr *zoekt.SearchResult) {
 845	metricSearchContentBytesLoadedTotal.Add(float64(sr.Stats.ContentBytesLoaded))
 846	metricSearchIndexBytesLoadedTotal.Add(float64(sr.Stats.IndexBytesLoaded))
 847	metricSearchCrashesTotal.Add(float64(sr.Stats.Crashes))
 848	metricSearchFileCountTotal.Add(float64(sr.Stats.FileCount))
 849	metricSearchShardFilesConsideredTotal.Add(float64(sr.Stats.ShardFilesConsidered))
 850	metricSearchFilesConsideredTotal.Add(float64(sr.Stats.FilesConsidered))
 851	metricSearchFilesLoadedTotal.Add(float64(sr.Stats.FilesLoaded))
 852	metricSearchFilesSkippedTotal.Add(float64(sr.Stats.FilesSkipped))
 853	metricSearchShardsSkippedTotal.Add(float64(sr.Stats.ShardsSkipped))
 854	metricSearchMatchCountTotal.Add(float64(sr.Stats.MatchCount))
 855	metricSearchNgramMatchesTotal.Add(float64(sr.Stats.NgramMatches))
 856	metricSearchNgramLookupsTotal.Add(float64(sr.Stats.NgramLookups))
 857	metricSearchRegexpsConsideredTotal.Add(float64(sr.Stats.RegexpsConsidered))
 858}
 859
 860func copySlice(src *[]byte) {
 861	if *src == nil {
 862		return
 863	}
 864	dst := make([]byte, len(*src))
 865	copy(dst, *src)
 866	*src = dst
 867}
 868
 869func copyFiles(sr *zoekt.SearchResult) {
 870	for i := range sr.Files {
 871		copySlice(&sr.Files[i].Content)
 872		copySlice(&sr.Files[i].Checksum)
 873		for l := range sr.Files[i].LineMatches {
 874			copySlice(&sr.Files[i].LineMatches[l].Line)
 875			copySlice(&sr.Files[i].LineMatches[l].Before)
 876			copySlice(&sr.Files[i].LineMatches[l].After)
 877		}
 878		for c := range sr.Files[i].ChunkMatches {
 879			copySlice(&sr.Files[i].ChunkMatches[c].Content)
 880		}
 881	}
 882}
 883
 884func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 885	metricSearchShardRunning.Inc()
 886	defer func() {
 887		metricSearchShardRunning.Dec()
 888		if e := recover(); e != nil {
 889			log.Printf("crashed shard: %s: %#v, %s", s, e, debug.Stack())
 890
 891			if sr == nil {
 892				sr = &zoekt.SearchResult{}
 893			}
 894			sr.Stats.Crashes = 1
 895		}
 896	}()
 897
 898	return s.Search(ctx, q, opts)
 899}
 900
// shardListResult is the outcome of listing a single shard, as sent by
// listOneShard on its sink channel.
type shardListResult struct {
	rl  *zoekt.RepoList // list result; a RepoList with Crashes set to 1 if the shard panicked
	err error           // error returned by the shard's List call, if any
}
 905
 906func listOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.ListOptions, sink chan shardListResult) {
 907	metricListShardRunning.Inc()
 908	defer func() {
 909		metricListShardRunning.Dec()
 910		if r := recover(); r != nil {
 911			log.Printf("crashed shard: %s: %s, %s", s.String(), r, debug.Stack())
 912			sink <- shardListResult{
 913				&zoekt.RepoList{Crashes: 1}, nil,
 914			}
 915		}
 916	}()
 917
 918	ms, err := s.List(ctx, q, opts)
 919	sink <- shardListResult{ms, err}
 920}
 921
 922func (ss *shardedSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) {
 923	tr, ctx := trace.New(ctx, "shardedSearcher.List", "")
 924	metricListRunning.Inc()
 925	defer func() {
 926		metricListRunning.Dec()
 927		if rl != nil {
 928			tr.LazyPrintf("repos.size=%d reposmap.size=%d crashes=%d", len(rl.Repos), len(rl.ReposMap), rl.Crashes)
 929		}
 930		if err != nil {
 931			tr.LazyPrintf("error: %v", err)
 932			tr.SetError(err)
 933		}
 934		tr.Finish()
 935	}()
 936
 937	q = query.Simplify(q)
 938	isAll := false
 939	if c, ok := q.(*query.Const); ok {
 940		isAll = c.Value
 941	}
 942
 943	proc, err := ss.sched.Acquire(ctx)
 944	if err != nil {
 945		return nil, err
 946	}
 947	defer proc.Release()
 948	tr.LazyPrintf("acquired process")
 949
 950	loaded := ss.getLoaded()
 951	shards := loaded.shards
 952
 953	// Setup what we return now, since we may short circuit if there are no
 954	// shards to search.
 955	stillLoadingCrashes := 0
 956	if !loaded.ready {
 957		// We may have missed results due to not being fully loaded.
 958		stillLoadingCrashes++
 959	}
 960	agg := zoekt.RepoList{
 961		Crashes:  stillLoadingCrashes,
 962		ReposMap: zoekt.ReposMap{},
 963		Repos:    []*zoekt.RepoListEntry{},
 964	}
 965
 966	// PERF: Select the subset of shards that we will search over for the given
 967	// query. A common List query only asks for a specific repo, so this is an
 968	// important optimization.
 969	{
 970		beforeLen := len(shards)
 971		beforeQ := q
 972		shards, q = selectRepoSet(shards, q)
 973		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
 974	}
 975
 976	if len(shards) == 0 {
 977		return &agg, nil
 978	}
 979
 980	shardCount := len(shards)
 981	all := make(chan shardListResult, shardCount)
 982	feeder := make(chan zoekt.Searcher, len(shards))
 983	for _, s := range shards {
 984		feeder <- s
 985	}
 986	close(feeder)
 987
 988	for i := 0; i < runtime.GOMAXPROCS(0); i++ {
 989		go func() {
 990			for s := range feeder {
 991				listOneShard(ctx, s, q, opts, all)
 992			}
 993		}()
 994	}
 995
 996	uniq := map[string]*zoekt.RepoListEntry{}
 997
 998	for range shards {
 999		r := <-all
1000		if r.err != nil {
1001			return nil, r.err
1002		}
1003
1004		agg.Crashes += r.rl.Crashes
1005		agg.Stats.Add(&r.rl.Stats)
1006
1007		for _, r := range r.rl.Repos {
1008			prev, ok := uniq[r.Repository.Name]
1009			if !ok {
1010				cp := *r // We need to copy because we mutate r.Stats when merging duplicates
1011				uniq[r.Repository.Name] = &cp
1012			} else {
1013				prev.Stats.Add(&r.Stats)
1014			}
1015		}
1016
1017		for id, r := range r.rl.ReposMap {
1018			_, ok := agg.ReposMap[id]
1019			if !ok {
1020				agg.ReposMap[id] = r
1021			}
1022		}
1023	}
1024
1025	agg.Repos = make([]*zoekt.RepoListEntry, 0, len(uniq))
1026	for _, r := range uniq {
1027		agg.Repos = append(agg.Repos, r)
1028	}
1029
1030	// Only one of these fields is populated and in all cases the size of that
1031	// field is the number of Repos.
1032	//
1033	// Note: we don't just add individual Stats.Repos since a repository can
1034	// have multiple shards.
1035	agg.Stats.Repos = len(uniq) + len(agg.ReposMap)
1036
1037	if isAll && len(agg.Repos) > 0 {
1038		reportListAllMetrics(agg.Repos)
1039	}
1040
1041	return &agg, nil
1042}
1043
1044func reportListAllMetrics(repos []*zoekt.RepoListEntry) {
1045	var stats zoekt.RepoStats
1046	for _, r := range repos {
1047		stats.Add(&r.Stats)
1048	}
1049
1050	metricListAllRepos.Set(float64(stats.Repos))
1051	metricListAllIndexBytes.Set(float64(stats.IndexBytes))
1052	metricListAllContentBytes.Set(float64(stats.ContentBytes))
1053	metricListAllDocuments.Set(float64(stats.Documents))
1054	metricListAllShards.Set(float64(stats.Shards))
1055	metricListAllNewLinesCount.Set(float64(stats.NewLinesCount))
1056	metricListAllDefaultBranchNewLinesCount.Set(float64(stats.DefaultBranchNewLinesCount))
1057	metricListAllOtherBranchesNewLinesCount.Set(float64(stats.OtherBranchesNewLinesCount))
1058}
1059
1060// getLoaded returns the currently loaded shards. Shared so do not mutate.
1061func (s *shardedSearcher) getLoaded() loaded {
1062	// next commit will store the true value of this, for now we keep the
1063	// backwards compatible behaviour.
1064	ready := s.ready.Load()
1065	// ranked is loaded after ready to avoid a race were ready is true but
1066	// ranked is still not the final set of shards.
1067	ranked, _ := s.ranked.Load().([]*rankedShard)
1068	return loaded{
1069		shards: ranked,
1070		ready:  ready,
1071	}
1072}
1073
1074func mkRankedShard(s zoekt.Searcher) *rankedShard {
1075	q := query.Const{Value: true}
1076	// We need to use UnsafeCtx here, otherwise we cannot return a proper
1077	// rankedShard. On the user request path we use selectRepoSet which relies on
1078	// rankedShard.repos being set.
1079	result, err := s.List(systemtenant.UnsafeCtx, &q, nil)
1080	if err != nil {
1081		log.Printf("mkRankedShard(%s): failed to cache repository list: %v", s, err)
1082		return &rankedShard{Searcher: s}
1083	}
1084
1085	var (
1086		maxPriority float64
1087		repos       = make([]*zoekt.Repository, 0, len(result.Repos))
1088	)
1089	for i := range result.Repos {
1090		repo := &result.Repos[i].Repository
1091		repos = append(repos, repo)
1092		if repo.RawConfig != nil {
1093			priority, _ := strconv.ParseFloat(repo.RawConfig["priority"], 64)
1094			if priority > maxPriority {
1095				maxPriority = priority
1096			}
1097		}
1098	}
1099
1100	return &rankedShard{
1101		Searcher: s,
1102		repos:    repos,
1103		priority: maxPriority,
1104	}
1105}
1106
// markReady should be called once all shards have been passed into replace on
// startup. Once s is marked as ready it stops reporting a Crash in the
// response Stats.
//
// It flips the ready flag from false to true; calling it again once the flag
// is set has no further effect.
func (s *shardedSearcher) markReady() {
	s.ready.CompareAndSwap(false, true)
}
1113
1114func (s *shardedSearcher) replace(shards map[string]zoekt.Searcher) {
1115	if len(shards) == 0 {
1116		return
1117	}
1118
1119	defer func(began time.Time) {
1120		metricShardsBatchReplaceDurationSeconds.Observe(time.Since(began).Seconds())
1121	}(time.Now())
1122
1123	s.mu.Lock()
1124	defer s.mu.Unlock()
1125
1126	for key, shard := range shards {
1127		var r *rankedShard
1128		if shard != nil {
1129			r = mkRankedShard(shard)
1130		}
1131
1132		old := s.shards[key]
1133		if shard == nil {
1134			delete(s.shards, key)
1135		} else {
1136			s.shards[key] = r
1137		}
1138
1139		if old != nil && old.Searcher != nil {
1140			//                 _ ___                /^^\ /^\  /^^\_
1141			//     _          _@)@) \            ,,/ '` ~ `'~~ ', `\.
1142			//   _/o\_ _ _ _/~`.`...'~\        ./~~..,'`','',.,' '  ~:
1143			//  / `,'.~,~.~  .   , . , ~|,   ,/ .,' , ,. .. ,,.   `,  ~\_
1144			// ( ' _' _ '_` _  '  .    , `\_/ .' ..' '  `  `   `..  `,   \_
1145			//  ~V~ V~ V~ V~ ~\ `   ' .  '    , ' .,.,''`.,.''`.,.``. ',   \_
1146			//   _/\ /\ /\ /\_/, . ' ,   `_/~\_ .' .,. ,, , _/~\_ `. `. '.,  \_
1147			//  < ~ ~ '~`'~'`, .,  .   `_: ::: \_ '      `_/ ::: \_ `.,' . ',  \_
1148			//   \ ' `_  '`_    _    ',/ _::_::_ \ _    _/ _::_::_ \   `.,'.,`., \-,-,-,_,_,
1149			//    `'~~ `'~~ `'~~ `'~~  \(_)(_)(_)/  `~~' \(_)(_)(_)/ ~'`\_.._,._,'_;_;_;_;_;
1150			//
1151			// We can't just call Close now, because there may be ongoing searches
1152			// which have old in the shards list. Previously we used an exclusive
1153			// lock to guarantee there were no concurrent searches. However, that
1154			// led to blocking on the read path.
1155			//
1156			// We could introduce granular locking per rankedShard to know when
1157			// there are no more references. However, this becomes tricky in
1158			// practice. Instead we rely on the garbage collector noticing old is no
1159			// longer used. We take care in our searchers to runtime.KeepAlive until
1160			// we have stopped referencing the underling mmap data.
1161			runtime.SetFinalizer(old, func(r *rankedShard) {
1162				r.Close()
1163			})
1164		}
1165	}
1166
1167	ranked := make([]*rankedShard, 0, len(s.shards))
1168	for _, r := range s.shards {
1169		ranked = append(ranked, r)
1170	}
1171
1172	sort.Slice(ranked, func(i, j int) bool {
1173		priorityDiff := ranked[i].priority - ranked[j].priority
1174		if priorityDiff != 0 {
1175			return priorityDiff > 0
1176		}
1177		if len(ranked[i].repos) == 0 || len(ranked[j].repos) == 0 {
1178			// Protect against empty names which can happen if we fail to List or
1179			// the shard is full of tombstones. Prefer the shard which has names.
1180			return len(ranked[i].repos) >= len(ranked[j].repos)
1181		}
1182		return ranked[i].repos[0].Name < ranked[j].repos[0].Name
1183	})
1184
1185	s.ranked.Store(ranked)
1186
1187	metricShardsLoaded.Set(float64(len(ranked)))
1188}
1189
1190func loadShard(fn string) (zoekt.Searcher, error) {
1191	f, err := os.Open(fn)
1192	if err != nil {
1193		return nil, err
1194	}
1195
1196	iFile, err := zoekt.NewIndexFile(f)
1197	if err != nil {
1198		return nil, err
1199	}
1200	s, err := zoekt.NewSearcher(iFile)
1201	if err != nil {
1202		iFile.Close()
1203		return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
1204	}
1205
1206	return s, nil
1207}
1208
// prioritySlice is a deliberately simple collection of priorities supporting
// three operations: adding a value, removing a value, and reporting the
// current maximum. Every operation is O(n), which is acceptable because N is
// restricted to GOMAXPROCS (i.e., number of cpu cores) by the shardedSearcher
// interface.
type prioritySlice []float64

// append adds pri to the collection.
func (p *prioritySlice) append(pri float64) {
	grown := append(*p, pri)
	*p = grown
}

// remove deletes the first occurrence of pri, if there is one. Element order
// is not preserved: the last element is moved into the freed slot.
func (p *prioritySlice) remove(pri float64) {
	s := *p
	last := len(s) - 1
	for i := 0; i <= last; i++ {
		if s[i] != pri {
			continue
		}
		// Overwrite the match with the tail element (a no-op when the match is
		// the tail itself), then drop the tail.
		s[i] = s[last]
		*p = s[:last]
		return
	}
}

// max returns the largest priority in the collection, or negative infinity
// when the collection is empty.
func (p *prioritySlice) max() float64 {
	// remove() and max() could be fused into a single pass, but keeping them
	// separate is easier to read and the cost of the extra lock and loop is
	// almost certainly irrelevant.
	best := math.Inf(-1)
	for i := range *p {
		if v := (*p)[i]; v > best {
			best = v
		}
	}
	return best
}
Configure Feed

Configure Feed