search/shards.go at bf9caa2750947b52df419fb4dd3d0ab000bd182f · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / search / shards.go
at bf9caa2750947b52df419fb4dd3d0ab000bd182f 38 kB View raw
Keegan Carruthers-Smith refactor(all): goimports -w -local github.com/sourcegraph/zoekt (#948) 1y ago
   1// Copyright 2016 Google Inc. All rights reserved.
   2//
   3// Licensed under the Apache License, Version 2.0 (the "License");
   4// you may not use this file except in compliance with the License.
   5// You may obtain a copy of the License at
   6//
   7//    http://www.apache.org/licenses/LICENSE-2.0
   8//
   9// Unless required by applicable law or agreed to in writing, software
  10// distributed under the License is distributed on an "AS IS" BASIS,
  11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12// See the License for the specific language governing permissions and
  13// limitations under the License.
  14
  15package search
  16
  17import (
  18	"context"
  19	"fmt"
  20	"log"
  21	"math"
  22	"os"
  23	"runtime"
  24	"runtime/debug"
  25	"slices"
  26	"sort"
  27	"strconv"
  28	"sync"
  29	"time"
  30
  31	"github.com/prometheus/client_golang/prometheus"
  32	"github.com/prometheus/client_golang/prometheus/promauto"
  33	"go.uber.org/atomic"
  34	"golang.org/x/sync/semaphore"
  35
  36	"github.com/sourcegraph/zoekt"
  37	"github.com/sourcegraph/zoekt/index"
  38	"github.com/sourcegraph/zoekt/internal/tenant/systemtenant"
  39	"github.com/sourcegraph/zoekt/internal/trace"
  40	"github.com/sourcegraph/zoekt/query"
  41)
  42
  43var (
  44	metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{
  45		Name: "zoekt_shards_loaded",
  46		Help: "The number of shards currently loaded",
  47	})
  48	metricShardsLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  49		Name: "zoekt_shards_loaded_total",
  50		Help: "The total number of shards loaded",
  51	})
  52	metricShardsLoadFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
  53		Name: "zoekt_shards_load_failed_total",
  54		Help: "The total number of shard loads that failed",
  55	})
  56
  57	metricSearchRunning = promauto.NewGauge(prometheus.GaugeOpts{
  58		Name: "zoekt_search_running",
  59		Help: "The number of concurrent search requests running",
  60	})
  61	metricSearchShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
  62		Name: "zoekt_search_shard_running",
  63		Help: "The number of concurrent search requests in a shard running",
  64	})
  65	metricSearchFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
  66		Name: "zoekt_search_failed_total",
  67		Help: "The total number of search requests that failed",
  68	})
  69	metricSearchDuration = promauto.NewHistogram(prometheus.HistogramOpts{
  70		Name:    "zoekt_search_duration_seconds",
  71		Help:    "The duration a search request took in seconds",
  72		Buckets: prometheus.DefBuckets, // DefBuckets good for service timings
  73	})
  74
  75	// A Counter per Stat. Name should match field in zoekt.Stats.
  76	metricSearchContentBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  77		Name: "zoekt_search_content_loaded_bytes_total",
  78		Help: "Total amount of I/O for reading contents",
  79	})
  80	metricSearchIndexBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  81		Name: "zoekt_search_index_loaded_bytes_total",
  82		Help: "Total amount of I/O for reading from index",
  83	})
  84	metricSearchCrashesTotal = promauto.NewCounter(prometheus.CounterOpts{
  85		Name: "zoekt_search_crashes_total",
  86		Help: "Total number of search shards that had a crash",
  87	})
  88	metricSearchFileCountTotal = promauto.NewCounter(prometheus.CounterOpts{
  89		Name: "zoekt_search_file_count_total",
  90		Help: "Total number of files containing a match",
  91	})
  92	metricSearchShardFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
  93		Name: "zoekt_search_shard_files_considered_total",
  94		Help: "Total number of files in shards that we considered",
  95	})
  96	metricSearchFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
  97		Name: "zoekt_search_files_considered_total",
  98		Help: "Total files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true",
  99	})
 100	metricSearchFilesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
 101		Name: "zoekt_search_files_loaded_total",
 102		Help: "Total files for which we loaded file content to verify substring matches",
 103	})
 104	metricSearchFilesSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
 105		Name: "zoekt_search_files_skipped_total",
 106		Help: "Total candidate files whose contents weren't examined because we gathered enough matches",
 107	})
 108	metricSearchShardsSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
 109		Name: "zoekt_search_shards_skipped_total",
 110		Help: "Total shards that we did not process because a query was canceled",
 111	})
 112	metricSearchMatchCountTotal = promauto.NewCounter(prometheus.CounterOpts{
 113		Name: "zoekt_search_match_count_total",
 114		Help: "Total number of non-overlapping matches",
 115	})
 116	metricSearchNgramMatchesTotal = promauto.NewCounter(prometheus.CounterOpts{
 117		Name: "zoekt_search_ngram_matches_total",
 118		Help: "Total number of candidate matches as a result of searching ngrams",
 119	})
 120	metricSearchNgramLookupsTotal = promauto.NewCounter(prometheus.CounterOpts{
 121		Name: "zoekt_search_ngram_lookups_total",
 122		Help: "Total number of times we accessed an ngram in the index",
 123	})
 124	metricSearchRegexpsConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
 125		Name: "zoekt_search_regexps_considered_total",
 126		Help: "Total number of times regexp was called on files that we evaluated",
 127	})
 128
 129	metricListRunning = promauto.NewGauge(prometheus.GaugeOpts{
 130		Name: "zoekt_list_running",
 131		Help: "The number of concurrent list requests running",
 132	})
 133	metricListShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
 134		Name: "zoekt_list_shard_running",
 135		Help: "The number of concurrent list requests in a shard running",
 136	})
 137	metricShardsBatchReplaceDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
 138		Name:    "zoekt_shards_batch_replace_duration_seconds",
 139		Help:    "The time it takes to replace a batch of Searchers.",
 140		Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
 141	})
 142	metricListAllRepos = promauto.NewGauge(prometheus.GaugeOpts{
 143		Name: "zoekt_list_all_stats_repos",
 144		Help: "The last List(true) value for RepoStats.Repos. Repos is used for aggregrating the number of repositories.",
 145	})
 146	metricListAllShards = promauto.NewGauge(prometheus.GaugeOpts{
 147		Name: "zoekt_list_all_stats_shards",
 148		Help: "The last List(true) value for RepoStats.Shards. Shards is the total number of search shards.",
 149	})
 150	metricListAllDocuments = promauto.NewGauge(prometheus.GaugeOpts{
 151		Name: "zoekt_list_all_stats_documents",
 152		Help: "The last List(true) value for RepoStats.Documents. Documents holds the number of documents or files.",
 153	})
 154	metricListAllIndexBytes = promauto.NewGauge(prometheus.GaugeOpts{
 155		Name: "zoekt_list_all_stats_index_bytes",
 156		Help: "The last List(true) value for RepoStats.IndexBytes. IndexBytes is the amount of RAM used for index overhead.",
 157	})
 158	metricListAllContentBytes = promauto.NewGauge(prometheus.GaugeOpts{
 159		Name: "zoekt_list_all_stats_content_bytes",
 160		Help: "The last List(true) value for RepoStats.ContentBytes. ContentBytes is the amount of RAM used for raw content.",
 161	})
 162	metricListAllNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 163		Name: "zoekt_list_all_stats_new_lines_count",
 164		Help: "The last List(true) value for RepoStats.NewLinesCount.",
 165	})
 166	metricListAllDefaultBranchNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 167		Name: "zoekt_list_all_stats_default_branch_new_lines_count",
 168		Help: "The last List(true) value for RepoStats.DefaultBranchNewLinesCount.",
 169	})
 170	metricListAllOtherBranchesNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 171		Name: "zoekt_list_all_stats_other_branches_new_lines_count",
 172		Help: "The last List(true) value for RepoStats.OtherBranchesNewLinesCount.",
 173	})
 174)
 175
 176type rankedShard struct {
 177	zoekt.Searcher
 178
 179	priority float64 // maximum priority across all repos in the shard
 180
 181	// We have out of band ranking on compound shards which can change even if
 182	// the shard file does not. So we compute a rank in getShards. We store
 183	// repos here to avoid the cost of List in the search request path.
 184	//
 185	// repos is nil only if that call failed.
 186	repos []*zoekt.Repository
 187}
 188
 189// loaded stores the state we compute when updating the state of shards from
 190// disk.
 191type loaded struct {
 192	// shards is the currently loaded shards sorted by decreasing rank and
 193	// should not be mutated.
 194	shards []*rankedShard
 195
 196	// ready is true if sharded searcher has finished loading all initial
 197	// shards on startup.
 198	ready bool
 199}
 200
 201type shardedSearcher struct {
 202	// Limit the number of parallel queries. Since searching is
 203	// CPU bound, we can't do better than #CPU queries in
 204	// parallel.  If we do so, we just create more memory
 205	// pressure.
 206	sched scheduler
 207
 208	mu     sync.Mutex // protects writes to shards
 209	shards map[string]*rankedShard
 210
 211	ready  atomic.Bool
 212	ranked atomic.Value
 213}
 214
 215func newShardedSearcher(n int64) *shardedSearcher {
 216	ss := &shardedSearcher{
 217		shards: make(map[string]*rankedShard),
 218		sched:  newScheduler(n),
 219	}
 220	return ss
 221}
 222
 223// NewDirectorySearcher returns a searcher instance that loads all
 224// shards corresponding to a glob into memory.
 225func NewDirectorySearcher(dir string) (zoekt.Streamer, error) {
 226	return newDirectorySearcher(dir, true)
 227}
 228
 229// NewDirectorySearcherFast is like NewDirectorySearcher, but does not block
 230// on the initial loading of shards.
 231//
 232// This exists since in the case of zoekt-webserver we are happy with having
 233// partial availability since that is better than no availability on large
 234// instances.
 235func NewDirectorySearcherFast(dir string) (zoekt.Streamer, error) {
 236	return newDirectorySearcher(dir, false)
 237}
 238
 239func newDirectorySearcher(dir string, waitUntilReady bool) (zoekt.Streamer, error) {
 240	ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
 241	tl := &loader{
 242		ss: ss,
 243	}
 244	dw, err := newDirectoryWatcher(dir, tl)
 245	if err != nil {
 246		return nil, err
 247	}
 248
 249	if waitUntilReady {
 250		if err := dw.WaitUntilReady(); err != nil {
 251			return nil, err
 252		}
 253	}
 254
 255	ds := &directorySearcher{
 256		Streamer:         ss,
 257		directoryWatcher: dw,
 258	}
 259
 260	return &typeRepoSearcher{Streamer: ds}, nil
 261}
 262
 263type directorySearcher struct {
 264	zoekt.Streamer
 265
 266	directoryWatcher *DirectoryWatcher
 267}
 268
 269func (s *directorySearcher) Close() {
 270	// We need to Stop directoryWatcher first since it calls load/unload on
 271	// Searcher.
 272	s.directoryWatcher.Stop()
 273	s.Streamer.Close()
 274}
 275
 276type loader struct {
 277	ss *shardedSearcher
 278}
 279
 280func (tl *loader) load(keys ...string) {
 281	// This is called with all keys on startup, so once this function has
 282	// finished running shardedSearcher will be ready.
 283	defer tl.ss.markReady()
 284
 285	if len(keys) == 0 {
 286		// If there's nothing to load, we exit early here, but we want to mark
 287		// ourselves as ready.
 288		return
 289	}
 290
 291	var (
 292		mu           sync.Mutex     // synchronizes writes to the shards map
 293		wg           sync.WaitGroup // used to wait for all shards to load
 294		sem          = semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0)))
 295		loadedShards = make(map[string]zoekt.Searcher)
 296	)
 297
 298	publishLoaded := func() {
 299		mu.Lock()
 300		chunk := loadedShards
 301		loadedShards = make(map[string]zoekt.Searcher)
 302		mu.Unlock()
 303		tl.ss.replace(chunk)
 304	}
 305
 306	log.Printf("[INFO] loading %d shard(s): %s", len(keys), humanTruncateList(keys, 5))
 307
 308	lastProgress := time.Now()
 309	for i, key := range keys {
 310		// If taking a while to start-up occasionally give a progress message
 311		if time.Since(lastProgress) > 5*time.Second {
 312			log.Printf("[INFO] still need to load %d shards...", len(keys)-i)
 313			lastProgress = time.Now()
 314
 315			publishLoaded()
 316		}
 317
 318		_ = sem.Acquire(context.Background(), 1)
 319		wg.Add(1)
 320
 321		go func(key string) {
 322			defer sem.Release(1)
 323			defer wg.Done()
 324
 325			shard, err := loadShard(key)
 326			if err != nil {
 327				metricShardsLoadFailedTotal.Inc()
 328				log.Printf("[ERROR] reloading: %s, err %v ", key, err)
 329				return
 330			}
 331			metricShardsLoadedTotal.Inc()
 332
 333			mu.Lock()
 334			loadedShards[key] = shard
 335			mu.Unlock()
 336		}(key)
 337	}
 338
 339	wg.Wait()
 340
 341	publishLoaded()
 342}
 343
 344func (tl *loader) drop(keys ...string) {
 345	shards := make(map[string]zoekt.Searcher, len(keys))
 346	for _, key := range keys {
 347		shards[key] = nil
 348	}
 349	tl.ss.replace(shards)
 350}
 351
 352func (ss *shardedSearcher) String() string {
 353	return "shardedSearcher"
 354}
 355
 356// Close closes references to open files. It may be called only once.
 357func (ss *shardedSearcher) Close() {
 358	ss.mu.Lock()
 359	shards := make(map[string]zoekt.Searcher, len(ss.shards))
 360	for k := range ss.shards {
 361		shards[k] = nil
 362	}
 363	ss.mu.Unlock()
 364
 365	ss.replace(shards)
 366}
 367
 368func selectRepoSet(shards []*rankedShard, q query.Q) ([]*rankedShard, query.Q) {
 369	and, ok := q.(*query.And)
 370	if ok {
 371		return doSelectRepoSet(shards, and)
 372	}
 373
 374	// We have queries which look like (reposet ...) and we want to do the same
 375	// optimizations. To simplify we just always wrap the query in And and then
 376	// on the return value call Simplify to unwrap. In particular this is
 377	// important for List calls.
 378	and = &query.And{Children: []query.Q{q}}
 379	shards, q = doSelectRepoSet(shards, and)
 380	return shards, query.Simplify(q)
 381}
 382
 383func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, query.Q) {
 384	// (and (reposet ...) (q))
 385	// (and true (q)) with a filtered shards
 386	// (and false) // noop
 387
 388	// (and (repobranches ...) (q))
 389	// (and (repobranches ...) (q))
 390
 391	// Note: we also support (and (repo ...) (q)) even though sourcegraph does
 392	// not generate those sorts of queries. This is to support manual testing.
 393
 394	hasReposForPredicate := func(pred func(repo *zoekt.Repository) bool) func(repos []*zoekt.Repository) (any, all bool) {
 395		return func(repos []*zoekt.Repository) (any, all bool) {
 396			any = false
 397			all = true
 398			for _, repo := range repos {
 399				b := pred(repo)
 400				any = any || b
 401				all = all && b
 402			}
 403			return any, all
 404		}
 405	}
 406
 407	for i, c := range and.Children {
 408		var setSize int
 409		var hasRepos func([]*zoekt.Repository) (bool, bool)
 410		switch setQuery := c.(type) {
 411		case *query.RepoSet:
 412			setSize = len(setQuery.Set)
 413			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 414				return setQuery.Set[repo.Name]
 415			})
 416		case *query.RepoIDs:
 417			setSize = int(setQuery.Repos.GetCardinality())
 418			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 419				return setQuery.Repos.Contains(repo.ID)
 420			})
 421		case *query.Repo:
 422			setSize = 0
 423			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 424				return setQuery.Regexp.MatchString(repo.Name)
 425			})
 426		case *query.BranchesRepos:
 427			for _, br := range setQuery.List {
 428				setSize += int(br.Repos.GetCardinality())
 429			}
 430
 431			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 432				for _, br := range setQuery.List {
 433					if br.Repos.Contains(repo.ID) {
 434						return true
 435					}
 436				}
 437				return false
 438			})
 439		default:
 440			continue
 441		}
 442
 443		// setSize may be larger than the number of shards we have. The size of
 444		// filtered is bounded by min(len(set), len(shards))
 445		if setSize > len(shards) {
 446			setSize = len(shards)
 447		}
 448
 449		filtered := make([]*rankedShard, 0, setSize)
 450		filteredAll := true
 451
 452		for _, s := range shards {
 453			if s.repos == nil {
 454				// repos is nil if we failed to List the shard. This shouldn't
 455				// happen, but if it does we don't know what is in it and must search
 456				// it without simplifying the query.
 457				filtered = append(filtered, s)
 458				filteredAll = false
 459			} else if any, all := hasRepos(s.repos); any {
 460				filtered = append(filtered, s)
 461				filteredAll = filteredAll && all
 462			}
 463		}
 464
 465		// We don't need to adjust the query since we are returning an empty set
 466		// of shards to search.
 467		if len(filtered) == 0 {
 468			return filtered, and
 469		}
 470
 471		// We can't simplify the query since we are searching shards which contain
 472		// repos we aren't supposed to search.
 473		if !filteredAll {
 474			return filtered, and
 475		}
 476
 477		// We don't want to mutate the original and, so we clone it before
 478		// mutating it.
 479		and = &query.And{Children: slices.Clone(and.Children)}
 480
 481		// This optimization allows us to avoid the work done by
 482		// indexData.simplify for each shard.
 483		//
 484		// For example if our query is (and (reposet foo bar) (content baz))
 485		// then at this point filtered is [foo bar] and q is the same. For each
 486		// shard indexData.simplify will simplify to (and true (content baz)) ->
 487		// (content baz). This work can be done now once, rather than per shard.
 488		switch c := c.(type) {
 489		case *query.RepoSet, *query.RepoIDs, *query.Repo:
 490			and.Children[i] = &query.Const{Value: true}
 491			return filtered, query.Simplify(and)
 492
 493		case *query.BranchesRepos:
 494			// We can only replace if all the repos want the same branches. We
 495			// simplify and just check that we are requesting 1 branch. The common
 496			// case is just asking for HEAD, so this should be effective.
 497			if len(c.List) != 1 {
 498				return filtered, and
 499			}
 500
 501			// Every repo wants the same branches, so we can replace RepoBranches
 502			// with a list of branch queries.
 503			and.Children[i] = &query.Branch{Pattern: c.List[0].Branch, Exact: true}
 504			return filtered, query.Simplify(and)
 505		}
 506
 507		// Stop after first RepoSet, otherwise we might append duplicate
 508		// shards to `filtered`
 509		return filtered, and
 510	}
 511
 512	return shards, and
 513}
 514
 515func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 516	tr, ctx := trace.New(ctx, "shardedSearcher.Search", "")
 517	tr.LazyLog(q, true)
 518	tr.LazyPrintf("opts: %+v", opts)
 519	defer func() {
 520		if sr != nil {
 521			tr.LazyPrintf("num files: %d", len(sr.Files))
 522			tr.LazyPrintf("stats: %+v", sr.Stats)
 523		}
 524		if err != nil {
 525			tr.LazyPrintf("error: %v", err)
 526			tr.SetError(err)
 527		}
 528		tr.Finish()
 529	}()
 530	ctx, cancel := context.WithCancel(ctx)
 531	defer cancel()
 532
 533	collectSender := newCollectSender(opts)
 534
 535	start := time.Now()
 536	proc, err := ss.sched.Acquire(ctx)
 537	if err != nil {
 538		return nil, err
 539	}
 540	defer proc.Release()
 541	tr.LazyPrintf("acquired process")
 542
 543	wait := time.Since(start)
 544	start = time.Now()
 545
 546	loaded := ss.getLoaded()
 547	done, err := streamSearch(ctx, proc, q, opts, loaded.shards, collectSender)
 548	defer done()
 549	if err != nil {
 550		return nil, err
 551	}
 552
 553	aggregate, ok := collectSender.Done()
 554	if !ok {
 555		aggregate = &zoekt.SearchResult{
 556			RepoURLs:      map[string]string{},
 557			LineFragments: map[string]string{},
 558		}
 559	}
 560
 561	copyFiles(aggregate)
 562
 563	if !loaded.ready {
 564		// We may have missed results due to not being fully loaded.
 565		aggregate.Stats.Crashes++
 566	}
 567
 568	aggregate.Stats.Wait = wait
 569	aggregate.Stats.Duration = time.Since(start)
 570
 571	return aggregate, nil
 572}
 573
 574func (ss *shardedSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoekt.SearchOptions, sender zoekt.Sender) (err error) {
 575	tr, ctx := trace.New(ctx, "shardedSearcher.StreamSearch", "")
 576	defer func() {
 577		if err != nil {
 578			tr.LazyPrintf("error: %v", err)
 579			tr.SetError(err)
 580		}
 581		tr.Finish()
 582	}()
 583
 584	start := time.Now()
 585	proc, err := ss.sched.Acquire(ctx)
 586	if err != nil {
 587		return err
 588	}
 589	defer proc.Release()
 590	tr.LazyPrintf("acquired process")
 591
 592	loaded := ss.getLoaded()
 593	shards := loaded.shards
 594
 595	maxPendingPriority := math.Inf(-1)
 596	if len(shards) > 0 {
 597		maxPendingPriority = shards[0].priority
 598	}
 599
 600	stillLoadingCrashes := 0
 601	if !loaded.ready {
 602		// We may have missed results due to not being fully loaded.
 603		stillLoadingCrashes++
 604	}
 605
 606	sender.Send(&zoekt.SearchResult{
 607		Stats: zoekt.Stats{
 608			Crashes: stillLoadingCrashes,
 609			Wait:    time.Since(start),
 610		},
 611		Progress: zoekt.Progress{
 612			MaxPendingPriority: maxPendingPriority,
 613		},
 614	})
 615
 616	// Matches flow from the shards up the stack in the following order:
 617	//
 618	// 1. Search shards
 619	// 2. flushCollectSender (aggregate)
 620	// 3. limitSender (limit)
 621	// 4. copyFileSender (copy)
 622	//
 623	// For streaming, the wrapping has to happen in the inverted order.
 624	sender = copyFileSender(sender)
 625
 626	if truncator, hasLimits := index.NewDisplayTruncator(opts); hasLimits {
 627		var cancel context.CancelFunc
 628		ctx, cancel = context.WithCancel(ctx)
 629		defer cancel()
 630		sender = limitSender(cancel, sender, truncator)
 631	}
 632
 633	sender, flush := newFlushCollectSender(opts, sender)
 634
 635	done, err := streamSearch(ctx, proc, q, opts, shards, sender)
 636
 637	// Even though streaming is done, we may have results sitting in a buffer we
 638	// need to flush. So we need to send those before calling done.
 639	flush()
 640	done()
 641
 642	return err
 643}
 644
 645// streamSearch is an internal helper since both Search and StreamSearch are
 646// largely similar.
 647//
 648// done must always be called, even if err is non-nil. The SearchResults sent
 649// via sender contain references to the underlying mmap data that the garbage
 650// collector can't see. Calling done informs the garbage collector it is free
 651// to collect those shards. The caller must call copyFiles on any
 652// SearchResults it returns/streams out before calling done.
 653func streamSearch(ctx context.Context, proc *process, q query.Q, opts *zoekt.SearchOptions, shards []*rankedShard, sender zoekt.Sender) (done func(), err error) {
 654	tr, ctx := trace.New(ctx, "shardedSearcher.streamSearch", "")
 655	overallStart := time.Now()
 656	metricSearchRunning.Inc()
 657	defer func() {
 658		metricSearchRunning.Dec()
 659		metricSearchDuration.Observe(time.Since(overallStart).Seconds())
 660		if err != nil {
 661			metricSearchFailedTotal.Inc()
 662
 663			tr.LazyPrintf("error: %v", err)
 664			tr.SetError(err)
 665		}
 666		tr.Finish()
 667	}()
 668
 669	// Select the subset of shards that we will search over for the given query.
 670	{
 671		beforeLen := len(shards)
 672		beforeQ := q
 673		shards, q = selectRepoSet(shards, q)
 674		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
 675	}
 676
 677	if len(shards) == 0 {
 678		return func() {}, nil
 679	}
 680
 681	var cancel context.CancelFunc
 682	if opts.MaxWallTime == 0 {
 683		ctx, cancel = context.WithCancel(ctx)
 684	} else {
 685		ctx, cancel = context.WithTimeout(ctx, opts.MaxWallTime)
 686	}
 687
 688	defer cancel()
 689
 690	// We set the number of workers to GOMAXPROCS, or the number of shards,
 691	// whichever is smaller.
 692	workers := min(runtime.GOMAXPROCS(0), len(shards))
 693
 694	type result struct {
 695		priority float64
 696		*zoekt.SearchResult
 697		err error
 698	}
 699
 700	var (
 701		// buffered channels to continue searching when sending back results
 702		// takes a while / blocks. The maximum pending result set is workers * 2.
 703		results = make(chan *result, workers)
 704		search  = make(chan *rankedShard, workers)
 705		wg      sync.WaitGroup
 706	)
 707
 708	// Start workers that receive shards from the search channel, search them,
 709	// and send the results to the results channel. This process is repeated
 710	// until the search channel is closed.
 711	//
 712	// Note: Making "search" a buffered channel has the effect of limiting the number of parallel shard searches.
 713	// Since searching is mostly CPU bound, limiting parallel shard searches also reduces the peak working set.
 714	wg.Add(workers)
 715	for range workers {
 716		go func() {
 717			defer wg.Done()
 718			for s := range search {
 719				sr, err := searchOneShard(ctx, s, q, opts)
 720				r := &result{priority: s.priority, SearchResult: sr, err: err}
 721				results <- r
 722			}
 723		}()
 724	}
 725
 726	go func() {
 727		wg.Wait()
 728		close(results)
 729	}()
 730
 731	var (
 732		pending = make(prioritySlice, 0, workers)
 733		shard   = 0
 734		next    = shards[shard]
 735
 736		// We need a separate nil-able reference to the same channel so we can close(search) for the worker
 737		// go-routines to finish but also set work to nil in order for the select statement below to ignore
 738		// that case when we want to stop a search. This is needed because sending on a closed channel panics.
 739		work = search
 740	)
 741
 742	stop := func() {
 743		if work != nil {
 744			close(search)
 745			work = nil
 746			next = nil
 747		}
 748	}
 749
 750	// tracked so we can stop when we hit TotalMaxMatchCount
 751	var totalMatchCount int
 752
 753search:
 754	for {
 755		// At the top of each iteration, have the proc associated with this search yield its won "timeslice"
 756		// to possibly allow other searches to make progress
 757		_ = proc.Yield(ctx) // Note: we let searchOneShard handle context errors
 758
 759		select {
 760		case work <- next: // is there a worker available to search the next shard?
 761			pending.append(next.priority)
 762
 763			shard++
 764			if shard == len(shards) {
 765				stop()
 766			} else {
 767				next = shards[shard]
 768			}
 769		case r, ok := <-results: // is there a result to send back?
 770			if !ok {
 771				break search
 772			}
 773
 774			// delete this result's priority from pending before computing the new max pending priority
 775			pending.remove(r.priority)
 776
 777			if r.err != nil {
 778				// Set final error and stop searching new shards, but consume any pending
 779				// search results.
 780				stop()
 781				err = r.err
 782				continue
 783			}
 784
 785			// Update the match count statistics and stop searching new shards if we've
 786			// reached the limit set in the options.
 787			totalMatchCount += r.SearchResult.Stats.MatchCount
 788			if opts.TotalMaxMatchCount > 0 && totalMatchCount > opts.TotalMaxMatchCount {
 789				stop()
 790			}
 791
 792			observeMetrics(r.SearchResult)
 793
 794			r.Priority = r.priority
 795			r.MaxPendingPriority = pending.max()
 796
 797			sendByRepository(r.SearchResult, opts, sender) // send the result back to the client
 798		}
 799	}
 800
 801	return func() { runtime.KeepAlive(shards) }, err
 802}
 803
 804// sendByRepository splits a zoekt.SearchResult by repository and calls
 805// sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult
 806// to contain results with the same zoekt.SearchResult.priority only.
 807//
 808// We split by repository instead of by priority because it is easier to set
 809// RepoURLs and LineFragments in zoekt.SearchResult.
 810func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) {
 811	if len(result.RepoURLs) <= 1 || len(result.Files) == 0 {
 812		index.SortFiles(result.Files)
 813		sender.Send(result)
 814		return
 815	}
 816
 817	send := func(repoName string, a, b int, stats zoekt.Stats) {
 818		index.SortFiles(result.Files[a:b])
 819		sender.Send(&zoekt.SearchResult{
 820			Stats: stats,
 821			Progress: zoekt.Progress{
 822				Priority:           result.Files[a].RepositoryPriority,
 823				MaxPendingPriority: result.MaxPendingPriority,
 824			},
 825			Files:         result.Files[a:b],
 826			RepoURLs:      map[string]string{repoName: result.RepoURLs[repoName]},
 827			LineFragments: map[string]string{repoName: result.LineFragments[repoName]},
 828		})
 829	}
 830
 831	var startIndex, endIndex int
 832	curRepoID := result.Files[0].RepositoryID
 833	curRepoName := result.Files[0].Repository
 834
 835	fm := zoekt.FileMatch{}
 836	for endIndex, fm = range result.Files {
 837		if curRepoID != fm.RepositoryID {
 838			// Stats must stay aggregate-able, hence we sent the aggregate stats with the
 839			// last event.
 840			send(curRepoName, startIndex, endIndex, zoekt.Stats{})
 841
 842			startIndex = endIndex
 843			curRepoID = fm.RepositoryID
 844			curRepoName = fm.Repository
 845		}
 846	}
 847
 848	send(curRepoName, startIndex, endIndex+1, result.Stats)
 849}
 850
 851func observeMetrics(sr *zoekt.SearchResult) {
 852	metricSearchContentBytesLoadedTotal.Add(float64(sr.Stats.ContentBytesLoaded))
 853	metricSearchIndexBytesLoadedTotal.Add(float64(sr.Stats.IndexBytesLoaded))
 854	metricSearchCrashesTotal.Add(float64(sr.Stats.Crashes))
 855	metricSearchFileCountTotal.Add(float64(sr.Stats.FileCount))
 856	metricSearchShardFilesConsideredTotal.Add(float64(sr.Stats.ShardFilesConsidered))
 857	metricSearchFilesConsideredTotal.Add(float64(sr.Stats.FilesConsidered))
 858	metricSearchFilesLoadedTotal.Add(float64(sr.Stats.FilesLoaded))
 859	metricSearchFilesSkippedTotal.Add(float64(sr.Stats.FilesSkipped))
 860	metricSearchShardsSkippedTotal.Add(float64(sr.Stats.ShardsSkipped))
 861	metricSearchMatchCountTotal.Add(float64(sr.Stats.MatchCount))
 862	metricSearchNgramMatchesTotal.Add(float64(sr.Stats.NgramMatches))
 863	metricSearchNgramLookupsTotal.Add(float64(sr.Stats.NgramLookups))
 864	metricSearchRegexpsConsideredTotal.Add(float64(sr.Stats.RegexpsConsidered))
 865}
 866
 867func copySlice(src *[]byte) {
 868	if *src == nil {
 869		return
 870	}
 871	dst := make([]byte, len(*src))
 872	copy(dst, *src)
 873	*src = dst
 874}
 875
 876func copyFiles(sr *zoekt.SearchResult) {
 877	for i := range sr.Files {
 878		copySlice(&sr.Files[i].Content)
 879		copySlice(&sr.Files[i].Checksum)
 880		for l := range sr.Files[i].LineMatches {
 881			copySlice(&sr.Files[i].LineMatches[l].Line)
 882			copySlice(&sr.Files[i].LineMatches[l].Before)
 883			copySlice(&sr.Files[i].LineMatches[l].After)
 884		}
 885		for c := range sr.Files[i].ChunkMatches {
 886			copySlice(&sr.Files[i].ChunkMatches[c].Content)
 887		}
 888	}
 889}
 890
 891func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 892	metricSearchShardRunning.Inc()
 893	defer func() {
 894		metricSearchShardRunning.Dec()
 895		if e := recover(); e != nil {
 896			log.Printf("[ERROR] crashed shard: %s: %#v, %s", s, e, debug.Stack())
 897
 898			if sr == nil {
 899				sr = &zoekt.SearchResult{}
 900			}
 901			sr.Stats.Crashes = 1
 902		}
 903	}()
 904
 905	return s.Search(ctx, q, opts)
 906}
 907
// shardListResult is the message listOneShard sends on its sink channel: the
// outcome of listing a single shard. Values are built with positional
// literals, so the field order must not change.
type shardListResult struct {
	// rl is the repository list returned by the shard, or a list carrying
	// only Crashes: 1 when the shard panicked.
	rl  *zoekt.RepoList
	// err is the error returned by the shard's List call, if any.
	err error
}
 912
 913func listOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.ListOptions, sink chan shardListResult) {
 914	metricListShardRunning.Inc()
 915	defer func() {
 916		metricListShardRunning.Dec()
 917		if r := recover(); r != nil {
 918			log.Printf("[ERROR] crashed shard: %s: %s, %s", s.String(), r, debug.Stack())
 919			sink <- shardListResult{
 920				&zoekt.RepoList{Crashes: 1}, nil,
 921			}
 922		}
 923	}()
 924
 925	ms, err := s.List(ctx, q, opts)
 926	sink <- shardListResult{ms, err}
 927}
 928
 929func (ss *shardedSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) {
 930	tr, ctx := trace.New(ctx, "shardedSearcher.List", "")
 931	metricListRunning.Inc()
 932	defer func() {
 933		metricListRunning.Dec()
 934		if rl != nil {
 935			tr.LazyPrintf("repos.size=%d reposmap.size=%d crashes=%d stats=%+v", len(rl.Repos), len(rl.ReposMap), rl.Crashes, rl.Stats)
 936		}
 937		if err != nil {
 938			tr.LazyPrintf("error: %v", err)
 939			tr.SetError(err)
 940		}
 941		tr.Finish()
 942	}()
 943
 944	q = query.Simplify(q)
 945	isAll := false
 946	if c, ok := q.(*query.Const); ok {
 947		isAll = c.Value
 948	}
 949
 950	proc, err := ss.sched.Acquire(ctx)
 951	if err != nil {
 952		return nil, err
 953	}
 954	defer proc.Release()
 955	tr.LazyPrintf("acquired process")
 956
 957	loaded := ss.getLoaded()
 958	shards := loaded.shards
 959
 960	// Setup what we return now, since we may short circuit if there are no
 961	// shards to search.
 962	stillLoadingCrashes := 0
 963	if !loaded.ready {
 964		// We may have missed results due to not being fully loaded.
 965		stillLoadingCrashes++
 966	}
 967	agg := zoekt.RepoList{
 968		Crashes:  stillLoadingCrashes,
 969		ReposMap: zoekt.ReposMap{},
 970		Repos:    []*zoekt.RepoListEntry{},
 971	}
 972
 973	// PERF: Select the subset of shards that we will search over for the given
 974	// query. A common List query only asks for a specific repo, so this is an
 975	// important optimization.
 976	{
 977		beforeLen := len(shards)
 978		beforeQ := q
 979		shards, q = selectRepoSet(shards, q)
 980		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
 981	}
 982
 983	if len(shards) == 0 {
 984		return &agg, nil
 985	}
 986
 987	shardCount := len(shards)
 988	all := make(chan shardListResult, shardCount)
 989	feeder := make(chan zoekt.Searcher, len(shards))
 990	for _, s := range shards {
 991		feeder <- s
 992	}
 993	close(feeder)
 994
 995	for range runtime.GOMAXPROCS(0) {
 996		go func() {
 997			for s := range feeder {
 998				listOneShard(ctx, s, q, opts, all)
 999			}
1000		}()
1001	}
1002
1003	uniq := map[string]*zoekt.RepoListEntry{}
1004
1005	for range shards {
1006		r := <-all
1007		if r.err != nil {
1008			return nil, r.err
1009		}
1010
1011		agg.Crashes += r.rl.Crashes
1012		agg.Stats.Add(&r.rl.Stats)
1013
1014		for _, r := range r.rl.Repos {
1015			prev, ok := uniq[r.Repository.Name]
1016			if !ok {
1017				cp := *r // We need to copy because we mutate r.Stats when merging duplicates
1018				uniq[r.Repository.Name] = &cp
1019			} else {
1020				prev.Stats.Add(&r.Stats)
1021			}
1022		}
1023
1024		for id, r := range r.rl.ReposMap {
1025			_, ok := agg.ReposMap[id]
1026			if !ok {
1027				agg.ReposMap[id] = r
1028			}
1029		}
1030	}
1031
1032	agg.Repos = make([]*zoekt.RepoListEntry, 0, len(uniq))
1033	for _, r := range uniq {
1034		agg.Repos = append(agg.Repos, r)
1035	}
1036
1037	// Only one of these fields is populated and in all cases the size of that
1038	// field is the number of Repos.
1039	//
1040	// Note: we don't just add individual Stats.Repos since a repository can
1041	// have multiple shards.
1042	agg.Stats.Repos = len(uniq) + len(agg.ReposMap)
1043
1044	if isAll && len(agg.Repos) > 0 {
1045		reportListAllMetrics(agg.Repos)
1046	}
1047
1048	return &agg, nil
1049}
1050
1051func reportListAllMetrics(repos []*zoekt.RepoListEntry) {
1052	var stats zoekt.RepoStats
1053	for _, r := range repos {
1054		stats.Add(&r.Stats)
1055	}
1056
1057	metricListAllRepos.Set(float64(stats.Repos))
1058	metricListAllIndexBytes.Set(float64(stats.IndexBytes))
1059	metricListAllContentBytes.Set(float64(stats.ContentBytes))
1060	metricListAllDocuments.Set(float64(stats.Documents))
1061	metricListAllShards.Set(float64(stats.Shards))
1062	metricListAllNewLinesCount.Set(float64(stats.NewLinesCount))
1063	metricListAllDefaultBranchNewLinesCount.Set(float64(stats.DefaultBranchNewLinesCount))
1064	metricListAllOtherBranchesNewLinesCount.Set(float64(stats.OtherBranchesNewLinesCount))
1065}
1066
1067// getLoaded returns the currently loaded shards. Shared so do not mutate.
1068func (s *shardedSearcher) getLoaded() loaded {
1069	// next commit will store the true value of this, for now we keep the
1070	// backwards compatible behaviour.
1071	ready := s.ready.Load()
1072	// ranked is loaded after ready to avoid a race were ready is true but
1073	// ranked is still not the final set of shards.
1074	ranked, _ := s.ranked.Load().([]*rankedShard)
1075	return loaded{
1076		shards: ranked,
1077		ready:  ready,
1078	}
1079}
1080
1081func mkRankedShard(s zoekt.Searcher) *rankedShard {
1082	q := query.Const{Value: true}
1083	// We need to use WithUnsafeContext here, otherwise we cannot return a proper
1084	// rankedShard. On the user request path we use selectRepoSet which relies on
1085	// rankedShard.repos being set.
1086	result, err := s.List(systemtenant.WithUnsafeContext(context.Background()), &q, nil)
1087	if err != nil {
1088		log.Printf("[ERROR] mkRankedShard(%s): failed to cache repository list: %v", s, err)
1089		return &rankedShard{Searcher: s}
1090	}
1091
1092	var (
1093		maxPriority float64
1094		repos       = make([]*zoekt.Repository, 0, len(result.Repos))
1095	)
1096	for i := range result.Repos {
1097		repo := &result.Repos[i].Repository
1098		repos = append(repos, repo)
1099		if repo.RawConfig != nil {
1100			priority, _ := strconv.ParseFloat(repo.RawConfig["priority"], 64)
1101			if priority > maxPriority {
1102				maxPriority = priority
1103			}
1104		}
1105	}
1106
1107	return &rankedShard{
1108		Searcher: s,
1109		repos:    repos,
1110		priority: maxPriority,
1111	}
1112}
1113
1114// markReady should be called once all shards have been passed into replace on
1115// startup. Once s is marked as ready it stops reporting a Crash in the
1116// response Stats.
1117func (s *shardedSearcher) markReady() {
1118	s.ready.CompareAndSwap(false, true)
1119}
1120
1121func (s *shardedSearcher) replace(shards map[string]zoekt.Searcher) {
1122	if len(shards) == 0 {
1123		return
1124	}
1125
1126	defer func(began time.Time) {
1127		metricShardsBatchReplaceDurationSeconds.Observe(time.Since(began).Seconds())
1128	}(time.Now())
1129
1130	s.mu.Lock()
1131	defer s.mu.Unlock()
1132
1133	for key, shard := range shards {
1134		var r *rankedShard
1135		if shard != nil {
1136			r = mkRankedShard(shard)
1137		}
1138
1139		old := s.shards[key]
1140		if shard == nil {
1141			delete(s.shards, key)
1142		} else {
1143			s.shards[key] = r
1144		}
1145
1146		if old != nil && old.Searcher != nil {
1147			//                 _ ___                /^^\ /^\  /^^\_
1148			//     _          _@)@) \            ,,/ '` ~ `'~~ ', `\.
1149			//   _/o\_ _ _ _/~`.`...'~\        ./~~..,'`','',.,' '  ~:
1150			//  / `,'.~,~.~  .   , . , ~|,   ,/ .,' , ,. .. ,,.   `,  ~\_
1151			// ( ' _' _ '_` _  '  .    , `\_/ .' ..' '  `  `   `..  `,   \_
1152			//  ~V~ V~ V~ V~ ~\ `   ' .  '    , ' .,.,''`.,.''`.,.``. ',   \_
1153			//   _/\ /\ /\ /\_/, . ' ,   `_/~\_ .' .,. ,, , _/~\_ `. `. '.,  \_
1154			//  < ~ ~ '~`'~'`, .,  .   `_: ::: \_ '      `_/ ::: \_ `.,' . ',  \_
1155			//   \ ' `_  '`_    _    ',/ _::_::_ \ _    _/ _::_::_ \   `.,'.,`., \-,-,-,_,_,
1156			//    `'~~ `'~~ `'~~ `'~~  \(_)(_)(_)/  `~~' \(_)(_)(_)/ ~'`\_.._,._,'_;_;_;_;_;
1157			//
1158			// We can't just call Close now, because there may be ongoing searches
1159			// which have old in the shards list. Previously we used an exclusive
1160			// lock to guarantee there were no concurrent searches. However, that
1161			// led to blocking on the read path.
1162			//
1163			// We could introduce granular locking per rankedShard to know when
1164			// there are no more references. However, this becomes tricky in
1165			// practice. Instead we rely on the garbage collector noticing old is no
1166			// longer used. We take care in our searchers to runtime.KeepAlive until
1167			// we have stopped referencing the underling mmap data.
1168			runtime.SetFinalizer(old, func(r *rankedShard) {
1169				r.Close()
1170			})
1171		}
1172	}
1173
1174	ranked := make([]*rankedShard, 0, len(s.shards))
1175	for _, r := range s.shards {
1176		ranked = append(ranked, r)
1177	}
1178
1179	sort.Slice(ranked, func(i, j int) bool {
1180		priorityDiff := ranked[i].priority - ranked[j].priority
1181		if priorityDiff != 0 {
1182			return priorityDiff > 0
1183		}
1184		if len(ranked[i].repos) == 0 || len(ranked[j].repos) == 0 {
1185			// Protect against empty names which can happen if we fail to List or
1186			// the shard is full of tombstones. Prefer the shard which has names.
1187			return len(ranked[i].repos) >= len(ranked[j].repos)
1188		}
1189		return ranked[i].repos[0].Name < ranked[j].repos[0].Name
1190	})
1191
1192	s.ranked.Store(ranked)
1193
1194	metricShardsLoaded.Set(float64(len(ranked)))
1195}
1196
1197func loadShard(fn string) (zoekt.Searcher, error) {
1198	f, err := os.Open(fn)
1199	if err != nil {
1200		return nil, err
1201	}
1202
1203	iFile, err := index.NewIndexFile(f)
1204	if err != nil {
1205		return nil, err
1206	}
1207	s, err := index.NewSearcher(iFile)
1208	if err != nil {
1209		iFile.Close()
1210		return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
1211	}
1212
1213	return s, nil
1214}
1215
// prioritySlice is a deliberately simple collection of priorities supporting
// three operations: add a value, remove a value, and report the largest value.
// Every operation is O(n), which is acceptable because N is restricted to
// GOMAXPROCS (i.e., number of cpu cores) by the shardedSearcher interface.
type prioritySlice []float64

// append adds pri to the end of the collection.
func (p *prioritySlice) append(pri float64) {
	*p = append(*p, pri)
}

// remove deletes the first element equal to pri, if there is one. Element
// order is not preserved: the last element is moved into the vacated slot.
func (p *prioritySlice) remove(pri float64) {
	vals := *p
	last := len(vals) - 1
	for i := range vals {
		if vals[i] != pri {
			continue
		}
		// Overwrite the match with the tail element, then drop the tail.
		vals[i] = vals[last]
		*p = vals[:last]
		return
	}
}

// max returns the largest priority in the collection, or negative infinity
// when the collection is empty.
func (p *prioritySlice) max() float64 {
	best := math.Inf(-1)
	for i := 0; i < len(*p); i++ {
		if v := (*p)[i]; best < v {
			best = v
		}
	}
	return best
}
Configure Feed

Configure Feed