shards/shards.go at b51a2335d51b865e1ffe84aa549e85570da61463 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / shards / shards.go
at b51a2335d51b865e1ffe84aa549e85570da61463 38 kB View raw
Stefan Hengl tenant: run healthz check with system priviledges (#877) 2y ago
   1// Copyright 2016 Google Inc. All rights reserved.
   2//
   3// Licensed under the Apache License, Version 2.0 (the "License");
   4// you may not use this file except in compliance with the License.
   5// You may obtain a copy of the License at
   6//
   7//    http://www.apache.org/licenses/LICENSE-2.0
   8//
   9// Unless required by applicable law or agreed to in writing, software
  10// distributed under the License is distributed on an "AS IS" BASIS,
  11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12// See the License for the specific language governing permissions and
  13// limitations under the License.
  14
  15package shards
  16
  17import (
  18	"context"
  19	"fmt"
  20	"log"
  21	"math"
  22	"os"
  23	"runtime"
  24	"runtime/debug"
  25	"slices"
  26	"sort"
  27	"strconv"
  28	"sync"
  29	"time"
  30
  31	"golang.org/x/sync/semaphore"
  32
  33	"github.com/prometheus/client_golang/prometheus"
  34	"github.com/prometheus/client_golang/prometheus/promauto"
  35	"go.uber.org/atomic"
  36
  37	"github.com/sourcegraph/zoekt"
  38	"github.com/sourcegraph/zoekt/internal/tenant/systemtenant"
  39	"github.com/sourcegraph/zoekt/query"
  40	"github.com/sourcegraph/zoekt/trace"
  41)
  42
  43var (
  44	metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{
  45		Name: "zoekt_shards_loaded",
  46		Help: "The number of shards currently loaded",
  47	})
  48	metricShardsLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  49		Name: "zoekt_shards_loaded_total",
  50		Help: "The total number of shards loaded",
  51	})
  52	metricShardsLoadFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
  53		Name: "zoekt_shards_load_failed_total",
  54		Help: "The total number of shard loads that failed",
  55	})
  56
  57	metricSearchRunning = promauto.NewGauge(prometheus.GaugeOpts{
  58		Name: "zoekt_search_running",
  59		Help: "The number of concurrent search requests running",
  60	})
  61	metricSearchShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
  62		Name: "zoekt_search_shard_running",
  63		Help: "The number of concurrent search requests in a shard running",
  64	})
  65	metricSearchFailedTotal = promauto.NewCounter(prometheus.CounterOpts{
  66		Name: "zoekt_search_failed_total",
  67		Help: "The total number of search requests that failed",
  68	})
  69	metricSearchDuration = promauto.NewHistogram(prometheus.HistogramOpts{
  70		Name:    "zoekt_search_duration_seconds",
  71		Help:    "The duration a search request took in seconds",
  72		Buckets: prometheus.DefBuckets, // DefBuckets good for service timings
  73	})
  74
  75	// A Counter per Stat. Name should match field in zoekt.Stats.
  76	metricSearchContentBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  77		Name: "zoekt_search_content_loaded_bytes_total",
  78		Help: "Total amount of I/O for reading contents",
  79	})
  80	metricSearchIndexBytesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
  81		Name: "zoekt_search_index_loaded_bytes_total",
  82		Help: "Total amount of I/O for reading from index",
  83	})
  84	metricSearchCrashesTotal = promauto.NewCounter(prometheus.CounterOpts{
  85		Name: "zoekt_search_crashes_total",
  86		Help: "Total number of search shards that had a crash",
  87	})
  88	metricSearchFileCountTotal = promauto.NewCounter(prometheus.CounterOpts{
  89		Name: "zoekt_search_file_count_total",
  90		Help: "Total number of files containing a match",
  91	})
  92	metricSearchShardFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
  93		Name: "zoekt_search_shard_files_considered_total",
  94		Help: "Total number of files in shards that we considered",
  95	})
  96	metricSearchFilesConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
  97		Name: "zoekt_search_files_considered_total",
  98		Help: "Total files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true",
  99	})
 100	metricSearchFilesLoadedTotal = promauto.NewCounter(prometheus.CounterOpts{
 101		Name: "zoekt_search_files_loaded_total",
 102		Help: "Total files for which we loaded file content to verify substring matches",
 103	})
 104	metricSearchFilesSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
 105		Name: "zoekt_search_files_skipped_total",
 106		Help: "Total candidate files whose contents weren't examined because we gathered enough matches",
 107	})
 108	metricSearchShardsSkippedTotal = promauto.NewCounter(prometheus.CounterOpts{
 109		Name: "zoekt_search_shards_skipped_total",
 110		Help: "Total shards that we did not process because a query was canceled",
 111	})
 112	metricSearchMatchCountTotal = promauto.NewCounter(prometheus.CounterOpts{
 113		Name: "zoekt_search_match_count_total",
 114		Help: "Total number of non-overlapping matches",
 115	})
 116	metricSearchNgramMatchesTotal = promauto.NewCounter(prometheus.CounterOpts{
 117		Name: "zoekt_search_ngram_matches_total",
 118		Help: "Total number of candidate matches as a result of searching ngrams",
 119	})
 120	metricSearchNgramLookupsTotal = promauto.NewCounter(prometheus.CounterOpts{
 121		Name: "zoekt_search_ngram_lookups_total",
 122		Help: "Total number of times we accessed an ngram in the index",
 123	})
 124	metricSearchRegexpsConsideredTotal = promauto.NewCounter(prometheus.CounterOpts{
 125		Name: "zoekt_search_regexps_considered_total",
 126		Help: "Total number of times regexp was called on files that we evaluated",
 127	})
 128
 129	metricListRunning = promauto.NewGauge(prometheus.GaugeOpts{
 130		Name: "zoekt_list_running",
 131		Help: "The number of concurrent list requests running",
 132	})
 133	metricListShardRunning = promauto.NewGauge(prometheus.GaugeOpts{
 134		Name: "zoekt_list_shard_running",
 135		Help: "The number of concurrent list requests in a shard running",
 136	})
 137	metricShardsBatchReplaceDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
 138		Name:    "zoekt_shards_batch_replace_duration_seconds",
 139		Help:    "The time it takes to replace a batch of Searchers.",
 140		Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
 141	})
 142	metricListAllRepos = promauto.NewGauge(prometheus.GaugeOpts{
 143		Name: "zoekt_list_all_stats_repos",
 144		Help: "The last List(true) value for RepoStats.Repos. Repos is used for aggregrating the number of repositories.",
 145	})
 146	metricListAllShards = promauto.NewGauge(prometheus.GaugeOpts{
 147		Name: "zoekt_list_all_stats_shards",
 148		Help: "The last List(true) value for RepoStats.Shards. Shards is the total number of search shards.",
 149	})
 150	metricListAllDocuments = promauto.NewGauge(prometheus.GaugeOpts{
 151		Name: "zoekt_list_all_stats_documents",
 152		Help: "The last List(true) value for RepoStats.Documents. Documents holds the number of documents or files.",
 153	})
 154	metricListAllIndexBytes = promauto.NewGauge(prometheus.GaugeOpts{
 155		Name: "zoekt_list_all_stats_index_bytes",
 156		Help: "The last List(true) value for RepoStats.IndexBytes. IndexBytes is the amount of RAM used for index overhead.",
 157	})
 158	metricListAllContentBytes = promauto.NewGauge(prometheus.GaugeOpts{
 159		Name: "zoekt_list_all_stats_content_bytes",
 160		Help: "The last List(true) value for RepoStats.ContentBytes. ContentBytes is the amount of RAM used for raw content.",
 161	})
 162	metricListAllNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 163		Name: "zoekt_list_all_stats_new_lines_count",
 164		Help: "The last List(true) value for RepoStats.NewLinesCount.",
 165	})
 166	metricListAllDefaultBranchNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 167		Name: "zoekt_list_all_stats_default_branch_new_lines_count",
 168		Help: "The last List(true) value for RepoStats.DefaultBranchNewLinesCount.",
 169	})
 170	metricListAllOtherBranchesNewLinesCount = promauto.NewGauge(prometheus.GaugeOpts{
 171		Name: "zoekt_list_all_stats_other_branches_new_lines_count",
 172		Help: "The last List(true) value for RepoStats.OtherBranchesNewLinesCount.",
 173	})
 174)
 175
 176type rankedShard struct {
 177	zoekt.Searcher
 178
 179	priority float64 // maximum priority across all repos in the shard
 180
 181	// We have out of band ranking on compound shards which can change even if
 182	// the shard file does not. So we compute a rank in getShards. We store
 183	// repos here to avoid the cost of List in the search request path.
 184	//
 185	// repos is nil only if that call failed.
 186	repos []*zoekt.Repository
 187}
 188
 189// loaded stores the state we compute when updating the state of shards from
 190// disk.
 191type loaded struct {
 192	// shards is the currently loaded shards sorted by decreasing rank and
 193	// should not be mutated.
 194	shards []*rankedShard
 195
 196	// ready is true if sharded searcher has finished loading all initial
 197	// shards on startup.
 198	ready bool
 199}
 200
 201type shardedSearcher struct {
 202	// Limit the number of parallel queries. Since searching is
 203	// CPU bound, we can't do better than #CPU queries in
 204	// parallel.  If we do so, we just create more memory
 205	// pressure.
 206	sched scheduler
 207
 208	mu     sync.Mutex // protects writes to shards
 209	shards map[string]*rankedShard
 210
 211	ready  atomic.Bool
 212	ranked atomic.Value
 213}
 214
 215func newShardedSearcher(n int64) *shardedSearcher {
 216	ss := &shardedSearcher{
 217		shards: make(map[string]*rankedShard),
 218		sched:  newScheduler(n),
 219	}
 220	return ss
 221}
 222
 223// NewDirectorySearcher returns a searcher instance that loads all
 224// shards corresponding to a glob into memory.
 225func NewDirectorySearcher(dir string) (zoekt.Streamer, error) {
 226	return newDirectorySearcher(dir, true)
 227}
 228
 229// NewDirectorySearcherFast is like NewDirectorySearcher, but does not block
 230// on the initial loading of shards.
 231//
 232// This exists since in the case of zoekt-webserver we are happy with having
 233// partial availability since that is better than no availability on large
 234// instances.
 235func NewDirectorySearcherFast(dir string) (zoekt.Streamer, error) {
 236	return newDirectorySearcher(dir, false)
 237}
 238
 239func newDirectorySearcher(dir string, waitUntilReady bool) (zoekt.Streamer, error) {
 240	ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
 241	tl := &loader{
 242		ss: ss,
 243	}
 244	dw, err := newDirectoryWatcher(dir, tl)
 245	if err != nil {
 246		return nil, err
 247	}
 248
 249	if waitUntilReady {
 250		if err := dw.WaitUntilReady(); err != nil {
 251			return nil, err
 252		}
 253	}
 254
 255	ds := &directorySearcher{
 256		Streamer:         ss,
 257		directoryWatcher: dw,
 258	}
 259
 260	return &typeRepoSearcher{Streamer: ds}, nil
 261}
 262
 263type directorySearcher struct {
 264	zoekt.Streamer
 265
 266	directoryWatcher *DirectoryWatcher
 267}
 268
 269func (s *directorySearcher) Close() {
 270	// We need to Stop directoryWatcher first since it calls load/unload on
 271	// Searcher.
 272	s.directoryWatcher.Stop()
 273	s.Streamer.Close()
 274}
 275
 276type loader struct {
 277	ss *shardedSearcher
 278}
 279
 280func (tl *loader) load(keys ...string) {
 281	// This is called with all keys on startup, so once this function has
 282	// finished running shardedSearcher will be ready.
 283	defer tl.ss.markReady()
 284
 285	if len(keys) == 0 {
 286		// If there's nothing to load, we exit early here, but we want to mark
 287		// ourselves as ready.
 288		return
 289	}
 290
 291	var (
 292		mu           sync.Mutex     // synchronizes writes to the shards map
 293		wg           sync.WaitGroup // used to wait for all shards to load
 294		sem          = semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0)))
 295		loadedShards = make(map[string]zoekt.Searcher)
 296	)
 297
 298	publishLoaded := func() {
 299		mu.Lock()
 300		chunk := loadedShards
 301		loadedShards = make(map[string]zoekt.Searcher)
 302		mu.Unlock()
 303		tl.ss.replace(chunk)
 304	}
 305
 306	log.Printf("[INFO] loading %d shard(s): %s", len(keys), humanTruncateList(keys, 5))
 307
 308	lastProgress := time.Now()
 309	for i, key := range keys {
 310		// If taking a while to start-up occasionally give a progress message
 311		if time.Since(lastProgress) > 5*time.Second {
 312			log.Printf("[INFO] still need to load %d shards...", len(keys)-i)
 313			lastProgress = time.Now()
 314
 315			publishLoaded()
 316		}
 317
 318		_ = sem.Acquire(context.Background(), 1)
 319		wg.Add(1)
 320
 321		go func(key string) {
 322			defer sem.Release(1)
 323			defer wg.Done()
 324
 325			shard, err := loadShard(key)
 326			if err != nil {
 327				metricShardsLoadFailedTotal.Inc()
 328				log.Printf("[ERROR] reloading: %s, err %v ", key, err)
 329				return
 330			}
 331			metricShardsLoadedTotal.Inc()
 332
 333			mu.Lock()
 334			loadedShards[key] = shard
 335			mu.Unlock()
 336		}(key)
 337	}
 338
 339	wg.Wait()
 340
 341	publishLoaded()
 342}
 343
 344func (tl *loader) drop(keys ...string) {
 345	shards := make(map[string]zoekt.Searcher, len(keys))
 346	for _, key := range keys {
 347		shards[key] = nil
 348	}
 349	tl.ss.replace(shards)
 350}
 351
 352func (ss *shardedSearcher) String() string {
 353	return "shardedSearcher"
 354}
 355
 356// Close closes references to open files. It may be called only once.
 357func (ss *shardedSearcher) Close() {
 358	ss.mu.Lock()
 359	shards := make(map[string]zoekt.Searcher, len(ss.shards))
 360	for k := range ss.shards {
 361		shards[k] = nil
 362	}
 363	ss.mu.Unlock()
 364
 365	ss.replace(shards)
 366}
 367
 368func selectRepoSet(shards []*rankedShard, q query.Q) ([]*rankedShard, query.Q) {
 369	and, ok := q.(*query.And)
 370	if ok {
 371		return doSelectRepoSet(shards, and)
 372	}
 373
 374	// We have queries which look like (reposet ...) and we want to do the same
 375	// optimizations. To simplify we just always wrap the query in And and then
 376	// on the return value call Simplify to unwrap. In particular this is
 377	// important for List calls.
 378	and = &query.And{Children: []query.Q{q}}
 379	shards, q = doSelectRepoSet(shards, and)
 380	return shards, query.Simplify(q)
 381}
 382
 383func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, query.Q) {
 384	// (and (reposet ...) (q))
 385	// (and true (q)) with a filtered shards
 386	// (and false) // noop
 387
 388	// (and (repobranches ...) (q))
 389	// (and (repobranches ...) (q))
 390
 391	// Note: we also support (and (repo ...) (q)) even though sourcegraph does
 392	// not generate those sorts of queries. This is to support manual testing.
 393
 394	hasReposForPredicate := func(pred func(repo *zoekt.Repository) bool) func(repos []*zoekt.Repository) (any, all bool) {
 395		return func(repos []*zoekt.Repository) (any, all bool) {
 396			any = false
 397			all = true
 398			for _, repo := range repos {
 399				b := pred(repo)
 400				any = any || b
 401				all = all && b
 402			}
 403			return any, all
 404		}
 405	}
 406
 407	for i, c := range and.Children {
 408		var setSize int
 409		var hasRepos func([]*zoekt.Repository) (bool, bool)
 410		switch setQuery := c.(type) {
 411		case *query.RepoSet:
 412			setSize = len(setQuery.Set)
 413			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 414				return setQuery.Set[repo.Name]
 415			})
 416		case *query.RepoIDs:
 417			setSize = int(setQuery.Repos.GetCardinality())
 418			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 419				return setQuery.Repos.Contains(repo.ID)
 420			})
 421		case *query.Repo:
 422			setSize = 0
 423			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 424				return setQuery.Regexp.MatchString(repo.Name)
 425			})
 426		case *query.BranchesRepos:
 427			for _, br := range setQuery.List {
 428				setSize += int(br.Repos.GetCardinality())
 429			}
 430
 431			hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
 432				for _, br := range setQuery.List {
 433					if br.Repos.Contains(repo.ID) {
 434						return true
 435					}
 436				}
 437				return false
 438			})
 439		default:
 440			continue
 441		}
 442
 443		// setSize may be larger than the number of shards we have. The size of
 444		// filtered is bounded by min(len(set), len(shards))
 445		if setSize > len(shards) {
 446			setSize = len(shards)
 447		}
 448
 449		filtered := make([]*rankedShard, 0, setSize)
 450		filteredAll := true
 451
 452		for _, s := range shards {
 453			if s.repos == nil {
 454				// repos is nil if we failed to List the shard. This shouldn't
 455				// happen, but if it does we don't know what is in it and must search
 456				// it without simplifying the query.
 457				filtered = append(filtered, s)
 458				filteredAll = false
 459			} else if any, all := hasRepos(s.repos); any {
 460				filtered = append(filtered, s)
 461				filteredAll = filteredAll && all
 462			}
 463		}
 464
 465		// We don't need to adjust the query since we are returning an empty set
 466		// of shards to search.
 467		if len(filtered) == 0 {
 468			return filtered, and
 469		}
 470
 471		// We can't simplify the query since we are searching shards which contain
 472		// repos we aren't supposed to search.
 473		if !filteredAll {
 474			return filtered, and
 475		}
 476
 477		// We don't want to mutate the original and, so we clone it before
 478		// mutating it.
 479		and = &query.And{Children: slices.Clone(and.Children)}
 480
 481		// This optimization allows us to avoid the work done by
 482		// indexData.simplify for each shard.
 483		//
 484		// For example if our query is (and (reposet foo bar) (content baz))
 485		// then at this point filtered is [foo bar] and q is the same. For each
 486		// shard indexData.simplify will simplify to (and true (content baz)) ->
 487		// (content baz). This work can be done now once, rather than per shard.
 488		switch c := c.(type) {
 489		case *query.RepoSet, *query.RepoIDs, *query.Repo:
 490			and.Children[i] = &query.Const{Value: true}
 491			return filtered, query.Simplify(and)
 492
 493		case *query.BranchesRepos:
 494			// We can only replace if all the repos want the same branches. We
 495			// simplify and just check that we are requesting 1 branch. The common
 496			// case is just asking for HEAD, so this should be effective.
 497			if len(c.List) != 1 {
 498				return filtered, and
 499			}
 500
 501			// Every repo wants the same branches, so we can replace RepoBranches
 502			// with a list of branch queries.
 503			and.Children[i] = &query.Branch{Pattern: c.List[0].Branch, Exact: true}
 504			return filtered, query.Simplify(and)
 505		}
 506
 507		// Stop after first RepoSet, otherwise we might append duplicate
 508		// shards to `filtered`
 509		return filtered, and
 510	}
 511
 512	return shards, and
 513}
 514
 515func (ss *shardedSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 516	tr, ctx := trace.New(ctx, "shardedSearcher.Search", "")
 517	tr.LazyLog(q, true)
 518	tr.LazyPrintf("opts: %+v", opts)
 519	defer func() {
 520		if sr != nil {
 521			tr.LazyPrintf("num files: %d", len(sr.Files))
 522			tr.LazyPrintf("stats: %+v", sr.Stats)
 523		}
 524		if err != nil {
 525			tr.LazyPrintf("error: %v", err)
 526			tr.SetError(err)
 527		}
 528		tr.Finish()
 529	}()
 530	ctx, cancel := context.WithCancel(ctx)
 531	defer cancel()
 532
 533	collectSender := newCollectSender(opts)
 534
 535	start := time.Now()
 536	proc, err := ss.sched.Acquire(ctx)
 537	if err != nil {
 538		return nil, err
 539	}
 540	defer proc.Release()
 541	tr.LazyPrintf("acquired process")
 542
 543	wait := time.Since(start)
 544	start = time.Now()
 545
 546	loaded := ss.getLoaded()
 547	done, err := streamSearch(ctx, proc, q, opts, loaded.shards, collectSender)
 548	defer done()
 549	if err != nil {
 550		return nil, err
 551	}
 552
 553	aggregate, ok := collectSender.Done()
 554	if !ok {
 555		aggregate = &zoekt.SearchResult{
 556			RepoURLs:      map[string]string{},
 557			LineFragments: map[string]string{},
 558		}
 559	}
 560
 561	copyFiles(aggregate)
 562
 563	if !loaded.ready {
 564		// We may have missed results due to not being fully loaded.
 565		aggregate.Stats.Crashes++
 566	}
 567
 568	aggregate.Stats.Wait = wait
 569	aggregate.Stats.Duration = time.Since(start)
 570
 571	return aggregate, nil
 572}
 573
 574func (ss *shardedSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoekt.SearchOptions, sender zoekt.Sender) (err error) {
 575	tr, ctx := trace.New(ctx, "shardedSearcher.StreamSearch", "")
 576	defer func() {
 577		if err != nil {
 578			tr.LazyPrintf("error: %v", err)
 579			tr.SetError(err)
 580		}
 581		tr.Finish()
 582	}()
 583
 584	start := time.Now()
 585	proc, err := ss.sched.Acquire(ctx)
 586	if err != nil {
 587		return err
 588	}
 589	defer proc.Release()
 590	tr.LazyPrintf("acquired process")
 591
 592	loaded := ss.getLoaded()
 593	shards := loaded.shards
 594
 595	maxPendingPriority := math.Inf(-1)
 596	if len(shards) > 0 {
 597		maxPendingPriority = shards[0].priority
 598	}
 599
 600	stillLoadingCrashes := 0
 601	if !loaded.ready {
 602		// We may have missed results due to not being fully loaded.
 603		stillLoadingCrashes++
 604	}
 605
 606	sender.Send(&zoekt.SearchResult{
 607		Stats: zoekt.Stats{
 608			Crashes: stillLoadingCrashes,
 609			Wait:    time.Since(start),
 610		},
 611		Progress: zoekt.Progress{
 612			MaxPendingPriority: maxPendingPriority,
 613		},
 614	})
 615
 616	// Matches flow from the shards up the stack in the following order:
 617	//
 618	// 1. Search shards
 619	// 2. flushCollectSender (aggregate)
 620	// 3. limitSender (limit)
 621	// 4. copyFileSender (copy)
 622	//
 623	// For streaming, the wrapping has to happen in the inverted order.
 624	sender = copyFileSender(sender)
 625
 626	if truncator, hasLimits := zoekt.NewDisplayTruncator(opts); hasLimits {
 627		var cancel context.CancelFunc
 628		ctx, cancel = context.WithCancel(ctx)
 629		defer cancel()
 630		sender = limitSender(cancel, sender, truncator)
 631	}
 632
 633	sender, flush := newFlushCollectSender(opts, sender)
 634
 635	done, err := streamSearch(ctx, proc, q, opts, shards, sender)
 636
 637	// Even though streaming is done, we may have results sitting in a buffer we
 638	// need to flush. So we need to send those before calling done.
 639	flush()
 640	done()
 641
 642	return err
 643}
 644
 645// streamSearch is an internal helper since both Search and StreamSearch are
 646// largely similar.
 647//
 648// done must always be called, even if err is non-nil. The SearchResults sent
 649// via sender contain references to the underlying mmap data that the garbage
 650// collector can't see. Calling done informs the garbage collector it is free
 651// to collect those shards. The caller must call copyFiles on any
 652// SearchResults it returns/streams out before calling done.
 653func streamSearch(ctx context.Context, proc *process, q query.Q, opts *zoekt.SearchOptions, shards []*rankedShard, sender zoekt.Sender) (done func(), err error) {
 654	tr, ctx := trace.New(ctx, "shardedSearcher.streamSearch", "")
 655	overallStart := time.Now()
 656	metricSearchRunning.Inc()
 657	defer func() {
 658		metricSearchRunning.Dec()
 659		metricSearchDuration.Observe(time.Since(overallStart).Seconds())
 660		if err != nil {
 661			metricSearchFailedTotal.Inc()
 662
 663			tr.LazyPrintf("error: %v", err)
 664			tr.SetError(err)
 665		}
 666		tr.Finish()
 667	}()
 668
 669	// Select the subset of shards that we will search over for the given query.
 670	{
 671		beforeLen := len(shards)
 672		beforeQ := q
 673		shards, q = selectRepoSet(shards, q)
 674		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
 675	}
 676
 677	if len(shards) == 0 {
 678		return func() {}, nil
 679	}
 680
 681	var cancel context.CancelFunc
 682	if opts.MaxWallTime == 0 {
 683		ctx, cancel = context.WithCancel(ctx)
 684	} else {
 685		ctx, cancel = context.WithTimeout(ctx, opts.MaxWallTime)
 686	}
 687
 688	defer cancel()
 689
 690	// We set the number of workers to GOMAXPROCS, or the number of shards,
 691	// whichever is smaller.
 692	workers := runtime.GOMAXPROCS(0)
 693	if workers > len(shards) {
 694		workers = len(shards)
 695	}
 696
 697	type result struct {
 698		priority float64
 699		*zoekt.SearchResult
 700		err error
 701	}
 702
 703	var (
 704		// buffered channels to continue searching when sending back results
 705		// takes a while / blocks. The maximum pending result set is workers * 2.
 706		results = make(chan *result, workers)
 707		search  = make(chan *rankedShard, workers)
 708		wg      sync.WaitGroup
 709	)
 710
 711	// Start workers that receive shards from the search channel, search them,
 712	// and send the results to the results channel. This process is repeated
 713	// until the search channel is closed.
 714	//
 715	// Note: Making "search" a buffered channel has the effect of limiting the number of parallel shard searches.
 716	// Since searching is mostly CPU bound, limiting parallel shard searches also reduces the peak working set.
 717	wg.Add(workers)
 718	for i := 0; i < workers; i++ {
 719		go func() {
 720			defer wg.Done()
 721			for s := range search {
 722				sr, err := searchOneShard(ctx, s, q, opts)
 723				r := &result{priority: s.priority, SearchResult: sr, err: err}
 724				results <- r
 725			}
 726		}()
 727	}
 728
 729	go func() {
 730		wg.Wait()
 731		close(results)
 732	}()
 733
 734	var (
 735		pending = make(prioritySlice, 0, workers)
 736		shard   = 0
 737		next    = shards[shard]
 738
 739		// We need a separate nil-able reference to the same channel so we can close(search) for the worker
 740		// go-routines to finish but also set work to nil in order for the select statement below to ignore
 741		// that case when we want to stop a search. This is needed because sending on a closed channel panics.
 742		work = search
 743	)
 744
 745	stop := func() {
 746		if work != nil {
 747			close(search)
 748			work = nil
 749			next = nil
 750		}
 751	}
 752
 753	// tracked so we can stop when we hit TotalMaxMatchCount
 754	var totalMatchCount int
 755
 756search:
 757	for {
 758		// At the top of each iteration, have the proc associated with this search yield its won "timeslice"
 759		// to possibly allow other searches to make progress
 760		_ = proc.Yield(ctx) // Note: we let searchOneShard handle context errors
 761
 762		select {
 763		case work <- next: // is there a worker available to search the next shard?
 764			pending.append(next.priority)
 765
 766			shard++
 767			if shard == len(shards) {
 768				stop()
 769			} else {
 770				next = shards[shard]
 771			}
 772		case r, ok := <-results: // is there a result to send back?
 773			if !ok {
 774				break search
 775			}
 776
 777			// delete this result's priority from pending before computing the new max pending priority
 778			pending.remove(r.priority)
 779
 780			if r.err != nil {
 781				// Set final error and stop searching new shards, but consume any pending
 782				// search results.
 783				stop()
 784				err = r.err
 785				continue
 786			}
 787
 788			// Update the match count statistics and stop searching new shards if we've
 789			// reached the limit set in the options.
 790			totalMatchCount += r.SearchResult.Stats.MatchCount
 791			if opts.TotalMaxMatchCount > 0 && totalMatchCount > opts.TotalMaxMatchCount {
 792				stop()
 793			}
 794
 795			observeMetrics(r.SearchResult)
 796
 797			r.Priority = r.priority
 798			r.MaxPendingPriority = pending.max()
 799
 800			sendByRepository(r.SearchResult, opts, sender) // send the result back to the client
 801		}
 802	}
 803
 804	return func() { runtime.KeepAlive(shards) }, err
 805}
 806
 807// sendByRepository splits a zoekt.SearchResult by repository and calls
 808// sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult
 809// to contain results with the same zoekt.SearchResult.Priority only.
 810//
 811// We split by repository instead of by priority because it is easier to set
 812// RepoURLs and LineFragments in zoekt.SearchResult.
 813func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) {
 814	if len(result.RepoURLs) <= 1 || len(result.Files) == 0 {
 815		zoekt.SortFiles(result.Files)
 816		sender.Send(result)
 817		return
 818	}
 819
 820	send := func(repoName string, a, b int, stats zoekt.Stats) {
 821		zoekt.SortFiles(result.Files[a:b])
 822		sender.Send(&zoekt.SearchResult{
 823			Stats: stats,
 824			Progress: zoekt.Progress{
 825				Priority:           result.Files[a].RepositoryPriority,
 826				MaxPendingPriority: result.MaxPendingPriority,
 827			},
 828			Files:         result.Files[a:b],
 829			RepoURLs:      map[string]string{repoName: result.RepoURLs[repoName]},
 830			LineFragments: map[string]string{repoName: result.LineFragments[repoName]},
 831		})
 832	}
 833
 834	var startIndex, endIndex int
 835	curRepoID := result.Files[0].RepositoryID
 836	curRepoName := result.Files[0].Repository
 837
 838	fm := zoekt.FileMatch{}
 839	for endIndex, fm = range result.Files {
 840		if curRepoID != fm.RepositoryID {
 841			// Stats must stay aggregate-able, hence we sent the aggregate stats with the
 842			// last event.
 843			send(curRepoName, startIndex, endIndex, zoekt.Stats{})
 844
 845			startIndex = endIndex
 846			curRepoID = fm.RepositoryID
 847			curRepoName = fm.Repository
 848		}
 849	}
 850
 851	send(curRepoName, startIndex, endIndex+1, result.Stats)
 852}
 853
 854func observeMetrics(sr *zoekt.SearchResult) {
 855	metricSearchContentBytesLoadedTotal.Add(float64(sr.Stats.ContentBytesLoaded))
 856	metricSearchIndexBytesLoadedTotal.Add(float64(sr.Stats.IndexBytesLoaded))
 857	metricSearchCrashesTotal.Add(float64(sr.Stats.Crashes))
 858	metricSearchFileCountTotal.Add(float64(sr.Stats.FileCount))
 859	metricSearchShardFilesConsideredTotal.Add(float64(sr.Stats.ShardFilesConsidered))
 860	metricSearchFilesConsideredTotal.Add(float64(sr.Stats.FilesConsidered))
 861	metricSearchFilesLoadedTotal.Add(float64(sr.Stats.FilesLoaded))
 862	metricSearchFilesSkippedTotal.Add(float64(sr.Stats.FilesSkipped))
 863	metricSearchShardsSkippedTotal.Add(float64(sr.Stats.ShardsSkipped))
 864	metricSearchMatchCountTotal.Add(float64(sr.Stats.MatchCount))
 865	metricSearchNgramMatchesTotal.Add(float64(sr.Stats.NgramMatches))
 866	metricSearchNgramLookupsTotal.Add(float64(sr.Stats.NgramLookups))
 867	metricSearchRegexpsConsideredTotal.Add(float64(sr.Stats.RegexpsConsidered))
 868}
 869
 870func copySlice(src *[]byte) {
 871	if *src == nil {
 872		return
 873	}
 874	dst := make([]byte, len(*src))
 875	copy(dst, *src)
 876	*src = dst
 877}
 878
 879func copyFiles(sr *zoekt.SearchResult) {
 880	for i := range sr.Files {
 881		copySlice(&sr.Files[i].Content)
 882		copySlice(&sr.Files[i].Checksum)
 883		for l := range sr.Files[i].LineMatches {
 884			copySlice(&sr.Files[i].LineMatches[l].Line)
 885			copySlice(&sr.Files[i].LineMatches[l].Before)
 886			copySlice(&sr.Files[i].LineMatches[l].After)
 887		}
 888		for c := range sr.Files[i].ChunkMatches {
 889			copySlice(&sr.Files[i].ChunkMatches[c].Content)
 890		}
 891	}
 892}
 893
 894func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) {
 895	metricSearchShardRunning.Inc()
 896	defer func() {
 897		metricSearchShardRunning.Dec()
 898		if e := recover(); e != nil {
 899			log.Printf("[ERROR] crashed shard: %s: %#v, %s", s, e, debug.Stack())
 900
 901			if sr == nil {
 902				sr = &zoekt.SearchResult{}
 903			}
 904			sr.Stats.Crashes = 1
 905		}
 906	}()
 907
 908	return s.Search(ctx, q, opts)
 909}
 910
// shardListResult is the message listOneShard sends on its sink channel: the
// outcome of listing a single shard.
type shardListResult struct {
	// rl is the repository list produced for the shard.
	rl  *zoekt.RepoList
	// err is the error returned by the shard's List call, if any.
	err error
}
 915
 916func listOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.ListOptions, sink chan shardListResult) {
 917	metricListShardRunning.Inc()
 918	defer func() {
 919		metricListShardRunning.Dec()
 920		if r := recover(); r != nil {
 921			log.Printf("[ERROR] crashed shard: %s: %s, %s", s.String(), r, debug.Stack())
 922			sink <- shardListResult{
 923				&zoekt.RepoList{Crashes: 1}, nil,
 924			}
 925		}
 926	}()
 927
 928	ms, err := s.List(ctx, q, opts)
 929	sink <- shardListResult{ms, err}
 930}
 931
// List aggregates the repository listings of the loaded shards selected for q
// into a single RepoList.
//
// It first waits for ss.sched to grant a process (returning the Acquire error
// if that fails), narrows the shard set with selectRepoSet, and then lists the
// remaining shards concurrently using runtime.GOMAXPROCS(0) worker goroutines.
// The first shard error received aborts aggregation and is returned with a
// nil list. Entries in Repos that share a repository name are merged by adding
// their Stats together; for ReposMap the first entry received for an ID is
// kept. If the searcher has not been marked ready, the result starts with one
// crash recorded to signal that results may be missing.
func (ss *shardedSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) {
	tr, ctx := trace.New(ctx, "shardedSearcher.List", "")
	metricListRunning.Inc()
	// Uses the named results to record the outcome on the trace, whichever
	// return path is taken.
	defer func() {
		metricListRunning.Dec()
		if rl != nil {
			tr.LazyPrintf("repos.size=%d reposmap.size=%d crashes=%d stats=%+v", len(rl.Repos), len(rl.ReposMap), rl.Crashes, rl.Stats)
		}
		if err != nil {
			tr.LazyPrintf("error: %v", err)
			tr.SetError(err)
		}
		tr.Finish()
	}()

	q = query.Simplify(q)
	// isAll records whether the simplified query is the constant "true"; it
	// gates the list-all metrics reported at the end.
	isAll := false
	if c, ok := q.(*query.Const); ok {
		isAll = c.Value
	}

	proc, err := ss.sched.Acquire(ctx)
	if err != nil {
		return nil, err
	}
	defer proc.Release()
	tr.LazyPrintf("acquired process")

	loaded := ss.getLoaded()
	shards := loaded.shards

	// Setup what we return now, since we may short circuit if there are no
	// shards to search.
	stillLoadingCrashes := 0
	if !loaded.ready {
		// We may have missed results due to not being fully loaded.
		stillLoadingCrashes++
	}
	agg := zoekt.RepoList{
		Crashes:  stillLoadingCrashes,
		ReposMap: zoekt.ReposMap{},
		Repos:    []*zoekt.RepoListEntry{},
	}

	// PERF: Select the subset of shards that we will search over for the given
	// query. A common List query only asks for a specific repo, so this is an
	// important optimization.
	{
		beforeLen := len(shards)
		beforeQ := q
		shards, q = selectRepoSet(shards, q)
		tr.LazyPrintf("selectRepoSet shards=%d->%d q=%s->%s", beforeLen, len(shards), beforeQ, q)
	}

	if len(shards) == 0 {
		return &agg, nil
	}

	shardCount := len(shards)
	// all has room for one result per shard, and listOneShard sends exactly
	// one result per shard, so workers never block on send even if we return
	// early on an error below.
	all := make(chan shardListResult, shardCount)
	// feeder is filled up front and closed so that workers drain it and exit.
	feeder := make(chan zoekt.Searcher, len(shards))
	for _, s := range shards {
		feeder <- s
	}
	close(feeder)

	for i := 0; i < runtime.GOMAXPROCS(0); i++ {
		go func() {
			for s := range feeder {
				listOneShard(ctx, s, q, opts, all)
			}
		}()
	}

	// uniq merges Repos entries by repository name across shards.
	uniq := map[string]*zoekt.RepoListEntry{}

	// Receive exactly one result per shard.
	for range shards {
		r := <-all
		if r.err != nil {
			return nil, r.err
		}

		agg.Crashes += r.rl.Crashes
		agg.Stats.Add(&r.rl.Stats)

		for _, r := range r.rl.Repos {
			prev, ok := uniq[r.Repository.Name]
			if !ok {
				cp := *r // Copy so that merging duplicates below mutates our own entry, not the one returned by the shard.
				uniq[r.Repository.Name] = &cp
			} else {
				prev.Stats.Add(&r.Stats)
			}
		}

		// For ReposMap the first entry received for an ID wins; later ones
		// are dropped rather than merged.
		for id, r := range r.rl.ReposMap {
			_, ok := agg.ReposMap[id]
			if !ok {
				agg.ReposMap[id] = r
			}
		}
	}

	// Map iteration order is unspecified, so agg.Repos is in no particular
	// order.
	agg.Repos = make([]*zoekt.RepoListEntry, 0, len(uniq))
	for _, r := range uniq {
		agg.Repos = append(agg.Repos, r)
	}

	// Only one of these fields is populated and in all cases the size of that
	// field is the number of Repos.
	//
	// Note: we don't just add individual Stats.Repos since a repository can
	// have multiple shards.
	agg.Stats.Repos = len(uniq) + len(agg.ReposMap)

	if isAll && len(agg.Repos) > 0 {
		reportListAllMetrics(agg.Repos)
	}

	return &agg, nil
}
1053
1054func reportListAllMetrics(repos []*zoekt.RepoListEntry) {
1055	var stats zoekt.RepoStats
1056	for _, r := range repos {
1057		stats.Add(&r.Stats)
1058	}
1059
1060	metricListAllRepos.Set(float64(stats.Repos))
1061	metricListAllIndexBytes.Set(float64(stats.IndexBytes))
1062	metricListAllContentBytes.Set(float64(stats.ContentBytes))
1063	metricListAllDocuments.Set(float64(stats.Documents))
1064	metricListAllShards.Set(float64(stats.Shards))
1065	metricListAllNewLinesCount.Set(float64(stats.NewLinesCount))
1066	metricListAllDefaultBranchNewLinesCount.Set(float64(stats.DefaultBranchNewLinesCount))
1067	metricListAllOtherBranchesNewLinesCount.Set(float64(stats.OtherBranchesNewLinesCount))
1068}
1069
1070// getLoaded returns the currently loaded shards. Shared so do not mutate.
1071func (s *shardedSearcher) getLoaded() loaded {
1072	// next commit will store the true value of this, for now we keep the
1073	// backwards compatible behaviour.
1074	ready := s.ready.Load()
1075	// ranked is loaded after ready to avoid a race were ready is true but
1076	// ranked is still not the final set of shards.
1077	ranked, _ := s.ranked.Load().([]*rankedShard)
1078	return loaded{
1079		shards: ranked,
1080		ready:  ready,
1081	}
1082}
1083
1084func mkRankedShard(s zoekt.Searcher) *rankedShard {
1085	q := query.Const{Value: true}
1086	// We need to use WithUnsafeContext here, otherwise we cannot return a proper
1087	// rankedShard. On the user request path we use selectRepoSet which relies on
1088	// rankedShard.repos being set.
1089	result, err := s.List(systemtenant.WithUnsafeContext(context.Background()), &q, nil)
1090	if err != nil {
1091		log.Printf("[ERROR] mkRankedShard(%s): failed to cache repository list: %v", s, err)
1092		return &rankedShard{Searcher: s}
1093	}
1094
1095	var (
1096		maxPriority float64
1097		repos       = make([]*zoekt.Repository, 0, len(result.Repos))
1098	)
1099	for i := range result.Repos {
1100		repo := &result.Repos[i].Repository
1101		repos = append(repos, repo)
1102		if repo.RawConfig != nil {
1103			priority, _ := strconv.ParseFloat(repo.RawConfig["priority"], 64)
1104			if priority > maxPriority {
1105				maxPriority = priority
1106			}
1107		}
1108	}
1109
1110	return &rankedShard{
1111		Searcher: s,
1112		repos:    repos,
1113		priority: maxPriority,
1114	}
1115}
1116
// markReady should be called once all shards have been passed into replace on
// startup. Once s is marked as ready it stops reporting a Crash in the
// response Stats.
func (s *shardedSearcher) markReady() {
	// Only flips ready from false to true; has no effect if it is already true.
	s.ready.CompareAndSwap(false, true)
}
1123
1124func (s *shardedSearcher) replace(shards map[string]zoekt.Searcher) {
1125	if len(shards) == 0 {
1126		return
1127	}
1128
1129	defer func(began time.Time) {
1130		metricShardsBatchReplaceDurationSeconds.Observe(time.Since(began).Seconds())
1131	}(time.Now())
1132
1133	s.mu.Lock()
1134	defer s.mu.Unlock()
1135
1136	for key, shard := range shards {
1137		var r *rankedShard
1138		if shard != nil {
1139			r = mkRankedShard(shard)
1140		}
1141
1142		old := s.shards[key]
1143		if shard == nil {
1144			delete(s.shards, key)
1145		} else {
1146			s.shards[key] = r
1147		}
1148
1149		if old != nil && old.Searcher != nil {
1150			//                 _ ___                /^^\ /^\  /^^\_
1151			//     _          _@)@) \            ,,/ '` ~ `'~~ ', `\.
1152			//   _/o\_ _ _ _/~`.`...'~\        ./~~..,'`','',.,' '  ~:
1153			//  / `,'.~,~.~  .   , . , ~|,   ,/ .,' , ,. .. ,,.   `,  ~\_
1154			// ( ' _' _ '_` _  '  .    , `\_/ .' ..' '  `  `   `..  `,   \_
1155			//  ~V~ V~ V~ V~ ~\ `   ' .  '    , ' .,.,''`.,.''`.,.``. ',   \_
1156			//   _/\ /\ /\ /\_/, . ' ,   `_/~\_ .' .,. ,, , _/~\_ `. `. '.,  \_
1157			//  < ~ ~ '~`'~'`, .,  .   `_: ::: \_ '      `_/ ::: \_ `.,' . ',  \_
1158			//   \ ' `_  '`_    _    ',/ _::_::_ \ _    _/ _::_::_ \   `.,'.,`., \-,-,-,_,_,
1159			//    `'~~ `'~~ `'~~ `'~~  \(_)(_)(_)/  `~~' \(_)(_)(_)/ ~'`\_.._,._,'_;_;_;_;_;
1160			//
1161			// We can't just call Close now, because there may be ongoing searches
1162			// which have old in the shards list. Previously we used an exclusive
1163			// lock to guarantee there were no concurrent searches. However, that
1164			// led to blocking on the read path.
1165			//
1166			// We could introduce granular locking per rankedShard to know when
1167			// there are no more references. However, this becomes tricky in
1168			// practice. Instead we rely on the garbage collector noticing old is no
1169			// longer used. We take care in our searchers to runtime.KeepAlive until
1170			// we have stopped referencing the underling mmap data.
1171			runtime.SetFinalizer(old, func(r *rankedShard) {
1172				r.Close()
1173			})
1174		}
1175	}
1176
1177	ranked := make([]*rankedShard, 0, len(s.shards))
1178	for _, r := range s.shards {
1179		ranked = append(ranked, r)
1180	}
1181
1182	sort.Slice(ranked, func(i, j int) bool {
1183		priorityDiff := ranked[i].priority - ranked[j].priority
1184		if priorityDiff != 0 {
1185			return priorityDiff > 0
1186		}
1187		if len(ranked[i].repos) == 0 || len(ranked[j].repos) == 0 {
1188			// Protect against empty names which can happen if we fail to List or
1189			// the shard is full of tombstones. Prefer the shard which has names.
1190			return len(ranked[i].repos) >= len(ranked[j].repos)
1191		}
1192		return ranked[i].repos[0].Name < ranked[j].repos[0].Name
1193	})
1194
1195	s.ranked.Store(ranked)
1196
1197	metricShardsLoaded.Set(float64(len(ranked)))
1198}
1199
1200func loadShard(fn string) (zoekt.Searcher, error) {
1201	f, err := os.Open(fn)
1202	if err != nil {
1203		return nil, err
1204	}
1205
1206	iFile, err := zoekt.NewIndexFile(f)
1207	if err != nil {
1208		return nil, err
1209	}
1210	s, err := zoekt.NewSearcher(iFile)
1211	if err != nil {
1212		iFile.Close()
1213		return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
1214	}
1215
1216	return s, nil
1217}
1218
// prioritySlice is a deliberately simple collection of priorities offering
// three operations: adding a value, removing one occurrence of a value, and
// reporting the largest value. Removal and max scan the whole slice, which is
// acceptable because N is restricted to GOMAXPROCS (i.e., number of cpu
// cores) by the shardedSearcher interface.
type prioritySlice []float64

// append adds pri to the end of the collection.
func (p *prioritySlice) append(pri float64) {
	grown := append(*p, pri)
	*p = grown
}

// remove deletes the first element equal to pri, if there is one. The last
// element is moved into the vacated position, so element order is not
// preserved.
func (p *prioritySlice) remove(pri float64) {
	s := *p
	last := len(s) - 1
	for i := range s {
		if s[i] != pri {
			continue
		}
		// Overwrite the match with the tail element (a no-op when the match
		// is the tail itself), then drop the tail.
		s[i] = s[last]
		*p = s[:last]
		return
	}
}

// max returns the largest value in the collection, or negative infinity when
// the collection is empty.
func (p *prioritySlice) max() float64 {
	best := math.Inf(-1)
	s := *p
	for i := range s {
		if s[i] > best {
			best = s[i]
		}
	}
	return best
}
Configure Feed

Configure Feed