api.go at 5e2620e0cf642a18f3f85da79122b7d1e20e6193 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / api.go
at 5e2620e0cf642a18f3f85da79122b7d1e20e6193 27 kB View raw
Keegan Carruthers-Smith all: remove deprecated RepoList.Minimal (#624) 2y ago
  1// Copyright 2016 Google Inc. All rights reserved.
  2//
  3// Licensed under the Apache License, Version 2.0 (the "License");
  4// you may not use this file except in compliance with the License.
  5// You may obtain a copy of the License at
  6//
  7//    http://www.apache.org/licenses/LICENSE-2.0
  8//
  9// Unless required by applicable law or agreed to in writing, software
 10// distributed under the License is distributed on an "AS IS" BASIS,
 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12// See the License for the specific language governing permissions and
 13// limitations under the License.
 14
 15package zoekt // import "github.com/sourcegraph/zoekt"
 16
 17import (
 18	"context"
 19	"encoding/json"
 20	"errors"
 21	"fmt"
 22	"reflect"
 23	"strconv"
 24	"time"
 25
 26	"github.com/sourcegraph/zoekt/query"
 27)
 28
 29const mapHeaderBytes uint64 = 48
 30const sliceHeaderBytes uint64 = 24
 31const stringHeaderBytes uint64 = 16
 32const pointerSize uint64 = 8
 33const interfaceBytes uint64 = 16
 34
 35// FileMatch contains all the matches within a file.
 36type FileMatch struct {
 37	// Ranking; the higher, the better.
 38	Score float64 // TODO - hide this field?
 39
 40	// For debugging. Needs DebugScore set, but public so tests in
 41	// other packages can print some diagnostics.
 42	Debug string
 43
 44	FileName string
 45
 46	// Repository is the globally unique name of the repo of the
 47	// match
 48	Repository string
 49	Branches   []string
 50
 51	// One of LineMatches or ChunkMatches will be returned depending on whether
 52	// the SearchOptions.ChunkMatches is set.
 53	LineMatches  []LineMatch
 54	ChunkMatches []ChunkMatch
 55
 56	// RepositoryID is a Sourcegraph extension. This is the ID of Repository in
 57	// Sourcegraph.
 58	RepositoryID uint32
 59
 60	// RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
 61	// order results from different repositories relative to each other.
 62	RepositoryPriority float64
 63
 64	// Only set if requested
 65	Content []byte
 66
 67	// Checksum of the content.
 68	Checksum []byte
 69
 70	// Detected language of the result.
 71	Language string
 72
 73	// SubRepositoryName is the globally unique name of the repo,
 74	// if it came from a subrepository
 75	SubRepositoryName string
 76
 77	// SubRepositoryPath holds the prefix where the subrepository
 78	// was mounted.
 79	SubRepositoryPath string
 80
 81	// Commit SHA1 (hex) of the (sub)repo holding the file.
 82	Version string
 83}
 84
 85func (m *FileMatch) sizeBytes() (sz uint64) {
 86	// Score
 87	sz += 8
 88
 89	for _, s := range []string{
 90		m.Debug,
 91		m.FileName,
 92		m.Repository,
 93		m.Language,
 94		m.SubRepositoryName,
 95		m.SubRepositoryPath,
 96		m.Version,
 97	} {
 98		sz += stringHeaderBytes + uint64(len(s))
 99	}
100
101	// Branches
102	sz += sliceHeaderBytes
103	for _, s := range m.Branches {
104		sz += stringHeaderBytes + uint64(len(s))
105	}
106
107	// LineMatches
108	sz += sliceHeaderBytes
109	for _, lm := range m.LineMatches {
110		sz += lm.sizeBytes()
111	}
112
113	// ChunkMatches
114	sz += sliceHeaderBytes
115	for _, cm := range m.ChunkMatches {
116		sz += cm.sizeBytes()
117	}
118
119	// RepositoryID
120	sz += 4
121
122	// RepositoryPriority
123	sz += 8
124
125	// Content
126	sz += sliceHeaderBytes + uint64(len(m.Content))
127
128	// Checksum
129	sz += sliceHeaderBytes + uint64(len(m.Checksum))
130
131	return
132}
133
134// ChunkMatch is a set of non-overlapping matches within a contiguous range of
135// lines in the file.
136type ChunkMatch struct {
137	// Content is a contiguous range of complete lines that fully contains Ranges.
138	Content []byte
139	// ContentStart is the location (inclusive) of the beginning of content
140	// relative to the beginning of the file. It will always be at the
141	// beginning of a line (Column will always be 1).
142	ContentStart Location
143
144	// FileName indicates whether this match is a match on the file name, in
145	// which case Content will contain the file name.
146	FileName bool
147
148	// Ranges is a set of matching ranges within this chunk. Each range is relative
149	// to the beginning of the file (not the beginning of Content).
150	Ranges []Range
151
152	// SymbolInfo is the symbol information associated with Ranges. If it is non-nil,
153	// its length will equal that of Ranges. Any of its elements may be nil.
154	SymbolInfo []*Symbol
155
156	Score      float64
157	DebugScore string
158}
159
160func (cm *ChunkMatch) sizeBytes() (sz uint64) {
161	// Content
162	sz += sliceHeaderBytes + uint64(len(cm.Content))
163
164	// ContentStart
165	sz += cm.ContentStart.sizeBytes()
166
167	// FileName
168	sz += 1
169
170	// Ranges
171	sz += sliceHeaderBytes
172	if len(cm.Ranges) > 0 {
173		sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes()
174	}
175
176	// SymbolInfo
177	sz += sliceHeaderBytes
178	for _, si := range cm.SymbolInfo {
179		sz += pointerSize
180		if si != nil {
181			sz += si.sizeBytes()
182		}
183	}
184
185	// Score
186	sz += 8
187
188	// DebugScore
189	sz += stringHeaderBytes + uint64(len(cm.DebugScore))
190
191	return
192}
193
194type Range struct {
195	// The inclusive beginning of the range.
196	Start Location
197	// The exclusive end of the range.
198	End Location
199}
200
201func (r *Range) sizeBytes() uint64 {
202	return r.Start.sizeBytes() + r.End.sizeBytes()
203}
204
// Location identifies a position within a file.
type Location struct {
	// ByteOffset is the 0-based byte offset from the beginning of the file.
	ByteOffset uint32
	// LineNumber is the 1-based line number from the beginning of the file.
	LineNumber uint32
	// Column is the 1-based column number, counted in runes, from the
	// beginning of the line.
	Column uint32
}
213
214func (l *Location) sizeBytes() uint64 {
215	return 3 * 4
216}
217
218// LineMatch holds the matches within a single line in a file.
219type LineMatch struct {
220	// The line in which a match was found.
221	Line       []byte
222	LineStart  int
223	LineEnd    int
224	LineNumber int
225
226	// Before and After are only set when SearchOptions.NumContextLines is > 0
227	Before []byte
228	After  []byte
229
230	// If set, this was a match on the filename.
231	FileName bool
232
233	// The higher the better. Only ranks the quality of the match
234	// within the file, does not take rank of file into account
235	Score      float64
236	DebugScore string
237
238	LineFragments []LineFragmentMatch
239}
240
241func (lm *LineMatch) sizeBytes() (sz uint64) {
242	// Line
243	sz += sliceHeaderBytes + uint64(len(lm.Line))
244
245	// LineStart, LineEnd, LineNumber
246	sz += 3 * 8
247
248	// Before
249	sz += sliceHeaderBytes + uint64(len(lm.Before))
250
251	// After
252	sz += sliceHeaderBytes + uint64(len(lm.After))
253
254	// FileName
255	sz += 1
256
257	// Score
258	sz += 8
259
260	// DebugScore
261	sz += stringHeaderBytes + uint64(len(lm.DebugScore))
262
263	// LineFragments
264	sz += sliceHeaderBytes
265	for _, lf := range lm.LineFragments {
266		sz += lf.sizeBytes()
267	}
268
269	return
270}
271
// Symbol carries the symbol information attached to a match.
//
// NOTE(review): the field meanings are not documented in this file.
// Presumably Sym/Kind describe the symbol itself and Parent/ParentKind its
// enclosing symbol -- confirm against the indexer.
type Symbol struct {
	Sym        string
	Kind       string
	Parent     string
	ParentKind string
}
278
279func (s *Symbol) sizeBytes() uint64 {
280	return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind))
281}
282
283// LineFragmentMatch a segment of matching text within a line.
284type LineFragmentMatch struct {
285	// Offset within the line, in bytes.
286	LineOffset int
287
288	// Offset from file start, in bytes.
289	Offset uint32
290
291	// Number bytes that match.
292	MatchLength int
293
294	SymbolInfo *Symbol
295}
296
297func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) {
298	// LineOffset
299	sz += 8
300
301	// Offset
302	sz += 4
303
304	// MatchLength
305	sz += 8
306
307	// SymbolInfo
308	sz += pointerSize
309	if lfm.SymbolInfo != nil {
310		sz += lfm.SymbolInfo.sizeBytes()
311	}
312
313	return
314}
315
// FlushReason identifies why search results were flushed. Each reason
// occupies its own bit; the zero value means no reason has been recorded.
type FlushReason uint8

const (
	FlushReasonTimerExpired FlushReason = 1
	FlushReasonFinalFlush   FlushReason = 2
	FlushReasonMaxSize      FlushReason = 4
)
323
324var FlushReasonStrings = map[FlushReason]string{
325	FlushReasonTimerExpired: "timer_expired",
326	FlushReasonFinalFlush:   "final_flush",
327	FlushReasonMaxSize:      "max_size_reached",
328}
329
330func (fr FlushReason) String() string {
331	if v, ok := FlushReasonStrings[fr]; ok {
332		return v
333	}
334
335	return "none"
336}
337
338// Stats contains interesting numbers on the search
339type Stats struct {
340	// Amount of I/O for reading contents.
341	ContentBytesLoaded int64
342
343	// Amount of I/O for reading from index.
344	IndexBytesLoaded int64
345
346	// Number of search shards that had a crash.
347	Crashes int
348
349	// Wall clock time for this search
350	Duration time.Duration
351
352	// Number of files containing a match.
353	FileCount int
354
355	// Number of files in shards that we considered.
356	ShardFilesConsidered int
357
358	// Files that we evaluated. Equivalent to files for which all
359	// atom matches (including negations) evaluated to true.
360	FilesConsidered int
361
362	// Files for which we loaded file content to verify substring matches
363	FilesLoaded int
364
365	// Candidate files whose contents weren't examined because we
366	// gathered enough matches.
367	FilesSkipped int
368
369	// Shards that we scanned to find matches.
370	ShardsScanned int
371
372	// Shards that we did not process because a query was canceled.
373	ShardsSkipped int
374
375	// Shards that we did not process because the query was rejected by the
376	// ngram filter indicating it had no matches.
377	ShardsSkippedFilter int
378
379	// Number of non-overlapping matches
380	MatchCount int
381
382	// Number of candidate matches as a result of searching ngrams.
383	NgramMatches int
384
385	// NgramLookups is the number of times we accessed an ngram in the index.
386	NgramLookups int
387
388	// Wall clock time for queued search.
389	Wait time.Duration
390
391	// Aggregate wall clock time spent constructing and pruning the match tree.
392	// This accounts for time such as lookups in the trigram index.
393	MatchTreeConstruction time.Duration
394
395	// Aggregate wall clock time spent searching the match tree. This accounts
396	// for the bulk of search work done looking for matches.
397	MatchTreeSearch time.Duration
398
399	// Number of times regexp was called on files that we evaluated.
400	RegexpsConsidered int
401
402	// FlushReason explains why results were flushed.
403	FlushReason FlushReason
404}
405
406func (s *Stats) sizeBytes() (sz uint64) {
407	sz = 16 * 8 // This assumes we are running on a 64-bit architecture
408	sz += 1     // FlushReason
409
410	return
411}
412
413func (s *Stats) Add(o Stats) {
414	s.ContentBytesLoaded += o.ContentBytesLoaded
415	s.IndexBytesLoaded += o.IndexBytesLoaded
416	s.Crashes += o.Crashes
417	s.FileCount += o.FileCount
418	s.FilesConsidered += o.FilesConsidered
419	s.FilesLoaded += o.FilesLoaded
420	s.FilesSkipped += o.FilesSkipped
421	s.MatchCount += o.MatchCount
422	s.NgramMatches += o.NgramMatches
423	s.NgramLookups += o.NgramLookups
424	s.ShardFilesConsidered += o.ShardFilesConsidered
425	s.ShardsScanned += o.ShardsScanned
426	s.ShardsSkipped += o.ShardsSkipped
427	s.ShardsSkippedFilter += o.ShardsSkippedFilter
428	s.Wait += o.Wait
429	s.MatchTreeConstruction += o.MatchTreeConstruction
430	s.MatchTreeSearch += o.MatchTreeSearch
431	s.RegexpsConsidered += o.RegexpsConsidered
432
433	// We want the first non-zero FlushReason to be sticky. This is a useful
434	// property when aggregating stats from several Zoekts.
435	if s.FlushReason == 0 {
436		s.FlushReason = o.FlushReason
437	}
438}
439
440// Zero returns true if stats is empty.
441func (s *Stats) Zero() bool {
442	if s == nil {
443		return true
444	}
445
446	return !(s.ContentBytesLoaded > 0 ||
447		s.IndexBytesLoaded > 0 ||
448		s.Crashes > 0 ||
449		s.FileCount > 0 ||
450		s.FilesConsidered > 0 ||
451		s.FilesLoaded > 0 ||
452		s.FilesSkipped > 0 ||
453		s.MatchCount > 0 ||
454		s.NgramMatches > 0 ||
455		s.NgramLookups > 0 ||
456		s.ShardFilesConsidered > 0 ||
457		s.ShardsScanned > 0 ||
458		s.ShardsSkipped > 0 ||
459		s.ShardsSkippedFilter > 0 ||
460		s.Wait > 0 ||
461		s.MatchTreeConstruction > 0 ||
462		s.MatchTreeSearch > 0 ||
463		s.RegexpsConsidered > 0)
464}
465
// Progress reports the global progress of a running search query. The
// frontend uses it to reorder results and to emit them once they are stable.
// Sourcegraph specific: it is used when querying multiple zoekt-webserver
// instances.
type Progress struct {
	// Priority is the priority of the shard that was searched.
	Priority float64

	// MaxPendingPriority is the maximum priority among pending results that
	// are still being searched in parallel. It lets a consumer reorder
	// results once the result set is known to be stable: when a result's
	// Priority is greater than the max(MaxPendingPriority) taken over the
	// latest results of each backend, the result can be returned to the user.
	//
	// MaxPendingPriority decreases monotonically in each SearchResult.
	MaxPendingPriority float64
}
480
481func (p *Progress) sizeBytes() uint64 {
482	return 2 * 8
483}
484
485// SearchResult contains search matches and extra data
486type SearchResult struct {
487	Stats
488
489	// Do not encode this as we cannot encode -Inf in JSON
490	Progress `json:"-"`
491
492	Files []FileMatch
493
494	// RepoURLs holds a repo => template string map.
495	RepoURLs map[string]string
496
497	// FragmentNames holds a repo => template string map, for
498	// the line number fragment.
499	LineFragments map[string]string
500}
501
502// SizeBytes is a best-effort estimate of the size of SearchResult in memory.
503// The estimate does not take alignment into account. The result is a lower
504// bound on the actual size in memory.
505func (sr *SearchResult) SizeBytes() (sz uint64) {
506	sz += sr.Stats.sizeBytes()
507	sz += sr.Progress.sizeBytes()
508
509	// Files
510	sz += sliceHeaderBytes
511	for _, f := range sr.Files {
512		sz += f.sizeBytes()
513	}
514
515	// RepoURLs
516	sz += mapHeaderBytes
517	for k, v := range sr.RepoURLs {
518		sz += stringHeaderBytes + uint64(len(k))
519		sz += stringHeaderBytes + uint64(len(v))
520	}
521
522	// LineFragments
523	sz += mapHeaderBytes
524	for k, v := range sr.LineFragments {
525		sz += stringHeaderBytes + uint64(len(k))
526		sz += stringHeaderBytes + uint64(len(v))
527	}
528
529	return
530}
531
// RepositoryBranch describes an indexed branch: a branch name paired with a
// version.
type RepositoryBranch struct {
	Name    string
	Version string
}
538
539func (r RepositoryBranch) String() string {
540	return fmt.Sprintf("%s@%s", r.Name, r.Version)
541}
542
543// Repository holds repository metadata.
544type Repository struct {
545	// Sourcegraph's repository ID
546	ID uint32
547
548	// The repository name
549	Name string
550
551	// The repository URL.
552	URL string
553
554	// The physical source where this repo came from, eg. full
555	// path to the zip filename or git repository directory. This
556	// will not be exposed in the UI, but can be used to detect
557	// orphaned index shards.
558	Source string
559
560	// The branches indexed in this repo.
561	Branches []RepositoryBranch
562
563	// Nil if this is not the super project.
564	SubRepoMap map[string]*Repository
565
566	// URL template to link to the commit of a branch
567	CommitURLTemplate string
568
569	// The repository URL for getting to a file.  Has access to
570	// {{.Version}}, {{.Path}}
571	FileURLTemplate string
572
573	// The URL fragment to add to a file URL for line numbers. has
574	// access to {{.LineNumber}}. The fragment should include the
575	// separator, generally '#' or ';'.
576	LineFragmentTemplate string
577
578	// Perf optimization: priority is set when we load the shard. It corresponds to
579	// the value of "priority" stored in RawConfig.
580	priority float64
581
582	// All zoekt.* configuration settings.
583	RawConfig map[string]string
584
585	// Importance of the repository, bigger is more important
586	Rank uint16
587
588	// IndexOptions is a hash of the options used to create the index for the
589	// repo.
590	IndexOptions string
591
592	// HasSymbols is true if this repository has indexed ctags
593	// output. Sourcegraph specific: This field is more appropriate for
594	// IndexMetadata. However, we store it here since the Sourcegraph frontend
595	// can read this structure but not IndexMetadata.
596	HasSymbols bool
597
598	// Tombstone is true if we are not allowed to search this repo.
599	Tombstone bool
600
601	// LatestCommitDate is the date of the latest commit among all indexed Branches.
602	// The date might be time.Time's 0-value if the repository was last indexed
603	// before this field was added.
604	LatestCommitDate time.Time
605
606	// FileTombstones is a set of file paths that should be ignored across all branches
607	// in this shard.
608	FileTombstones map[string]struct{} `json:",omitempty"`
609}
610
611func (r *Repository) UnmarshalJSON(data []byte) error {
612	// We define a new type so that we can use json.Unmarshal
613	// without recursing into this same method.
614	type repository *Repository
615	repo := repository(r)
616
617	err := json.Unmarshal(data, repo)
618	if err != nil {
619		return err
620	}
621
622	if v, ok := repo.RawConfig["repoid"]; ok {
623		id, _ := strconv.ParseUint(v, 10, 32)
624		r.ID = uint32(id)
625	}
626
627	if v, ok := repo.RawConfig["priority"]; ok {
628		r.priority, err = strconv.ParseFloat(v, 64)
629		if err != nil {
630			r.priority = 0
631		}
632
633		// Sourcegraph indexserver doesn't set repo.Rank, so we set it here
634		// based on priority. Setting it on read instead of during indexing
635		// allows us to avoid a complete reindex.
636		if r.Rank == 0 && r.priority > 0 {
637			// Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular
638			// repos (roughly ones with over 5,000 stars) see diminishing returns from more stars.
639			r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16)
640		}
641	}
642	return nil
643}
644
645// MergeMutable will merge x into r. mutated will be true if it made any
646// changes. err is non-nil if we needed to mutate an immutable field.
647//
648// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are
649// computed while indexing so can't be synthesized from x.
650//
651// Note: We ignore RawConfig fields which are duplicated into Repository:
652// name and id.
653//
654// Note: URL, *Template fields are ignored. They are not used by Sourcegraph.
655func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) {
656	if r.ID != x.ID {
657		// Sourcegraph: strange behaviour may occur if ID changes but names don't.
658		return mutated, errors.New("ID is immutable")
659	}
660	if r.Name != x.Name {
661		// Name is encoded into the shard name on disk. We need to re-index if it
662		// changes.
663		return mutated, errors.New("Name is immutable")
664	}
665	if !reflect.DeepEqual(r.Branches, x.Branches) {
666		// Need a reindex if content changing.
667		return mutated, errors.New("Branches is immutable")
668	}
669
670	for k, v := range x.RawConfig {
671		// We ignore name and id since they are encoded into the repository.
672		if k == "name" || k == "id" {
673			continue
674		}
675		if r.RawConfig == nil {
676			mutated = true
677			r.RawConfig = make(map[string]string)
678		}
679		if r.RawConfig[k] != v {
680			mutated = true
681			r.RawConfig[k] = v
682		}
683	}
684
685	return mutated, nil
686}
687
// IndexMetadata holds the metadata stored in the index file. Its data is
// generated by the core indexing library.
type IndexMetadata struct {
	IndexFormatVersion    int
	IndexFeatureVersion   int
	IndexMinReaderVersion int
	IndexTime             time.Time
	PlainASCII            bool
	LanguageMap           map[string]uint16
	ZoektVersion          string
	ID                    string
}
700
// RepoStats holds statistics of a repository or a collection of
// repositories.
type RepoStats struct {
	// Repos is used for aggregating the number of repositories.
	//
	// Note: this field is not populated on RepoListEntry.Stats (individual)
	// but only on RepoList.Stats (aggregate).
	Repos int

	// Shards is the total number of search shards.
	Shards int

	// Documents is the number of documents or files.
	Documents int

	// IndexBytes is the amount of RAM used for index overhead.
	IndexBytes int64

	// ContentBytes is the amount of RAM used for raw content.
	ContentBytes int64

	// Sourcegraph specific stats below. These are not as efficient to
	// calculate as the statistics above: we experimentally measured about a
	// 10% slower shard load time. However, the values are very useful to
	// track, and computing them outside of load time introduces a lot of
	// complexity.

	// NewLinesCount is the number of newlines "\n" that appear in the zoekt
	// indexed documents. This is not exactly the same as a line count, since
	// it does not include lines that are not terminated by "\n" (e.g. a file
	// with no "\n", or a final line without "\n"). Note: Zoekt deduplicates
	// documents across branches, so if a path has the same contents on
	// multiple branches there is only one document for it, and that
	// document's newlines are only counted once. See
	// DefaultBranchNewLinesCount and OtherBranchesNewLinesCount for counts
	// which do not deduplicate.
	NewLinesCount uint64

	// DefaultBranchNewLinesCount is the number of newlines "\n" in the
	// default branch.
	DefaultBranchNewLinesCount uint64

	// OtherBranchesNewLinesCount is the number of newlines "\n" in all
	// branches except the default branch.
	OtherBranchesNewLinesCount uint64
}
744
745func (s *RepoStats) Add(o *RepoStats) {
746	// can't update Repos, since one repo may have multiple
747	// shards.
748	s.Shards += o.Shards
749	s.IndexBytes += o.IndexBytes
750	s.Documents += o.Documents
751	s.ContentBytes += o.ContentBytes
752
753	// Sourcegraph specific
754	s.NewLinesCount += o.NewLinesCount
755	s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount
756	s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount
757}
758
759type RepoListEntry struct {
760	Repository    Repository
761	IndexMetadata IndexMetadata
762	Stats         RepoStats
763}
764
765// MinimalRepoListEntry is a subset of RepoListEntry. It was added after
766// performance profiling of sourcegraph.com revealed that querying this
767// information from Zoekt was causing lots of CPU and memory usage. Note: we
768// can revisit this, how we store and query this information has changed a lot
769// since this was introduced.
770type MinimalRepoListEntry struct {
771	// HasSymbols is exported since Sourcegraph uses this information at search
772	// planning time to decide between Zoekt and an unindexed symbol search.
773	//
774	// Note: it pretty much is always true in practice.
775	HasSymbols bool
776
777	// Branches is used by Sourcegraphs query planner to decided if it can use
778	// zoekt or go via an unindexed code path.
779	Branches []RepositoryBranch
780
781	// IndexTimeUnix is the IndexTime converted to unix time (number of seconds
782	// since the epoch). This is to make it clear we are not transporting the
783	// full fidelty timestamp (ie with milliseconds and location). Additionally
784	// it saves 16 bytes in this struct.
785	//
786	// IndexTime is used as a heuristic in Sourcegraph to decide in aggregate
787	// how many repositories need updating after a ranking change/etc.
788	//
789	// TODO(keegancsmith) audit updates to IndexTime and document how and when
790	// it changes. Concerned about things like metadata updates or compound
791	// shards leading to untrustworthy data here.
792	IndexTimeUnix int64
793}
794
795type ReposMap map[uint32]MinimalRepoListEntry
796
797// MarshalBinary implements a specialized encoder for ReposMap.
798func (q *ReposMap) MarshalBinary() ([]byte, error) {
799	return reposMapEncode(*q)
800}
801
802// UnmarshalBinary implements a specialized decoder for ReposMap.
803func (q *ReposMap) UnmarshalBinary(b []byte) error {
804	var err error
805	(*q), err = reposMapDecode(b)
806	return err
807}
808
809// RepoList holds a set of Repository metadata.
810type RepoList struct {
811	// Returned when ListOptions.Field is RepoListFieldRepos.
812	Repos []*RepoListEntry
813
814	// ReposMap is set when ListOptions.Field is RepoListFieldReposMap.
815	ReposMap ReposMap
816
817	Crashes int
818
819	// Stats response to a List request.
820	// This is the aggregate RepoStats of all repos matching the input query.
821	Stats RepoStats
822}
823
824type Searcher interface {
825	Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error)
826
827	// List lists repositories. The query `q` can only contain
828	// query.Repo atoms.
829	List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error)
830	Close()
831
832	// Describe the searcher for debug messages.
833	String() string
834}
835
// RepoListField selects which field of RepoList a List request populates.
type RepoListField int

const (
	// RepoListFieldRepos populates RepoList.Repos.
	RepoListFieldRepos RepoListField = 0

	// The value 1 is not assigned to a constant; ListOptions.GetField
	// rejects it as the no-longer-supported RepoListFieldMinimal.

	// RepoListFieldReposMap populates RepoList.ReposMap.
	//
	// The type is spelled out explicitly: in a const group only the first
	// constant inherits nothing, so "= 2" without a type would declare an
	// untyped constant rather than a RepoListField.
	RepoListFieldReposMap RepoListField = 2
)
842
843type ListOptions struct {
844	// Field decides which field to populate in RepoList response.
845	Field RepoListField
846}
847
848func (o *ListOptions) GetField() (RepoListField, error) {
849	if o == nil {
850		return RepoListFieldRepos, nil
851	}
852	switch o.Field {
853	case RepoListFieldRepos, RepoListFieldReposMap:
854		return o.Field, nil
855	case 1:
856		return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field)
857	default:
858		return 0, fmt.Errorf("unknown RepoListField %d", o.Field)
859	}
860}
861
862func (o *ListOptions) String() string {
863	return fmt.Sprintf("%#v", o)
864}
865
// SearchOptions holds the options of a search request.
type SearchOptions struct {
	// EstimateDocCount returns an upper-bound estimate of eligible documents
	// in stats.ShardFilesConsidered.
	EstimateDocCount bool

	// Whole returns the whole file.
	Whole bool

	// ShardMaxMatchCount is a maximum number of matches: all further
	// processing of an index shard is skipped after this many
	// non-overlapping matches were found in it.
	ShardMaxMatchCount int

	// TotalMaxMatchCount is a maximum number of matches: we stop looking for
	// more matches once we have this many matches across shards.
	TotalMaxMatchCount int

	// ShardRepoMaxMatchCount is a maximum number of matches: processing
	// documents for a repository in a shard is skipped once
	// ShardRepoMaxMatchCount matches have been found.
	//
	// A compound shard may contain multiple repositories. This will most
	// often be set to 1 to find all repositories containing a result.
	ShardRepoMaxMatchCount int

	// Deprecated: this field is not read anymore.
	ShardMaxImportantMatch int

	// Deprecated: this field is not read anymore.
	TotalMaxImportantMatch int

	// MaxWallTime aborts the search after this much time has passed.
	MaxWallTime time.Duration

	// FlushWallTime, if non-zero, stops streaming behaviour at first and
	// instead collates and sorts results. At FlushWallTime the results are
	// sent and the behaviour then reverts to normal streaming.
	FlushWallTime time.Duration

	// MaxDocDisplayCount truncates the number of documents (i.e. files)
	// after collating and sorting the results.
	MaxDocDisplayCount int

	// MaxMatchDisplayCount truncates the number of matches after collating
	// and sorting the results.
	MaxMatchDisplayCount int

	// NumContextLines, if greater than zero, adds up to this many context
	// lines before and after each matched line. Note that the included
	// context lines might themselves contain matches; it is up to the
	// consumer of the result to remove those lines.
	NumContextLines int

	// ChunkMatches, if true, returns ChunkMatches in each FileMatch rather
	// than LineMatches.
	// EXPERIMENTAL: the behavior of this flag may be changed in future
	// versions.
	ChunkMatches bool

	// EXPERIMENTAL. UseDocumentRanks, if true, uses document ranks as
	// additional input for sorting matches.
	UseDocumentRanks bool

	// EXPERIMENTAL. When UseDocumentRanks is enabled, DocumentRanksWeight can
	// optionally be set to adjust the weight of document ranks in the file
	// match score. If the value is <= 0.0, the default weight value is used.
	// This option is temporary and is only exposed for testing/tuning
	// purposes.
	DocumentRanksWeight float64

	// EXPERIMENTAL. UseKeywordScoring, if true, uses keyword-style scoring
	// instead of the default scoring formula. Currently this treats each
	// match in a file as a term and computes an approximation to BM25. When
	// enabled, all other scoring signals are ignored, including document
	// ranks.
	UseKeywordScoring bool

	// Trace turns on opentracing for this request if true and if the Jaeger
	// address was provided as a command-line flag.
	Trace bool

	// DebugScore, if set, makes the search results contain debug information
	// for scoring.
	DebugScore bool

	// SpanContext is the opentracing span context, if it exists, from the
	// zoekt client.
	SpanContext map[string]string
}
944
945func (s *SearchOptions) String() string {
946	return fmt.Sprintf("%#v", s)
947}
948
949// Sender is the interface that wraps the basic Send method.
950type Sender interface {
951	Send(*SearchResult)
952}
953
954// Streamer adds the method StreamSearch to the Searcher interface.
955type Streamer interface {
956	Searcher
957	StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error)
958}
Configure Feed

Configure Feed