fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt // import "github.com/sourcegraph/zoekt"
16
17import (
18 "context"
19 "encoding/json"
20 "errors"
21 "fmt"
22 "reflect"
23 "strconv"
24 "time"
25
26 "github.com/sourcegraph/zoekt/query"
27)
28
// Assumed sizes, in bytes, of the runtime headers and machine words that the
// sizeBytes estimates in this file add up. The values presume a 64-bit
// architecture.
const (
	mapHeaderBytes    uint64 = 48
	sliceHeaderBytes  uint64 = 24
	stringHeaderBytes uint64 = 16
	pointerSize       uint64 = 8
	interfaceBytes    uint64 = 16
)
34
35// FileMatch contains all the matches within a file.
36type FileMatch struct {
37 // Ranking; the higher, the better.
38 Score float64 // TODO - hide this field?
39
40 // For debugging. Needs DebugScore set, but public so tests in
41 // other packages can print some diagnostics.
42 Debug string
43
44 FileName string
45
46 // Repository is the globally unique name of the repo of the
47 // match
48 Repository string
49 Branches []string
50
51 // One of LineMatches or ChunkMatches will be returned depending on whether
52 // the SearchOptions.ChunkMatches is set.
53 LineMatches []LineMatch
54 ChunkMatches []ChunkMatch
55
56 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in
57 // Sourcegraph.
58 RepositoryID uint32
59
60 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to
61 // order results from different repositories relative to each other.
62 RepositoryPriority float64
63
64 // Only set if requested
65 Content []byte
66
67 // Checksum of the content.
68 Checksum []byte
69
70 // Detected language of the result.
71 Language string
72
73 // SubRepositoryName is the globally unique name of the repo,
74 // if it came from a subrepository
75 SubRepositoryName string
76
77 // SubRepositoryPath holds the prefix where the subrepository
78 // was mounted.
79 SubRepositoryPath string
80
81 // Commit SHA1 (hex) of the (sub)repo holding the file.
82 Version string
83}
84
85func (m *FileMatch) sizeBytes() (sz uint64) {
86 // Score
87 sz += 8
88
89 for _, s := range []string{
90 m.Debug,
91 m.FileName,
92 m.Repository,
93 m.Language,
94 m.SubRepositoryName,
95 m.SubRepositoryPath,
96 m.Version,
97 } {
98 sz += stringHeaderBytes + uint64(len(s))
99 }
100
101 // Branches
102 sz += sliceHeaderBytes
103 for _, s := range m.Branches {
104 sz += stringHeaderBytes + uint64(len(s))
105 }
106
107 // LineMatches
108 sz += sliceHeaderBytes
109 for _, lm := range m.LineMatches {
110 sz += lm.sizeBytes()
111 }
112
113 // ChunkMatches
114 sz += sliceHeaderBytes
115 for _, cm := range m.ChunkMatches {
116 sz += cm.sizeBytes()
117 }
118
119 // RepositoryID
120 sz += 4
121
122 // RepositoryPriority
123 sz += 8
124
125 // Content
126 sz += sliceHeaderBytes + uint64(len(m.Content))
127
128 // Checksum
129 sz += sliceHeaderBytes + uint64(len(m.Checksum))
130
131 return
132}
133
134// ChunkMatch is a set of non-overlapping matches within a contiguous range of
135// lines in the file.
136type ChunkMatch struct {
137 // Content is a contiguous range of complete lines that fully contains Ranges.
138 Content []byte
139 // ContentStart is the location (inclusive) of the beginning of content
140 // relative to the beginning of the file. It will always be at the
141 // beginning of a line (Column will always be 1).
142 ContentStart Location
143
144 // FileName indicates whether this match is a match on the file name, in
145 // which case Content will contain the file name.
146 FileName bool
147
148 // Ranges is a set of matching ranges within this chunk. Each range is relative
149 // to the beginning of the file (not the beginning of Content).
150 Ranges []Range
151
152 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil,
153 // its length will equal that of Ranges. Any of its elements may be nil.
154 SymbolInfo []*Symbol
155
156 Score float64
157 DebugScore string
158}
159
160func (cm *ChunkMatch) sizeBytes() (sz uint64) {
161 // Content
162 sz += sliceHeaderBytes + uint64(len(cm.Content))
163
164 // ContentStart
165 sz += cm.ContentStart.sizeBytes()
166
167 // FileName
168 sz += 1
169
170 // Ranges
171 sz += sliceHeaderBytes
172 if len(cm.Ranges) > 0 {
173 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes()
174 }
175
176 // SymbolInfo
177 sz += sliceHeaderBytes
178 for _, si := range cm.SymbolInfo {
179 sz += pointerSize
180 if si != nil {
181 sz += si.sizeBytes()
182 }
183 }
184
185 // Score
186 sz += 8
187
188 // DebugScore
189 sz += stringHeaderBytes + uint64(len(cm.DebugScore))
190
191 return
192}
193
194type Range struct {
195 // The inclusive beginning of the range.
196 Start Location
197 // The exclusive end of the range.
198 End Location
199}
200
201func (r *Range) sizeBytes() uint64 {
202 return r.Start.sizeBytes() + r.End.sizeBytes()
203}
204
// Location identifies a position in a file by byte offset, line and column.
type Location struct {
	// 0-based byte offset from the beginning of the file
	ByteOffset uint32
	// 1-based line number from the beginning of the file
	LineNumber uint32
	// 1-based column number (in runes) from the beginning of line
	Column uint32
}

// sizeBytes returns the size of a Location: three uint32 fields of four bytes
// each, independent of the values stored.
func (l *Location) sizeBytes() uint64 {
	const (
		fieldCount = 3
		fieldBytes = 4
	)
	return fieldCount * fieldBytes
}
217
218// LineMatch holds the matches within a single line in a file.
219type LineMatch struct {
220 // The line in which a match was found.
221 Line []byte
222 LineStart int
223 LineEnd int
224 LineNumber int
225
226 // Before and After are only set when SearchOptions.NumContextLines is > 0
227 Before []byte
228 After []byte
229
230 // If set, this was a match on the filename.
231 FileName bool
232
233 // The higher the better. Only ranks the quality of the match
234 // within the file, does not take rank of file into account
235 Score float64
236 DebugScore string
237
238 LineFragments []LineFragmentMatch
239}
240
241func (lm *LineMatch) sizeBytes() (sz uint64) {
242 // Line
243 sz += sliceHeaderBytes + uint64(len(lm.Line))
244
245 // LineStart, LineEnd, LineNumber
246 sz += 3 * 8
247
248 // Before
249 sz += sliceHeaderBytes + uint64(len(lm.Before))
250
251 // After
252 sz += sliceHeaderBytes + uint64(len(lm.After))
253
254 // FileName
255 sz += 1
256
257 // Score
258 sz += 8
259
260 // DebugScore
261 sz += stringHeaderBytes + uint64(len(lm.DebugScore))
262
263 // LineFragments
264 sz += sliceHeaderBytes
265 for _, lf := range lm.LineFragments {
266 sz += lf.sizeBytes()
267 }
268
269 return
270}
271
272type Symbol struct {
273 Sym string
274 Kind string
275 Parent string
276 ParentKind string
277}
278
279func (s *Symbol) sizeBytes() uint64 {
280 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind))
281}
282
283// LineFragmentMatch a segment of matching text within a line.
284type LineFragmentMatch struct {
285 // Offset within the line, in bytes.
286 LineOffset int
287
288 // Offset from file start, in bytes.
289 Offset uint32
290
291 // Number bytes that match.
292 MatchLength int
293
294 SymbolInfo *Symbol
295}
296
297func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) {
298 // LineOffset
299 sz += 8
300
301 // Offset
302 sz += 4
303
304 // MatchLength
305 sz += 8
306
307 // SymbolInfo
308 sz += pointerSize
309 if lfm.SymbolInfo != nil {
310 sz += lfm.SymbolInfo.sizeBytes()
311 }
312
313 return
314}
315
// FlushReason explains why results were flushed. The zero value means no
// reason was recorded; the named values are distinct bits.
type FlushReason uint8

const (
	FlushReasonTimerExpired FlushReason = 1 << iota
	FlushReasonFinalFlush
	FlushReasonMaxSize
)

// FlushReasonStrings maps every named FlushReason to the label returned by
// FlushReason.String.
var FlushReasonStrings = map[FlushReason]string{
	FlushReasonTimerExpired: "timer_expired",
	FlushReasonFinalFlush:   "final_flush",
	FlushReasonMaxSize:      "max_size_reached",
}

// String returns the label registered for fr in FlushReasonStrings, or "none"
// when fr has no entry there.
func (fr FlushReason) String() string {
	label, known := FlushReasonStrings[fr]
	if !known {
		return "none"
	}
	return label
}
337
338// Stats contains interesting numbers on the search
339type Stats struct {
340 // Amount of I/O for reading contents.
341 ContentBytesLoaded int64
342
343 // Amount of I/O for reading from index.
344 IndexBytesLoaded int64
345
346 // Number of search shards that had a crash.
347 Crashes int
348
349 // Wall clock time for this search
350 Duration time.Duration
351
352 // Number of files containing a match.
353 FileCount int
354
355 // Number of files in shards that we considered.
356 ShardFilesConsidered int
357
358 // Files that we evaluated. Equivalent to files for which all
359 // atom matches (including negations) evaluated to true.
360 FilesConsidered int
361
362 // Files for which we loaded file content to verify substring matches
363 FilesLoaded int
364
365 // Candidate files whose contents weren't examined because we
366 // gathered enough matches.
367 FilesSkipped int
368
369 // Shards that we scanned to find matches.
370 ShardsScanned int
371
372 // Shards that we did not process because a query was canceled.
373 ShardsSkipped int
374
375 // Shards that we did not process because the query was rejected by the
376 // ngram filter indicating it had no matches.
377 ShardsSkippedFilter int
378
379 // Number of non-overlapping matches
380 MatchCount int
381
382 // Number of candidate matches as a result of searching ngrams.
383 NgramMatches int
384
385 // NgramLookups is the number of times we accessed an ngram in the index.
386 NgramLookups int
387
388 // Wall clock time for queued search.
389 Wait time.Duration
390
391 // Aggregate wall clock time spent constructing and pruning the match tree.
392 // This accounts for time such as lookups in the trigram index.
393 MatchTreeConstruction time.Duration
394
395 // Aggregate wall clock time spent searching the match tree. This accounts
396 // for the bulk of search work done looking for matches.
397 MatchTreeSearch time.Duration
398
399 // Number of times regexp was called on files that we evaluated.
400 RegexpsConsidered int
401
402 // FlushReason explains why results were flushed.
403 FlushReason FlushReason
404}
405
406func (s *Stats) sizeBytes() (sz uint64) {
407 sz = 16 * 8 // This assumes we are running on a 64-bit architecture
408 sz += 1 // FlushReason
409
410 return
411}
412
413func (s *Stats) Add(o Stats) {
414 s.ContentBytesLoaded += o.ContentBytesLoaded
415 s.IndexBytesLoaded += o.IndexBytesLoaded
416 s.Crashes += o.Crashes
417 s.FileCount += o.FileCount
418 s.FilesConsidered += o.FilesConsidered
419 s.FilesLoaded += o.FilesLoaded
420 s.FilesSkipped += o.FilesSkipped
421 s.MatchCount += o.MatchCount
422 s.NgramMatches += o.NgramMatches
423 s.NgramLookups += o.NgramLookups
424 s.ShardFilesConsidered += o.ShardFilesConsidered
425 s.ShardsScanned += o.ShardsScanned
426 s.ShardsSkipped += o.ShardsSkipped
427 s.ShardsSkippedFilter += o.ShardsSkippedFilter
428 s.Wait += o.Wait
429 s.MatchTreeConstruction += o.MatchTreeConstruction
430 s.MatchTreeSearch += o.MatchTreeSearch
431 s.RegexpsConsidered += o.RegexpsConsidered
432
433 // We want the first non-zero FlushReason to be sticky. This is a useful
434 // property when aggregating stats from several Zoekts.
435 if s.FlushReason == 0 {
436 s.FlushReason = o.FlushReason
437 }
438}
439
440// Zero returns true if stats is empty.
441func (s *Stats) Zero() bool {
442 if s == nil {
443 return true
444 }
445
446 return !(s.ContentBytesLoaded > 0 ||
447 s.IndexBytesLoaded > 0 ||
448 s.Crashes > 0 ||
449 s.FileCount > 0 ||
450 s.FilesConsidered > 0 ||
451 s.FilesLoaded > 0 ||
452 s.FilesSkipped > 0 ||
453 s.MatchCount > 0 ||
454 s.NgramMatches > 0 ||
455 s.NgramLookups > 0 ||
456 s.ShardFilesConsidered > 0 ||
457 s.ShardsScanned > 0 ||
458 s.ShardsSkipped > 0 ||
459 s.ShardsSkippedFilter > 0 ||
460 s.Wait > 0 ||
461 s.MatchTreeConstruction > 0 ||
462 s.MatchTreeSearch > 0 ||
463 s.RegexpsConsidered > 0)
464}
465
// Progress contains information about the global progress of the running search query.
// This is used by the frontend to reorder results and emit them when stable.
// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances.
type Progress struct {
	// Priority of the shard that was searched.
	Priority float64

	// MaxPendingPriority is the maximum priority of pending result that is being searched in parallel.
	// This is used to reorder results when the result set is known to be stable-- that is, when a result's
	// Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user.
	//
	// MaxPendingPriority decreases monotonically in each SearchResult.
	MaxPendingPriority float64
}

// sizeBytes returns the size of a Progress: two float64 fields of eight bytes
// each, independent of the values stored.
func (p *Progress) sizeBytes() uint64 {
	const (
		fieldCount = 2
		fieldBytes = 8
	)
	return fieldCount * fieldBytes
}
484
485// SearchResult contains search matches and extra data
486type SearchResult struct {
487 Stats
488
489 // Do not encode this as we cannot encode -Inf in JSON
490 Progress `json:"-"`
491
492 Files []FileMatch
493
494 // RepoURLs holds a repo => template string map.
495 RepoURLs map[string]string
496
497 // FragmentNames holds a repo => template string map, for
498 // the line number fragment.
499 LineFragments map[string]string
500}
501
502// SizeBytes is a best-effort estimate of the size of SearchResult in memory.
503// The estimate does not take alignment into account. The result is a lower
504// bound on the actual size in memory.
505func (sr *SearchResult) SizeBytes() (sz uint64) {
506 sz += sr.Stats.sizeBytes()
507 sz += sr.Progress.sizeBytes()
508
509 // Files
510 sz += sliceHeaderBytes
511 for _, f := range sr.Files {
512 sz += f.sizeBytes()
513 }
514
515 // RepoURLs
516 sz += mapHeaderBytes
517 for k, v := range sr.RepoURLs {
518 sz += stringHeaderBytes + uint64(len(k))
519 sz += stringHeaderBytes + uint64(len(v))
520 }
521
522 // LineFragments
523 sz += mapHeaderBytes
524 for k, v := range sr.LineFragments {
525 sz += stringHeaderBytes + uint64(len(k))
526 sz += stringHeaderBytes + uint64(len(v))
527 }
528
529 return
530}
531
// RepositoryBranch describes an indexed branch, which is a name
// combined with a version.
type RepositoryBranch struct {
	Name    string
	Version string
}

// String renders the branch as "Name@Version".
func (r RepositoryBranch) String() string {
	// Sprint inserts no separator between string operands.
	return fmt.Sprint(r.Name, "@", r.Version)
}
542
// Repository holds repository metadata.
//
// ID, priority and (when unset) Rank are derived from RawConfig when a
// Repository is decoded from JSON; see UnmarshalJSON.
type Repository struct {
	// Sourcegraph's repository ID
	ID uint32

	// The repository name
	Name string

	// The repository URL.
	URL string

	// The physical source where this repo came from, eg. full
	// path to the zip filename or git repository directory. This
	// will not be exposed in the UI, but can be used to detect
	// orphaned index shards.
	Source string

	// The branches indexed in this repo.
	Branches []RepositoryBranch

	// Nil if this is not the super project.
	SubRepoMap map[string]*Repository

	// URL template to link to the commit of a branch
	CommitURLTemplate string

	// The repository URL for getting to a file. Has access to
	// {{.Version}}, {{.Path}}
	FileURLTemplate string

	// The URL fragment to add to a file URL for line numbers. has
	// access to {{.LineNumber}}. The fragment should include the
	// separator, generally '#' or ';'.
	LineFragmentTemplate string

	// Perf optimization: priority is set when we load the shard. It corresponds to
	// the value of "priority" stored in RawConfig.
	priority float64

	// All zoekt.* configuration settings.
	RawConfig map[string]string

	// Importance of the repository, bigger is more important
	Rank uint16

	// IndexOptions is a hash of the options used to create the index for the
	// repo.
	IndexOptions string

	// HasSymbols is true if this repository has indexed ctags
	// output. Sourcegraph specific: This field is more appropriate for
	// IndexMetadata. However, we store it here since the Sourcegraph frontend
	// can read this structure but not IndexMetadata.
	HasSymbols bool

	// Tombstone is true if we are not allowed to search this repo.
	Tombstone bool

	// LatestCommitDate is the date of the latest commit among all indexed Branches.
	// The date might be time.Time's 0-value if the repository was last indexed
	// before this field was added.
	LatestCommitDate time.Time

	// FileTombstones is a set of file paths that should be ignored across all branches
	// in this shard. The field is omitted from JSON when empty.
	FileTombstones map[string]struct{} `json:",omitempty"`
}
610
611func (r *Repository) UnmarshalJSON(data []byte) error {
612 // We define a new type so that we can use json.Unmarshal
613 // without recursing into this same method.
614 type repository *Repository
615 repo := repository(r)
616
617 err := json.Unmarshal(data, repo)
618 if err != nil {
619 return err
620 }
621
622 if v, ok := repo.RawConfig["repoid"]; ok {
623 id, _ := strconv.ParseUint(v, 10, 32)
624 r.ID = uint32(id)
625 }
626
627 if v, ok := repo.RawConfig["priority"]; ok {
628 r.priority, err = strconv.ParseFloat(v, 64)
629 if err != nil {
630 r.priority = 0
631 }
632
633 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here
634 // based on priority. Setting it on read instead of during indexing
635 // allows us to avoid a complete reindex.
636 if r.Rank == 0 && r.priority > 0 {
637 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular
638 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars.
639 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16)
640 }
641 }
642 return nil
643}
644
645// MergeMutable will merge x into r. mutated will be true if it made any
646// changes. err is non-nil if we needed to mutate an immutable field.
647//
648// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are
649// computed while indexing so can't be synthesized from x.
650//
651// Note: We ignore RawConfig fields which are duplicated into Repository:
652// name and id.
653//
654// Note: URL, *Template fields are ignored. They are not used by Sourcegraph.
655func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) {
656 if r.ID != x.ID {
657 // Sourcegraph: strange behaviour may occur if ID changes but names don't.
658 return mutated, errors.New("ID is immutable")
659 }
660 if r.Name != x.Name {
661 // Name is encoded into the shard name on disk. We need to re-index if it
662 // changes.
663 return mutated, errors.New("Name is immutable")
664 }
665 if !reflect.DeepEqual(r.Branches, x.Branches) {
666 // Need a reindex if content changing.
667 return mutated, errors.New("Branches is immutable")
668 }
669
670 for k, v := range x.RawConfig {
671 // We ignore name and id since they are encoded into the repository.
672 if k == "name" || k == "id" {
673 continue
674 }
675 if r.RawConfig == nil {
676 mutated = true
677 r.RawConfig = make(map[string]string)
678 }
679 if r.RawConfig[k] != v {
680 mutated = true
681 r.RawConfig[k] = v
682 }
683 }
684
685 return mutated, nil
686}
687
// IndexMetadata holds metadata stored in the index file. It contains
// data generated by the core indexing library.
//
// NOTE(review): the individual fields are undocumented in this file; the
// remarks below are assumptions drawn from the field names — confirm against
// the indexing code.
type IndexMetadata struct {
	IndexFormatVersion    int
	IndexFeatureVersion   int
	IndexMinReaderVersion int
	IndexTime             time.Time // presumably when the index was built — TODO confirm
	PlainASCII            bool
	LanguageMap           map[string]uint16 // presumably language name => numeric code — TODO confirm
	ZoektVersion          string
	ID                    string
}
700
// RepoStats holds statistics of a (collection of) repositories.
type RepoStats struct {
	// Repos is used for aggregating the number of repositories.
	//
	// Note: This field is not populated on RepoListEntry.Stats (individual) but
	// only for RepoList.Stats (aggregate).
	Repos int

	// Shards is the total number of search shards.
	Shards int

	// Documents holds the number of documents or files.
	Documents int

	// IndexBytes is the amount of RAM used for index overhead.
	IndexBytes int64

	// ContentBytes is the amount of RAM used for raw content.
	ContentBytes int64

	// Sourcegraph specific stats below. These are not as efficient to calculate
	// as the above statistics. We experimentally measured about a 10% slower
	// shard load time. However, we find these values very useful to track and
	// computing them outside of load time introduces a lot of complexity.

	// NewLinesCount is the number of newlines "\n" that appear in the zoekt
	// indexed documents. This is not exactly the same as line count, since it
	// will not include lines not terminated by "\n" (eg a file with no "\n", or
	// a final line without "\n"). Note: Zoekt deduplicates documents across
	// branches, so if a path has the same contents on multiple branches, there
	// is only one document for it. As such that document's newlines is only
	// counted once. See DefaultBranchNewLinesCount and OtherBranchesNewLinesCount
	// for counts which do not deduplicate.
	NewLinesCount uint64

	// DefaultBranchNewLinesCount is the number of newlines "\n" in the default
	// branch.
	DefaultBranchNewLinesCount uint64

	// OtherBranchesNewLinesCount is the number of newlines "\n" in all branches
	// except the default branch.
	OtherBranchesNewLinesCount uint64
}

// Add accumulates the statistics of o into s. Repos is left untouched.
func (s *RepoStats) Add(o *RepoStats) {
	// Repos can't be summed here, since one repo may have multiple shards.

	// Shard and document counts.
	s.Shards += o.Shards
	s.Documents += o.Documents

	// Memory usage.
	s.IndexBytes += o.IndexBytes
	s.ContentBytes += o.ContentBytes

	// Sourcegraph specific newline counts.
	s.NewLinesCount += o.NewLinesCount
	s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount
	s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount
}
758
// RepoListEntry bundles a Repository with its IndexMetadata and RepoStats.
// It is the element type of RepoList.Repos.
type RepoListEntry struct {
	Repository    Repository
	IndexMetadata IndexMetadata
	Stats         RepoStats
}
764
// MinimalRepoListEntry is a subset of RepoListEntry. It was added after
// performance profiling of sourcegraph.com revealed that querying this
// information from Zoekt was causing lots of CPU and memory usage. Note: we
// can revisit this, how we store and query this information has changed a lot
// since this was introduced.
type MinimalRepoListEntry struct {
	// HasSymbols is exported since Sourcegraph uses this information at search
	// planning time to decide between Zoekt and an unindexed symbol search.
	//
	// Note: it pretty much is always true in practice.
	HasSymbols bool

	// Branches is used by Sourcegraph's query planner to decide if it can use
	// zoekt or go via an unindexed code path.
	Branches []RepositoryBranch

	// IndexTimeUnix is the IndexTime converted to unix time (number of seconds
	// since the epoch). This is to make it clear we are not transporting the
	// full fidelity timestamp (ie with milliseconds and location). Additionally
	// it saves 16 bytes in this struct.
	//
	// IndexTime is used as a heuristic in Sourcegraph to decide in aggregate
	// how many repositories need updating after a ranking change/etc.
	//
	// TODO(keegancsmith) audit updates to IndexTime and document how and when
	// it changes. Concerned about things like metadata updates or compound
	// shards leading to untrustworthy data here.
	IndexTimeUnix int64
}
794
795type ReposMap map[uint32]MinimalRepoListEntry
796
797// MarshalBinary implements a specialized encoder for ReposMap.
798func (q *ReposMap) MarshalBinary() ([]byte, error) {
799 return reposMapEncode(*q)
800}
801
802// UnmarshalBinary implements a specialized decoder for ReposMap.
803func (q *ReposMap) UnmarshalBinary(b []byte) error {
804 var err error
805 (*q), err = reposMapDecode(b)
806 return err
807}
808
// RepoList holds a set of Repository metadata. Which of Repos or ReposMap is
// populated depends on ListOptions.Field.
type RepoList struct {
	// Returned when ListOptions.Field is RepoListFieldRepos.
	Repos []*RepoListEntry

	// ReposMap is set when ListOptions.Field is RepoListFieldReposMap.
	ReposMap ReposMap

	// NOTE(review): presumably the number of shards that crashed while
	// listing, mirroring Stats.Crashes for searches — confirm.
	Crashes int

	// Stats response to a List request.
	// This is the aggregate RepoStats of all repos matching the input query.
	Stats RepoStats
}
823
// Searcher is the interface for running a search query and for listing
// repositories.
type Searcher interface {
	// Search runs the query q with the given options and returns the
	// collected result.
	Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error)

	// List lists repositories. The query `q` can only contain
	// query.Repo atoms.
	List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error)
	Close()

	// Describe the searcher for debug messages.
	String() string
}
835
// RepoListField selects which field of RepoList a List request populates.
type RepoListField int

const (
	// RepoListFieldRepos populates RepoList.Repos. It is the zero value and
	// therefore the default.
	RepoListFieldRepos RepoListField = 0
	// Value 1 used to be RepoListFieldMinimal, which is no longer supported;
	// see ListOptions.GetField.

	// RepoListFieldReposMap populates RepoList.ReposMap. The type is spelled
	// out because a constant with its own expression does not inherit the
	// type of the preceding line and would otherwise be an untyped integer.
	RepoListFieldReposMap RepoListField = 2
)

// ListOptions holds the options of a List request.
type ListOptions struct {
	// Field decides which field to populate in RepoList response.
	Field RepoListField
}

// GetField returns the validated Field of o. A nil o selects
// RepoListFieldRepos. An error is returned for the retired value 1
// (RepoListFieldMinimal) and for any unknown value.
func (o *ListOptions) GetField() (RepoListField, error) {
	if o == nil {
		return RepoListFieldRepos, nil
	}
	switch o.Field {
	case RepoListFieldRepos, RepoListFieldReposMap:
		return o.Field, nil
	case 1:
		return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field)
	default:
		return 0, fmt.Errorf("unknown RepoListField %d", o.Field)
	}
}

// String returns the Go-syntax representation of o, for debug output.
func (o *ListOptions) String() string {
	return fmt.Sprintf("%#v", o)
}
865
// SearchOptions holds the options of a search request.
type SearchOptions struct {
	// Return an upper-bound estimate of eligible documents in
	// stats.ShardFilesConsidered.
	EstimateDocCount bool

	// Return the whole file.
	Whole bool

	// Maximum number of matches: skip all processing an index
	// shard after we found this many non-overlapping matches.
	ShardMaxMatchCount int

	// Maximum number of matches: stop looking for more matches
	// once we have this many matches across shards.
	TotalMaxMatchCount int

	// Maximum number of matches: skip processing documents for a repository in
	// a shard once we have found ShardRepoMaxMatchCount.
	//
	// A compound shard may contain multiple repositories. This will most often
	// be set to 1 to find all repositories containing a result.
	ShardRepoMaxMatchCount int

	// Deprecated: this field is not read anymore.
	ShardMaxImportantMatch int

	// Deprecated: this field is not read anymore.
	TotalMaxImportantMatch int

	// Abort the search after this much time has passed.
	MaxWallTime time.Duration

	// FlushWallTime if non-zero will stop streaming behaviour at first and
	// instead will collate and sort results. At FlushWallTime the results will
	// be sent and then the behaviour will revert to the normal streaming.
	FlushWallTime time.Duration

	// Truncates the number of documents (i.e. files) after collating and
	// sorting the results.
	MaxDocDisplayCount int

	// Truncates the number of matches after collating and sorting the results.
	MaxMatchDisplayCount int

	// If set to a number greater than zero then up to this many number
	// of context lines will be added before and after each matched line.
	// Note that the included context lines might contain matches and
	// it's up to the consumer of the result to remove those lines.
	NumContextLines int

	// If true, ChunkMatches will be returned in each FileMatch rather than LineMatches
	// EXPERIMENTAL: the behavior of this flag may be changed in future versions.
	ChunkMatches bool

	// EXPERIMENTAL. If true, document ranks are used as additional input for
	// sorting matches.
	UseDocumentRanks bool

	// EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust
	// their weight in the file match score. If the value is <= 0.0, the default weight value
	// will be used. This option is temporary and is only exposed for testing/ tuning purposes.
	DocumentRanksWeight float64

	// EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
	// Currently, this treats each match in a file as a term and computes an approximation to BM25.
	// When enabled, all other scoring signals are ignored, including document ranks.
	UseKeywordScoring bool

	// Trace turns on opentracing for this request if true and if the Jaeger address was provided as
	// a command-line flag
	Trace bool

	// If set, the search results will contain debug information for scoring.
	DebugScore bool

	// SpanContext is the opentracing span context, if it exists, from the zoekt client
	SpanContext map[string]string
}

// String returns the Go-syntax representation of s, for debug output.
func (s *SearchOptions) String() string {
	const goSyntax = "%#v"
	return fmt.Sprintf(goSyntax, s)
}
948
// Sender is the interface that wraps the basic Send method.
//
// A Sender is the destination handed to Streamer.StreamSearch.
type Sender interface {
	// Send hands one SearchResult to the receiver.
	Send(*SearchResult)
}
953
// Streamer adds the method StreamSearch to the Searcher interface.
type Streamer interface {
	Searcher
	// StreamSearch takes the same query and options as Search but, instead of
	// returning a SearchResult, receives a Sender.
	// NOTE(review): results are presumably delivered through sender as they
	// become available — confirm with the implementations.
	StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error)
}