fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "time" 25 26 "github.com/sourcegraph/zoekt/query" 27) 28 29const mapHeaderBytes uint64 = 48 30const sliceHeaderBytes uint64 = 24 31const stringHeaderBytes uint64 = 16 32const pointerSize uint64 = 8 33const interfaceBytes uint64 = 16 34 35// FileMatch contains all the matches within a file. 36type FileMatch struct { 37 // Ranking; the higher, the better. 38 Score float64 // TODO - hide this field? 39 40 // For debugging. Needs DebugScore set, but public so tests in 41 // other packages can print some diagnostics. 42 Debug string 43 44 FileName string 45 46 // Repository is the globally unique name of the repo of the 47 // match 48 Repository string 49 Branches []string 50 51 // One of LineMatches or ChunkMatches will be returned depending on whether 52 // the SearchOptions.ChunkMatches is set. 53 LineMatches []LineMatch 54 ChunkMatches []ChunkMatch 55 56 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 57 // Sourcegraph. 58 RepositoryID uint32 59 60 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 61 // order results from different repositories relative to each other. 62 RepositoryPriority float64 63 64 // Only set if requested 65 Content []byte 66 67 // Checksum of the content. 68 Checksum []byte 69 70 // Detected language of the result. 71 Language string 72 73 // SubRepositoryName is the globally unique name of the repo, 74 // if it came from a subrepository 75 SubRepositoryName string 76 77 // SubRepositoryPath holds the prefix where the subrepository 78 // was mounted. 79 SubRepositoryPath string 80 81 // Commit SHA1 (hex) of the (sub)repo holding the file. 82 Version string 83} 84 85func (m *FileMatch) sizeBytes() (sz uint64) { 86 // Score 87 sz += 8 88 89 for _, s := range []string{ 90 m.Debug, 91 m.FileName, 92 m.Repository, 93 m.Language, 94 m.SubRepositoryName, 95 m.SubRepositoryPath, 96 m.Version, 97 } { 98 sz += stringHeaderBytes + uint64(len(s)) 99 } 100 101 // Branches 102 sz += sliceHeaderBytes 103 for _, s := range m.Branches { 104 sz += stringHeaderBytes + uint64(len(s)) 105 } 106 107 // LineMatches 108 sz += sliceHeaderBytes 109 for _, lm := range m.LineMatches { 110 sz += lm.sizeBytes() 111 } 112 113 // ChunkMatches 114 sz += sliceHeaderBytes 115 for _, cm := range m.ChunkMatches { 116 sz += cm.sizeBytes() 117 } 118 119 // RepositoryID 120 sz += 4 121 122 // RepositoryPriority 123 sz += 8 124 125 // Content 126 sz += sliceHeaderBytes + uint64(len(m.Content)) 127 128 // Checksum 129 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 130 131 return 132} 133 134// ChunkMatch is a set of non-overlapping matches within a contiguous range of 135// lines in the file. 136type ChunkMatch struct { 137 // Content is a contiguous range of complete lines that fully contains Ranges. 138 Content []byte 139 // ContentStart is the location (inclusive) of the beginning of content 140 // relative to the beginning of the file. It will always be at the 141 // beginning of a line (Column will always be 1). 142 ContentStart Location 143 144 // FileName indicates whether this match is a match on the file name, in 145 // which case Content will contain the file name. 146 FileName bool 147 148 // Ranges is a set of matching ranges within this chunk. Each range is relative 149 // to the beginning of the file (not the beginning of Content). 150 Ranges []Range 151 152 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 153 // its length will equal that of Ranges. Any of its elements may be nil. 154 SymbolInfo []*Symbol 155 156 Score float64 157 DebugScore string 158} 159 160func (cm *ChunkMatch) sizeBytes() (sz uint64) { 161 // Content 162 sz += sliceHeaderBytes + uint64(len(cm.Content)) 163 164 // ContentStart 165 sz += cm.ContentStart.sizeBytes() 166 167 // FileName 168 sz += 1 169 170 // Ranges 171 sz += sliceHeaderBytes 172 if len(cm.Ranges) > 0 { 173 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 174 } 175 176 // SymbolInfo 177 sz += sliceHeaderBytes 178 for _, si := range cm.SymbolInfo { 179 sz += pointerSize 180 if si != nil { 181 sz += si.sizeBytes() 182 } 183 } 184 185 // Score 186 sz += 8 187 188 // DebugScore 189 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 190 191 return 192} 193 194type Range struct { 195 // The inclusive beginning of the range. 196 Start Location 197 // The exclusive end of the range. 198 End Location 199} 200 201func (r *Range) sizeBytes() uint64 { 202 return r.Start.sizeBytes() + r.End.sizeBytes() 203} 204 205type Location struct { 206 // 0-based byte offset from the beginning of the file 207 ByteOffset uint32 208 // 1-based line number from the beginning of the file 209 LineNumber uint32 210 // 1-based column number (in runes) from the beginning of line 211 Column uint32 212} 213 214func (l *Location) sizeBytes() uint64 { 215 return 3 * 4 216} 217 218// LineMatch holds the matches within a single line in a file. 219type LineMatch struct { 220 // The line in which a match was found. 221 Line []byte 222 LineStart int 223 LineEnd int 224 LineNumber int 225 226 // Before and After are only set when SearchOptions.NumContextLines is > 0 227 Before []byte 228 After []byte 229 230 // If set, this was a match on the filename. 231 FileName bool 232 233 // The higher the better. Only ranks the quality of the match 234 // within the file, does not take rank of file into account 235 Score float64 236 DebugScore string 237 238 LineFragments []LineFragmentMatch 239} 240 241func (lm *LineMatch) sizeBytes() (sz uint64) { 242 // Line 243 sz += sliceHeaderBytes + uint64(len(lm.Line)) 244 245 // LineStart, LineEnd, LineNumber 246 sz += 3 * 8 247 248 // Before 249 sz += sliceHeaderBytes + uint64(len(lm.Before)) 250 251 // After 252 sz += sliceHeaderBytes + uint64(len(lm.After)) 253 254 // FileName 255 sz += 1 256 257 // Score 258 sz += 8 259 260 // DebugScore 261 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 262 263 // LineFragments 264 sz += sliceHeaderBytes 265 for _, lf := range lm.LineFragments { 266 sz += lf.sizeBytes() 267 } 268 269 return 270} 271 272type Symbol struct { 273 Sym string 274 Kind string 275 Parent string 276 ParentKind string 277} 278 279func (s *Symbol) sizeBytes() uint64 { 280 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 281} 282 283// LineFragmentMatch a segment of matching text within a line. 284type LineFragmentMatch struct { 285 // Offset within the line, in bytes. 286 LineOffset int 287 288 // Offset from file start, in bytes. 289 Offset uint32 290 291 // Number bytes that match. 292 MatchLength int 293 294 SymbolInfo *Symbol 295} 296 297func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 298 // LineOffset 299 sz += 8 300 301 // Offset 302 sz += 4 303 304 // MatchLength 305 sz += 8 306 307 // SymbolInfo 308 sz += pointerSize 309 if lfm.SymbolInfo != nil { 310 sz += lfm.SymbolInfo.sizeBytes() 311 } 312 313 return 314} 315 316type FlushReason uint8 317 318const ( 319 FlushReasonTimerExpired FlushReason = 1 << iota 320 FlushReasonFinalFlush 321 FlushReasonMaxSize 322) 323 324var FlushReasonStrings = map[FlushReason]string{ 325 FlushReasonTimerExpired: "timer_expired", 326 FlushReasonFinalFlush: "final_flush", 327 FlushReasonMaxSize: "max_size_reached", 328} 329 330func (fr FlushReason) String() string { 331 if v, ok := FlushReasonStrings[fr]; ok { 332 return v 333 } 334 335 return "none" 336} 337 338// Stats contains interesting numbers on the search 339type Stats struct { 340 // Amount of I/O for reading contents. 341 ContentBytesLoaded int64 342 343 // Amount of I/O for reading from index. 344 IndexBytesLoaded int64 345 346 // Number of search shards that had a crash. 347 Crashes int 348 349 // Wall clock time for this search 350 Duration time.Duration 351 352 // Number of files containing a match. 353 FileCount int 354 355 // Number of files in shards that we considered. 356 ShardFilesConsidered int 357 358 // Files that we evaluated. Equivalent to files for which all 359 // atom matches (including negations) evaluated to true. 360 FilesConsidered int 361 362 // Files for which we loaded file content to verify substring matches 363 FilesLoaded int 364 365 // Candidate files whose contents weren't examined because we 366 // gathered enough matches. 367 FilesSkipped int 368 369 // Shards that we scanned to find matches. 370 ShardsScanned int 371 372 // Shards that we did not process because a query was canceled. 373 ShardsSkipped int 374 375 // Shards that we did not process because the query was rejected by the 376 // ngram filter indicating it had no matches. 377 ShardsSkippedFilter int 378 379 // Number of non-overlapping matches 380 MatchCount int 381 382 // Number of candidate matches as a result of searching ngrams. 383 NgramMatches int 384 385 // Wall clock time for queued search. 386 Wait time.Duration 387 388 // Number of times regexp was called on files that we evaluated. 389 RegexpsConsidered int 390 391 // FlushReason explains why results were flushed. 392 FlushReason FlushReason 393} 394 395func (s *Stats) sizeBytes() (sz uint64) { 396 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 397 sz += 1 // FlushReason 398 399 return 400} 401 402func (s *Stats) Add(o Stats) { 403 s.ContentBytesLoaded += o.ContentBytesLoaded 404 s.IndexBytesLoaded += o.IndexBytesLoaded 405 s.Crashes += o.Crashes 406 s.FileCount += o.FileCount 407 s.FilesConsidered += o.FilesConsidered 408 s.FilesLoaded += o.FilesLoaded 409 s.FilesSkipped += o.FilesSkipped 410 s.MatchCount += o.MatchCount 411 s.NgramMatches += o.NgramMatches 412 s.ShardFilesConsidered += o.ShardFilesConsidered 413 s.ShardsScanned += o.ShardsScanned 414 s.ShardsSkipped += o.ShardsSkipped 415 s.ShardsSkippedFilter += o.ShardsSkippedFilter 416 s.Wait += o.Wait 417 s.RegexpsConsidered += o.RegexpsConsidered 418 419 // We want the first non-zero FlushReason to be sticky. This is a useful 420 // property when aggregating stats from several Zoekts. 421 if s.FlushReason == 0 { 422 s.FlushReason = o.FlushReason 423 } 424} 425 426// Zero returns true if stats is empty. 427func (s *Stats) Zero() bool { 428 if s == nil { 429 return true 430 } 431 432 return !(s.ContentBytesLoaded > 0 || 433 s.IndexBytesLoaded > 0 || 434 s.Crashes > 0 || 435 s.FileCount > 0 || 436 s.FilesConsidered > 0 || 437 s.FilesLoaded > 0 || 438 s.FilesSkipped > 0 || 439 s.MatchCount > 0 || 440 s.NgramMatches > 0 || 441 s.ShardFilesConsidered > 0 || 442 s.ShardsScanned > 0 || 443 s.ShardsSkipped > 0 || 444 s.ShardsSkippedFilter > 0 || 445 s.Wait > 0 || 446 s.RegexpsConsidered > 0) 447} 448 449// Progress contains information about the global progress of the running search query. 450// This is used by the frontend to reorder results and emit them when stable. 451// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 452type Progress struct { 453 // Priority of the shard that was searched. 454 Priority float64 455 456 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 457 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 458 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 459 // 460 // MaxPendingPriority decreases monotonically in each SearchResult. 461 MaxPendingPriority float64 462} 463 464func (p *Progress) sizeBytes() uint64 { 465 return 2 * 8 466} 467 468// SearchResult contains search matches and extra data 469type SearchResult struct { 470 Stats 471 472 // Do not encode this as we cannot encode -Inf in JSON 473 Progress `json:"-"` 474 475 Files []FileMatch 476 477 // RepoURLs holds a repo => template string map. 478 RepoURLs map[string]string 479 480 // FragmentNames holds a repo => template string map, for 481 // the line number fragment. 482 LineFragments map[string]string 483} 484 485// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 486// The estimate does not take alignment into account. The result is a lower 487// bound on the actual size in memory. 488func (sr *SearchResult) SizeBytes() (sz uint64) { 489 sz += sr.Stats.sizeBytes() 490 sz += sr.Progress.sizeBytes() 491 492 // Files 493 sz += sliceHeaderBytes 494 for _, f := range sr.Files { 495 sz += f.sizeBytes() 496 } 497 498 // RepoURLs 499 sz += mapHeaderBytes 500 for k, v := range sr.RepoURLs { 501 sz += stringHeaderBytes + uint64(len(k)) 502 sz += stringHeaderBytes + uint64(len(v)) 503 } 504 505 // LineFragments 506 sz += mapHeaderBytes 507 for k, v := range sr.LineFragments { 508 sz += stringHeaderBytes + uint64(len(k)) 509 sz += stringHeaderBytes + uint64(len(v)) 510 } 511 512 return 513} 514 515// RepositoryBranch describes an indexed branch, which is a name 516// combined with a version. 517type RepositoryBranch struct { 518 Name string 519 Version string 520} 521 522func (r RepositoryBranch) String() string { 523 return fmt.Sprintf("%s@%s", r.Name, r.Version) 524} 525 526// Repository holds repository metadata. 527type Repository struct { 528 // Sourcergaph's repository ID 529 ID uint32 530 531 // The repository name 532 Name string 533 534 // The repository URL. 535 URL string 536 537 // The physical source where this repo came from, eg. full 538 // path to the zip filename or git repository directory. This 539 // will not be exposed in the UI, but can be used to detect 540 // orphaned index shards. 541 Source string 542 543 // The branches indexed in this repo. 544 Branches []RepositoryBranch 545 546 // Nil if this is not the super project. 547 SubRepoMap map[string]*Repository 548 549 // URL template to link to the commit of a branch 550 CommitURLTemplate string 551 552 // The repository URL for getting to a file. Has access to 553 // {{Branch}}, {{Path}} 554 FileURLTemplate string 555 556 // The URL fragment to add to a file URL for line numbers. has 557 // access to {{LineNumber}}. The fragment should include the 558 // separator, generally '#' or ';'. 559 LineFragmentTemplate string 560 561 // Perf optimization: priority is set when we load the shard. It corresponds to 562 // the value of "priority" stored in RawConfig. 563 priority float64 564 565 // All zoekt.* configuration settings. 566 RawConfig map[string]string 567 568 // Importance of the repository, bigger is more important 569 Rank uint16 570 571 // IndexOptions is a hash of the options used to create the index for the 572 // repo. 573 IndexOptions string 574 575 // HasSymbols is true if this repository has indexed ctags 576 // output. Sourcegraph specific: This field is more appropriate for 577 // IndexMetadata. However, we store it here since the Sourcegraph frontend 578 // can read this structure but not IndexMetadata. 579 HasSymbols bool 580 581 // Tombstone is true if we are not allowed to search this repo. 582 Tombstone bool 583 584 // LatestCommitDate is the date of the latest commit among all indexed Branches. 585 // The date might be time.Time's 0-value if the repository was last indexed 586 // before this field was added. 587 LatestCommitDate time.Time 588 589 // FileTombstones is a set of file paths that should be ignored across all branches 590 // in this shard. 591 FileTombstones map[string]struct{} `json:",omitempty"` 592} 593 594func (r *Repository) UnmarshalJSON(data []byte) error { 595 // We define a new type so that we can use json.Unmarshal 596 // without recursing into this same method. 597 type repository *Repository 598 repo := repository(r) 599 600 err := json.Unmarshal(data, repo) 601 if err != nil { 602 return err 603 } 604 605 if v, ok := repo.RawConfig["repoid"]; ok { 606 id, _ := strconv.ParseUint(v, 10, 32) 607 r.ID = uint32(id) 608 } 609 610 if v, ok := repo.RawConfig["priority"]; ok { 611 r.priority, err = strconv.ParseFloat(v, 64) 612 if err != nil { 613 r.priority = 0 614 } 615 616 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 617 // based on priority. Setting it on read instead of during indexing 618 // allows us to avoid a complete reindex. 619 if r.Rank == 0 && r.priority > 0 { 620 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 621 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 622 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 623 } 624 } 625 return nil 626} 627 628// MergeMutable will merge x into r. mutated will be true if it made any 629// changes. err is non-nil if we needed to mutate an immutable field. 630// 631// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 632// computed while indexing so can't be synthesized from x. 633// 634// Note: We ignore RawConfig fields which are duplicated into Repository: 635// name and id. 636// 637// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 638func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 639 if r.ID != x.ID { 640 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 641 return mutated, errors.New("ID is immutable") 642 } 643 if r.Name != x.Name { 644 // Name is encoded into the shard name on disk. We need to re-index if it 645 // changes. 646 return mutated, errors.New("Name is immutable") 647 } 648 if !reflect.DeepEqual(r.Branches, x.Branches) { 649 // Need a reindex if content changing. 650 return mutated, errors.New("Branches is immutable") 651 } 652 653 for k, v := range x.RawConfig { 654 // We ignore name and id since they are encoded into the repository. 655 if k == "name" || k == "id" { 656 continue 657 } 658 if r.RawConfig == nil { 659 mutated = true 660 r.RawConfig = make(map[string]string) 661 } 662 if r.RawConfig[k] != v { 663 mutated = true 664 r.RawConfig[k] = v 665 } 666 } 667 668 return mutated, nil 669} 670 671// IndexMetadata holds metadata stored in the index file. It contains 672// data generated by the core indexing library. 673type IndexMetadata struct { 674 IndexFormatVersion int 675 IndexFeatureVersion int 676 IndexMinReaderVersion int 677 IndexTime time.Time 678 PlainASCII bool 679 LanguageMap map[string]uint16 680 ZoektVersion string 681 ID string 682} 683 684// Statistics of a (collection of) repositories. 685type RepoStats struct { 686 // Repos is used for aggregrating the number of repositories. 687 Repos int 688 689 // Shards is the total number of search shards. 690 Shards int 691 692 // Documents holds the number of documents or files. 693 Documents int 694 695 // IndexBytes is the amount of RAM used for index overhead. 696 IndexBytes int64 697 698 // ContentBytes is the amount of RAM used for raw content. 699 ContentBytes int64 700 701 // Sourcegraph specific stats below. These are not as efficient to calculate 702 // as the above statistics. We experimentally measured about a 10% slower 703 // shard load time. However, we find these values very useful to track and 704 // computing them outside of load time introduces a lot of complexity. 705 706 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 707 // indexed documents. This is not exactly the same as line count, since it 708 // will not include lines not terminated by "\n" (eg a file with no "\n", or 709 // a final line without "\n"). Note: Zoekt deduplicates documents across 710 // branches, so if a path has the same contents on multiple branches, there 711 // is only one document for it. As such that document's newlines is only 712 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 713 // for counts which do not deduplicate. 714 NewLinesCount uint64 715 716 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 717 // branch. 718 DefaultBranchNewLinesCount uint64 719 720 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 721 // except the default branch. 722 OtherBranchesNewLinesCount uint64 723} 724 725func (s *RepoStats) Add(o *RepoStats) { 726 // can't update Repos, since one repo may have multiple 727 // shards. 728 s.Shards += o.Shards 729 s.IndexBytes += o.IndexBytes 730 s.Documents += o.Documents 731 s.ContentBytes += o.ContentBytes 732 733 // Sourcegraph specific 734 s.NewLinesCount += o.NewLinesCount 735 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 736 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 737} 738 739type RepoListEntry struct { 740 Repository Repository 741 IndexMetadata IndexMetadata 742 Stats RepoStats 743} 744 745type MinimalRepoListEntry struct { 746 HasSymbols bool 747 Branches []RepositoryBranch 748} 749 750type ReposMap map[uint32]MinimalRepoListEntry 751 752// MarshalBinary implements a specialized encoder for ReposMap. 753func (q *ReposMap) MarshalBinary() ([]byte, error) { 754 return reposMapEncode(*q) 755} 756 757// UnmarshalBinary implements a specialized decoder for ReposMap. 758func (q *ReposMap) UnmarshalBinary(b []byte) error { 759 var err error 760 (*q), err = reposMapDecode(b) 761 return err 762} 763 764// RepoList holds a set of Repository metadata. 765type RepoList struct { 766 // Returned when ListOptions.Field is RepoListFieldRepos. 767 Repos []*RepoListEntry 768 769 // Returned when ListOptions.Field is RepoListFieldMinimal. 770 // 771 // Deprecated: use ReposMap. 772 Minimal map[uint32]*MinimalRepoListEntry 773 774 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 775 ReposMap ReposMap 776 777 Crashes int 778 779 // Stats response to a List request. 780 // This is the aggregate RepoStats of all repos matching the input query. 781 Stats RepoStats 782} 783 784type Searcher interface { 785 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 786 787 // List lists repositories. The query `q` can only contain 788 // query.Repo atoms. 789 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 790 Close() 791 792 // Describe the searcher for debug messages. 793 String() string 794} 795 796type RepoListField int 797 798const ( 799 RepoListFieldRepos RepoListField = 0 800 RepoListFieldMinimal = 1 801 RepoListFieldReposMap = 2 802) 803 804type ListOptions struct { 805 // Return only Minimal data per repo that Sourcegraph frontend needs. 806 // 807 // Deprecated: use Field 808 Minimal bool 809 810 // Field decides which field to populate in RepoList response. 811 Field RepoListField 812} 813 814func (o *ListOptions) GetField() (RepoListField, error) { 815 if o == nil { 816 return RepoListFieldRepos, nil 817 } 818 if o.Field < 0 || o.Field > RepoListFieldReposMap { 819 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 820 } 821 if o.Minimal == true { 822 return RepoListFieldMinimal, nil 823 } 824 return o.Field, nil 825} 826 827func (o *ListOptions) String() string { 828 return fmt.Sprintf("%#v", o) 829} 830 831type SearchOptions struct { 832 // Return an upper-bound estimate of eligible documents in 833 // stats.ShardFilesConsidered. 834 EstimateDocCount bool 835 836 // Return the whole file. 837 Whole bool 838 839 // Maximum number of matches: skip all processing an index 840 // shard after we found this many non-overlapping matches. 841 ShardMaxMatchCount int 842 843 // Maximum number of matches: stop looking for more matches 844 // once we have this many matches across shards. 845 TotalMaxMatchCount int 846 847 // Maximum number of matches: skip processing documents for a repository in 848 // a shard once we have found ShardRepoMaxMatchCount. 849 // 850 // A compound shard may contain multiple repositories. This will most often 851 // be set to 1 to find all repositories containing a result. 852 ShardRepoMaxMatchCount int 853 854 // Deprecated: this field is not read anymore. 855 ShardMaxImportantMatch int 856 857 // Deprecated: this field is not read anymore. 858 TotalMaxImportantMatch int 859 860 // Abort the search after this much time has passed. 861 MaxWallTime time.Duration 862 863 // FlushWallTime if non-zero will stop streaming behaviour at first and 864 // instead will collate and sort results. At FlushWallTime the results will 865 // be sent and then the behaviour will revert to the normal streaming. 866 FlushWallTime time.Duration 867 868 // Trim the number of results after collating and sorting the 869 // results 870 MaxDocDisplayCount int 871 872 // If set to a number greater than zero then up to this many number 873 // of context lines will be added before and after each matched line. 874 // Note that the included context lines might contain matches and 875 // it's up to the consumer of the result to remove those lines. 876 NumContextLines int 877 878 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 879 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 880 ChunkMatches bool 881 882 // EXPERIMENTAL. If true, document ranks are used as additional input for 883 // sorting matches. 884 UseDocumentRanks bool 885 886 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 887 // their weight in the file match score. If the value is <= 0.0, the default weight value 888 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 889 DocumentRanksWeight float64 890 891 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 892 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 893 // When enabled, all other scoring signals are ignored, including document ranks. 894 UseKeywordScoring bool 895 896 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 897 // a command-line flag 898 Trace bool 899 900 // If set, the search results will contain debug information for scoring. 901 DebugScore bool 902 903 // SpanContext is the opentracing span context, if it exists, from the zoekt client 904 SpanContext map[string]string 905} 906 907func (s *SearchOptions) String() string { 908 return fmt.Sprintf("%#v", s) 909} 910 911// Sender is the interface that wraps the basic Send method. 912type Sender interface { 913 Send(*SearchResult) 914} 915 916// Streamer adds the method StreamSearch to the Searcher interface. 917type Streamer interface { 918 Searcher 919 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 920}