fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "time" 25 26 "github.com/sourcegraph/zoekt/query" 27) 28 29const mapHeaderBytes uint64 = 48 30const sliceHeaderBytes uint64 = 24 31const stringHeaderBytes uint64 = 16 32const pointerSize uint64 = 8 33const interfaceBytes uint64 = 16 34 35// FileMatch contains all the matches within a file. 36type FileMatch struct { 37 // Ranking; the higher, the better. 38 Score float64 // TODO - hide this field? 39 40 // For debugging. Needs DebugScore set, but public so tests in 41 // other packages can print some diagnostics. 42 Debug string 43 44 FileName string 45 46 // Repository is the globally unique name of the repo of the 47 // match 48 Repository string 49 Branches []string 50 51 // One of LineMatches or ChunkMatches will be returned depending on whether 52 // the SearchOptions.ChunkMatches is set. 53 LineMatches []LineMatch 54 ChunkMatches []ChunkMatch 55 56 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 57 // Sourcegraph. 58 RepositoryID uint32 59 60 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 61 // order results from different repositories relative to each other. 62 RepositoryPriority float64 63 64 // Only set if requested 65 Content []byte 66 67 // Checksum of the content. 68 Checksum []byte 69 70 // Detected language of the result. 71 Language string 72 73 // SubRepositoryName is the globally unique name of the repo, 74 // if it came from a subrepository 75 SubRepositoryName string 76 77 // SubRepositoryPath holds the prefix where the subrepository 78 // was mounted. 79 SubRepositoryPath string 80 81 // Commit SHA1 (hex) of the (sub)repo holding the file. 82 Version string 83} 84 85func (m *FileMatch) sizeBytes() (sz uint64) { 86 // Score 87 sz += 8 88 89 for _, s := range []string{ 90 m.Debug, 91 m.FileName, 92 m.Repository, 93 m.Language, 94 m.SubRepositoryName, 95 m.SubRepositoryPath, 96 m.Version, 97 } { 98 sz += stringHeaderBytes + uint64(len(s)) 99 } 100 101 // Branches 102 sz += sliceHeaderBytes 103 for _, s := range m.Branches { 104 sz += stringHeaderBytes + uint64(len(s)) 105 } 106 107 // LineMatches 108 sz += sliceHeaderBytes 109 for _, lm := range m.LineMatches { 110 sz += lm.sizeBytes() 111 } 112 113 // ChunkMatches 114 sz += sliceHeaderBytes 115 for _, cm := range m.ChunkMatches { 116 sz += cm.sizeBytes() 117 } 118 119 // RepositoryID 120 sz += 4 121 122 // RepositoryPriority 123 sz += 8 124 125 // Content 126 sz += sliceHeaderBytes + uint64(len(m.Content)) 127 128 // Checksum 129 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 130 131 return 132} 133 134// ChunkMatch is a set of non-overlapping matches within a contiguous range of 135// lines in the file. 136type ChunkMatch struct { 137 // Content is a contiguous range of complete lines that fully contains Ranges. 138 Content []byte 139 // ContentStart is the location (inclusive) of the beginning of content 140 // relative to the beginning of the file. It will always be at the 141 // beginning of a line (Column will always be 1). 142 ContentStart Location 143 144 // FileName indicates whether this match is a match on the file name, in 145 // which case Content will contain the file name. 146 FileName bool 147 148 // Ranges is a set of matching ranges within this chunk. Each range is relative 149 // to the beginning of the file (not the beginning of Content). 150 Ranges []Range 151 152 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 153 // its length will equal that of Ranges. Any of its elements may be nil. 154 SymbolInfo []*Symbol 155 156 Score float64 157 DebugScore string 158} 159 160func (cm *ChunkMatch) sizeBytes() (sz uint64) { 161 // Content 162 sz += sliceHeaderBytes + uint64(len(cm.Content)) 163 164 // ContentStart 165 sz += cm.ContentStart.sizeBytes() 166 167 // FileName 168 sz += 1 169 170 // Ranges 171 sz += sliceHeaderBytes 172 if len(cm.Ranges) > 0 { 173 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 174 } 175 176 // SymbolInfo 177 sz += sliceHeaderBytes 178 for _, si := range cm.SymbolInfo { 179 sz += pointerSize 180 if si != nil { 181 sz += si.sizeBytes() 182 } 183 } 184 185 // Score 186 sz += 8 187 188 // DebugScore 189 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 190 191 return 192} 193 194type Range struct { 195 // The inclusive beginning of the range. 196 Start Location 197 // The exclusive end of the range. 198 End Location 199} 200 201func (r *Range) sizeBytes() uint64 { 202 return r.Start.sizeBytes() + r.End.sizeBytes() 203} 204 205type Location struct { 206 // 0-based byte offset from the beginning of the file 207 ByteOffset uint32 208 // 1-based line number from the beginning of the file 209 LineNumber uint32 210 // 1-based column number (in runes) from the beginning of line 211 Column uint32 212} 213 214func (l *Location) sizeBytes() uint64 { 215 return 3 * 4 216} 217 218// LineMatch holds the matches within a single line in a file. 219type LineMatch struct { 220 // The line in which a match was found. 221 Line []byte 222 LineStart int 223 LineEnd int 224 LineNumber int 225 226 // Before and After are only set when SearchOptions.NumContextLines is > 0 227 Before []byte 228 After []byte 229 230 // If set, this was a match on the filename. 231 FileName bool 232 233 // The higher the better. Only ranks the quality of the match 234 // within the file, does not take rank of file into account 235 Score float64 236 DebugScore string 237 238 LineFragments []LineFragmentMatch 239} 240 241func (lm *LineMatch) sizeBytes() (sz uint64) { 242 // Line 243 sz += sliceHeaderBytes + uint64(len(lm.Line)) 244 245 // LineStart, LineEnd, LineNumber 246 sz += 3 * 8 247 248 // Before 249 sz += sliceHeaderBytes + uint64(len(lm.Before)) 250 251 // After 252 sz += sliceHeaderBytes + uint64(len(lm.After)) 253 254 // FileName 255 sz += 1 256 257 // Score 258 sz += 8 259 260 // DebugScore 261 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 262 263 // LineFragments 264 sz += sliceHeaderBytes 265 for _, lf := range lm.LineFragments { 266 sz += lf.sizeBytes() 267 } 268 269 return 270} 271 272type Symbol struct { 273 Sym string 274 Kind string 275 Parent string 276 ParentKind string 277} 278 279func (s *Symbol) sizeBytes() uint64 { 280 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 281} 282 283// LineFragmentMatch a segment of matching text within a line. 284type LineFragmentMatch struct { 285 // Offset within the line, in bytes. 286 LineOffset int 287 288 // Offset from file start, in bytes. 289 Offset uint32 290 291 // Number bytes that match. 292 MatchLength int 293 294 SymbolInfo *Symbol 295} 296 297func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 298 // LineOffset 299 sz += 8 300 301 // Offset 302 sz += 4 303 304 // MatchLength 305 sz += 8 306 307 // SymbolInfo 308 sz += pointerSize 309 if lfm.SymbolInfo != nil { 310 sz += lfm.SymbolInfo.sizeBytes() 311 } 312 313 return 314} 315 316type FlushReason uint8 317 318const ( 319 FlushReasonTimerExpired FlushReason = 1 << iota 320 FlushReasonFinalFlush 321 FlushReasonMaxSize 322) 323 324var FlushReasonStrings = map[FlushReason]string{ 325 FlushReasonTimerExpired: "timer_expired", 326 FlushReasonFinalFlush: "final_flush", 327 FlushReasonMaxSize: "max_size_reached", 328} 329 330func (fr FlushReason) String() string { 331 if v, ok := FlushReasonStrings[fr]; ok { 332 return v 333 } 334 335 return "none" 336} 337 338// Stats contains interesting numbers on the search 339type Stats struct { 340 // Amount of I/O for reading contents. 341 ContentBytesLoaded int64 342 343 // Amount of I/O for reading from index. 344 IndexBytesLoaded int64 345 346 // Number of search shards that had a crash. 347 Crashes int 348 349 // Wall clock time for this search 350 Duration time.Duration 351 352 // Number of files containing a match. 353 FileCount int 354 355 // Number of files in shards that we considered. 356 ShardFilesConsidered int 357 358 // Files that we evaluated. Equivalent to files for which all 359 // atom matches (including negations) evaluated to true. 360 FilesConsidered int 361 362 // Files for which we loaded file content to verify substring matches 363 FilesLoaded int 364 365 // Candidate files whose contents weren't examined because we 366 // gathered enough matches. 367 FilesSkipped int 368 369 // Shards that we scanned to find matches. 370 ShardsScanned int 371 372 // Shards that we did not process because a query was canceled. 373 ShardsSkipped int 374 375 // Shards that we did not process because the query was rejected by the 376 // ngram filter indicating it had no matches. 377 ShardsSkippedFilter int 378 379 // Number of non-overlapping matches 380 MatchCount int 381 382 // Number of candidate matches as a result of searching ngrams. 383 NgramMatches int 384 385 // NgramLookups is the number of times we accessed an ngram in the index. 386 NgramLookups int 387 388 // Wall clock time for queued search. 389 Wait time.Duration 390 391 // Number of times regexp was called on files that we evaluated. 392 RegexpsConsidered int 393 394 // FlushReason explains why results were flushed. 395 FlushReason FlushReason 396} 397 398func (s *Stats) sizeBytes() (sz uint64) { 399 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 400 sz += 1 // FlushReason 401 402 return 403} 404 405func (s *Stats) Add(o Stats) { 406 s.ContentBytesLoaded += o.ContentBytesLoaded 407 s.IndexBytesLoaded += o.IndexBytesLoaded 408 s.Crashes += o.Crashes 409 s.FileCount += o.FileCount 410 s.FilesConsidered += o.FilesConsidered 411 s.FilesLoaded += o.FilesLoaded 412 s.FilesSkipped += o.FilesSkipped 413 s.MatchCount += o.MatchCount 414 s.NgramMatches += o.NgramMatches 415 s.NgramLookups += o.NgramLookups 416 s.ShardFilesConsidered += o.ShardFilesConsidered 417 s.ShardsScanned += o.ShardsScanned 418 s.ShardsSkipped += o.ShardsSkipped 419 s.ShardsSkippedFilter += o.ShardsSkippedFilter 420 s.Wait += o.Wait 421 s.RegexpsConsidered += o.RegexpsConsidered 422 423 // We want the first non-zero FlushReason to be sticky. This is a useful 424 // property when aggregating stats from several Zoekts. 425 if s.FlushReason == 0 { 426 s.FlushReason = o.FlushReason 427 } 428} 429 430// Zero returns true if stats is empty. 431func (s *Stats) Zero() bool { 432 if s == nil { 433 return true 434 } 435 436 return !(s.ContentBytesLoaded > 0 || 437 s.IndexBytesLoaded > 0 || 438 s.Crashes > 0 || 439 s.FileCount > 0 || 440 s.FilesConsidered > 0 || 441 s.FilesLoaded > 0 || 442 s.FilesSkipped > 0 || 443 s.MatchCount > 0 || 444 s.NgramMatches > 0 || 445 s.NgramLookups > 0 || 446 s.ShardFilesConsidered > 0 || 447 s.ShardsScanned > 0 || 448 s.ShardsSkipped > 0 || 449 s.ShardsSkippedFilter > 0 || 450 s.Wait > 0 || 451 s.RegexpsConsidered > 0) 452} 453 454// Progress contains information about the global progress of the running search query. 455// This is used by the frontend to reorder results and emit them when stable. 456// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 457type Progress struct { 458 // Priority of the shard that was searched. 459 Priority float64 460 461 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 462 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 463 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 464 // 465 // MaxPendingPriority decreases monotonically in each SearchResult. 466 MaxPendingPriority float64 467} 468 469func (p *Progress) sizeBytes() uint64 { 470 return 2 * 8 471} 472 473// SearchResult contains search matches and extra data 474type SearchResult struct { 475 Stats 476 477 // Do not encode this as we cannot encode -Inf in JSON 478 Progress `json:"-"` 479 480 Files []FileMatch 481 482 // RepoURLs holds a repo => template string map. 483 RepoURLs map[string]string 484 485 // FragmentNames holds a repo => template string map, for 486 // the line number fragment. 487 LineFragments map[string]string 488} 489 490// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 491// The estimate does not take alignment into account. The result is a lower 492// bound on the actual size in memory. 493func (sr *SearchResult) SizeBytes() (sz uint64) { 494 sz += sr.Stats.sizeBytes() 495 sz += sr.Progress.sizeBytes() 496 497 // Files 498 sz += sliceHeaderBytes 499 for _, f := range sr.Files { 500 sz += f.sizeBytes() 501 } 502 503 // RepoURLs 504 sz += mapHeaderBytes 505 for k, v := range sr.RepoURLs { 506 sz += stringHeaderBytes + uint64(len(k)) 507 sz += stringHeaderBytes + uint64(len(v)) 508 } 509 510 // LineFragments 511 sz += mapHeaderBytes 512 for k, v := range sr.LineFragments { 513 sz += stringHeaderBytes + uint64(len(k)) 514 sz += stringHeaderBytes + uint64(len(v)) 515 } 516 517 return 518} 519 520// RepositoryBranch describes an indexed branch, which is a name 521// combined with a version. 522type RepositoryBranch struct { 523 Name string 524 Version string 525} 526 527func (r RepositoryBranch) String() string { 528 return fmt.Sprintf("%s@%s", r.Name, r.Version) 529} 530 531// Repository holds repository metadata. 532type Repository struct { 533 // Sourcegraph's repository ID 534 ID uint32 535 536 // The repository name 537 Name string 538 539 // The repository URL. 540 URL string 541 542 // The physical source where this repo came from, eg. full 543 // path to the zip filename or git repository directory. This 544 // will not be exposed in the UI, but can be used to detect 545 // orphaned index shards. 546 Source string 547 548 // The branches indexed in this repo. 549 Branches []RepositoryBranch 550 551 // Nil if this is not the super project. 552 SubRepoMap map[string]*Repository 553 554 // URL template to link to the commit of a branch 555 CommitURLTemplate string 556 557 // The repository URL for getting to a file. Has access to 558 // {{.Version}}, {{.Path}} 559 FileURLTemplate string 560 561 // The URL fragment to add to a file URL for line numbers. has 562 // access to {{.LineNumber}}. The fragment should include the 563 // separator, generally '#' or ';'. 564 LineFragmentTemplate string 565 566 // Perf optimization: priority is set when we load the shard. It corresponds to 567 // the value of "priority" stored in RawConfig. 568 priority float64 569 570 // All zoekt.* configuration settings. 571 RawConfig map[string]string 572 573 // Importance of the repository, bigger is more important 574 Rank uint16 575 576 // IndexOptions is a hash of the options used to create the index for the 577 // repo. 578 IndexOptions string 579 580 // HasSymbols is true if this repository has indexed ctags 581 // output. Sourcegraph specific: This field is more appropriate for 582 // IndexMetadata. However, we store it here since the Sourcegraph frontend 583 // can read this structure but not IndexMetadata. 584 HasSymbols bool 585 586 // Tombstone is true if we are not allowed to search this repo. 587 Tombstone bool 588 589 // LatestCommitDate is the date of the latest commit among all indexed Branches. 590 // The date might be time.Time's 0-value if the repository was last indexed 591 // before this field was added. 592 LatestCommitDate time.Time 593 594 // FileTombstones is a set of file paths that should be ignored across all branches 595 // in this shard. 596 FileTombstones map[string]struct{} `json:",omitempty"` 597} 598 599func (r *Repository) UnmarshalJSON(data []byte) error { 600 // We define a new type so that we can use json.Unmarshal 601 // without recursing into this same method. 602 type repository *Repository 603 repo := repository(r) 604 605 err := json.Unmarshal(data, repo) 606 if err != nil { 607 return err 608 } 609 610 if v, ok := repo.RawConfig["repoid"]; ok { 611 id, _ := strconv.ParseUint(v, 10, 32) 612 r.ID = uint32(id) 613 } 614 615 if v, ok := repo.RawConfig["priority"]; ok { 616 r.priority, err = strconv.ParseFloat(v, 64) 617 if err != nil { 618 r.priority = 0 619 } 620 621 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 622 // based on priority. Setting it on read instead of during indexing 623 // allows us to avoid a complete reindex. 624 if r.Rank == 0 && r.priority > 0 { 625 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 626 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 627 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 628 } 629 } 630 return nil 631} 632 633// MergeMutable will merge x into r. mutated will be true if it made any 634// changes. err is non-nil if we needed to mutate an immutable field. 635// 636// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 637// computed while indexing so can't be synthesized from x. 638// 639// Note: We ignore RawConfig fields which are duplicated into Repository: 640// name and id. 641// 642// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 643func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 644 if r.ID != x.ID { 645 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 646 return mutated, errors.New("ID is immutable") 647 } 648 if r.Name != x.Name { 649 // Name is encoded into the shard name on disk. We need to re-index if it 650 // changes. 651 return mutated, errors.New("Name is immutable") 652 } 653 if !reflect.DeepEqual(r.Branches, x.Branches) { 654 // Need a reindex if content changing. 655 return mutated, errors.New("Branches is immutable") 656 } 657 658 for k, v := range x.RawConfig { 659 // We ignore name and id since they are encoded into the repository. 660 if k == "name" || k == "id" { 661 continue 662 } 663 if r.RawConfig == nil { 664 mutated = true 665 r.RawConfig = make(map[string]string) 666 } 667 if r.RawConfig[k] != v { 668 mutated = true 669 r.RawConfig[k] = v 670 } 671 } 672 673 return mutated, nil 674} 675 676// IndexMetadata holds metadata stored in the index file. It contains 677// data generated by the core indexing library. 678type IndexMetadata struct { 679 IndexFormatVersion int 680 IndexFeatureVersion int 681 IndexMinReaderVersion int 682 IndexTime time.Time 683 PlainASCII bool 684 LanguageMap map[string]uint16 685 ZoektVersion string 686 ID string 687} 688 689// Statistics of a (collection of) repositories. 690type RepoStats struct { 691 // Repos is used for aggregrating the number of repositories. 692 // 693 // Note: This field is not populated on RepoListEntry.Stats (individual) but 694 // only for RepoList.Stats (aggregate). 695 Repos int 696 697 // Shards is the total number of search shards. 698 Shards int 699 700 // Documents holds the number of documents or files. 701 Documents int 702 703 // IndexBytes is the amount of RAM used for index overhead. 704 IndexBytes int64 705 706 // ContentBytes is the amount of RAM used for raw content. 707 ContentBytes int64 708 709 // Sourcegraph specific stats below. These are not as efficient to calculate 710 // as the above statistics. We experimentally measured about a 10% slower 711 // shard load time. However, we find these values very useful to track and 712 // computing them outside of load time introduces a lot of complexity. 713 714 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 715 // indexed documents. This is not exactly the same as line count, since it 716 // will not include lines not terminated by "\n" (eg a file with no "\n", or 717 // a final line without "\n"). Note: Zoekt deduplicates documents across 718 // branches, so if a path has the same contents on multiple branches, there 719 // is only one document for it. As such that document's newlines is only 720 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 721 // for counts which do not deduplicate. 722 NewLinesCount uint64 723 724 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 725 // branch. 726 DefaultBranchNewLinesCount uint64 727 728 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 729 // except the default branch. 730 OtherBranchesNewLinesCount uint64 731} 732 733func (s *RepoStats) Add(o *RepoStats) { 734 // can't update Repos, since one repo may have multiple 735 // shards. 736 s.Shards += o.Shards 737 s.IndexBytes += o.IndexBytes 738 s.Documents += o.Documents 739 s.ContentBytes += o.ContentBytes 740 741 // Sourcegraph specific 742 s.NewLinesCount += o.NewLinesCount 743 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 744 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 745} 746 747type RepoListEntry struct { 748 Repository Repository 749 IndexMetadata IndexMetadata 750 Stats RepoStats 751} 752 753// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 754// performance profiling of sourcegraph.com revealed that querying this 755// information from Zoekt was causing lots of CPU and memory usage. Note: we 756// can revisit this, how we store and query this information has changed a lot 757// since this was introduced. 758type MinimalRepoListEntry struct { 759 // HasSymbols is exported since Sourcegraph uses this information at search 760 // planning time to decide between Zoekt and an unindexed symbol search. 761 // 762 // Note: it pretty much is always true in practice. 763 HasSymbols bool 764 765 // Branches is used by Sourcegraphs query planner to decided if it can use 766 // zoekt or go via an unindexed code path. 767 Branches []RepositoryBranch 768 769 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 770 // since the epoch). This is to make it clear we are not transporting the 771 // full fidelty timestamp (ie with milliseconds and location). Additionally 772 // it saves 16 bytes in this struct. 773 // 774 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 775 // how many repositories need updating after a ranking change/etc. 776 // 777 // TODO(keegancsmith) audit updates to IndexTime and document how and when 778 // it changes. Concerned about things like metadata updates or compound 779 // shards leading to untrustworthy data here. 780 IndexTimeUnix int64 781} 782 783type ReposMap map[uint32]MinimalRepoListEntry 784 785// MarshalBinary implements a specialized encoder for ReposMap. 786func (q *ReposMap) MarshalBinary() ([]byte, error) { 787 return reposMapEncode(*q) 788} 789 790// UnmarshalBinary implements a specialized decoder for ReposMap. 791func (q *ReposMap) UnmarshalBinary(b []byte) error { 792 var err error 793 (*q), err = reposMapDecode(b) 794 return err 795} 796 797// RepoList holds a set of Repository metadata. 798type RepoList struct { 799 // Returned when ListOptions.Field is RepoListFieldRepos. 800 Repos []*RepoListEntry 801 802 // Returned when ListOptions.Field is RepoListFieldMinimal. 803 // 804 // Deprecated: use ReposMap. 805 Minimal map[uint32]*MinimalRepoListEntry 806 807 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 808 ReposMap ReposMap 809 810 Crashes int 811 812 // Stats response to a List request. 813 // This is the aggregate RepoStats of all repos matching the input query. 814 Stats RepoStats 815} 816 817type Searcher interface { 818 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 819 820 // List lists repositories. The query `q` can only contain 821 // query.Repo atoms. 822 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 823 Close() 824 825 // Describe the searcher for debug messages. 826 String() string 827} 828 829type RepoListField int 830 831const ( 832 RepoListFieldRepos RepoListField = 0 833 RepoListFieldMinimal = 1 834 RepoListFieldReposMap = 2 835) 836 837type ListOptions struct { 838 // Return only Minimal data per repo that Sourcegraph frontend needs. 839 // 840 // Deprecated: use Field 841 Minimal bool 842 843 // Field decides which field to populate in RepoList response. 844 Field RepoListField 845} 846 847func (o *ListOptions) GetField() (RepoListField, error) { 848 if o == nil { 849 return RepoListFieldRepos, nil 850 } 851 if o.Field < 0 || o.Field > RepoListFieldReposMap { 852 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 853 } 854 if o.Minimal == true { 855 return RepoListFieldMinimal, nil 856 } 857 return o.Field, nil 858} 859 860func (o *ListOptions) String() string { 861 return fmt.Sprintf("%#v", o) 862} 863 864type SearchOptions struct { 865 // Return an upper-bound estimate of eligible documents in 866 // stats.ShardFilesConsidered. 867 EstimateDocCount bool 868 869 // Return the whole file. 870 Whole bool 871 872 // Maximum number of matches: skip all processing an index 873 // shard after we found this many non-overlapping matches. 874 ShardMaxMatchCount int 875 876 // Maximum number of matches: stop looking for more matches 877 // once we have this many matches across shards. 878 TotalMaxMatchCount int 879 880 // Maximum number of matches: skip processing documents for a repository in 881 // a shard once we have found ShardRepoMaxMatchCount. 882 // 883 // A compound shard may contain multiple repositories. This will most often 884 // be set to 1 to find all repositories containing a result. 885 ShardRepoMaxMatchCount int 886 887 // Deprecated: this field is not read anymore. 888 ShardMaxImportantMatch int 889 890 // Deprecated: this field is not read anymore. 891 TotalMaxImportantMatch int 892 893 // Abort the search after this much time has passed. 894 MaxWallTime time.Duration 895 896 // FlushWallTime if non-zero will stop streaming behaviour at first and 897 // instead will collate and sort results. At FlushWallTime the results will 898 // be sent and then the behaviour will revert to the normal streaming. 899 FlushWallTime time.Duration 900 901 // Truncates the number of documents (i.e. files) after collating and 902 // sorting the results. 903 MaxDocDisplayCount int 904 905 // Truncates the number of matchs after collating and sorting the results. 906 MaxMatchDisplayCount int 907 908 // If set to a number greater than zero then up to this many number 909 // of context lines will be added before and after each matched line. 910 // Note that the included context lines might contain matches and 911 // it's up to the consumer of the result to remove those lines. 912 NumContextLines int 913 914 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 915 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 916 ChunkMatches bool 917 918 // EXPERIMENTAL. If true, document ranks are used as additional input for 919 // sorting matches. 920 UseDocumentRanks bool 921 922 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 923 // their weight in the file match score. If the value is <= 0.0, the default weight value 924 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 925 DocumentRanksWeight float64 926 927 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 928 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 929 // When enabled, all other scoring signals are ignored, including document ranks. 930 UseKeywordScoring bool 931 932 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 933 // a command-line flag 934 Trace bool 935 936 // If set, the search results will contain debug information for scoring. 937 DebugScore bool 938 939 // SpanContext is the opentracing span context, if it exists, from the zoekt client 940 SpanContext map[string]string 941} 942 943func (s *SearchOptions) String() string { 944 return fmt.Sprintf("%#v", s) 945} 946 947// Sender is the interface that wraps the basic Send method. 948type Sender interface { 949 Send(*SearchResult) 950} 951 952// Streamer adds the method StreamSearch to the Searcher interface. 953type Streamer interface { 954 Searcher 955 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 956}