fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "time" 25 26 "github.com/sourcegraph/zoekt/query" 27) 28 29const mapHeaderBytes uint64 = 48 30const sliceHeaderBytes uint64 = 24 31const stringHeaderBytes uint64 = 16 32const pointerSize uint64 = 8 33const interfaceBytes uint64 = 16 34 35// FileMatch contains all the matches within a file. 36type FileMatch struct { 37 // Ranking; the higher, the better. 38 Score float64 // TODO - hide this field? 39 40 // For debugging. Needs DebugScore set, but public so tests in 41 // other packages can print some diagnostics. 42 Debug string 43 44 FileName string 45 46 // Repository is the globally unique name of the repo of the 47 // match 48 Repository string 49 Branches []string 50 51 // One of LineMatches or ChunkMatches will be returned depending on whether 52 // the SearchOptions.ChunkMatches is set. 53 LineMatches []LineMatch 54 ChunkMatches []ChunkMatch 55 56 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 57 // Sourcegraph. 58 RepositoryID uint32 59 60 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 61 // order results from different repositories relative to each other. 62 RepositoryPriority float64 63 64 // Only set if requested 65 Content []byte 66 67 // Checksum of the content. 68 Checksum []byte 69 70 // Detected language of the result. 71 Language string 72 73 // SubRepositoryName is the globally unique name of the repo, 74 // if it came from a subrepository 75 SubRepositoryName string 76 77 // SubRepositoryPath holds the prefix where the subrepository 78 // was mounted. 79 SubRepositoryPath string 80 81 // Commit SHA1 (hex) of the (sub)repo holding the file. 82 Version string 83} 84 85func (m *FileMatch) sizeBytes() (sz uint64) { 86 // Score 87 sz += 8 88 89 for _, s := range []string{ 90 m.Debug, 91 m.FileName, 92 m.Repository, 93 m.Language, 94 m.SubRepositoryName, 95 m.SubRepositoryPath, 96 m.Version, 97 } { 98 sz += stringHeaderBytes + uint64(len(s)) 99 } 100 101 // Branches 102 sz += sliceHeaderBytes 103 for _, s := range m.Branches { 104 sz += stringHeaderBytes + uint64(len(s)) 105 } 106 107 // LineMatches 108 sz += sliceHeaderBytes 109 for _, lm := range m.LineMatches { 110 sz += lm.sizeBytes() 111 } 112 113 // ChunkMatches 114 sz += sliceHeaderBytes 115 for _, cm := range m.ChunkMatches { 116 sz += cm.sizeBytes() 117 } 118 119 // RepositoryID 120 sz += 4 121 122 // RepositoryPriority 123 sz += 8 124 125 // Content 126 sz += sliceHeaderBytes + uint64(len(m.Content)) 127 128 // Checksum 129 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 130 131 return 132} 133 134// ChunkMatch is a set of non-overlapping matches within a contiguous range of 135// lines in the file. 136type ChunkMatch struct { 137 // Content is a contiguous range of complete lines that fully contains Ranges. 138 Content []byte 139 // ContentStart is the location (inclusive) of the beginning of content 140 // relative to the beginning of the file. It will always be at the 141 // beginning of a line (Column will always be 1). 142 ContentStart Location 143 144 // FileName indicates whether this match is a match on the file name, in 145 // which case Content will contain the file name. 146 FileName bool 147 148 // Ranges is a set of matching ranges within this chunk. Each range is relative 149 // to the beginning of the file (not the beginning of Content). 150 Ranges []Range 151 152 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 153 // its length will equal that of Ranges. Any of its elements may be nil. 154 SymbolInfo []*Symbol 155 156 Score float64 157 DebugScore string 158} 159 160func (cm *ChunkMatch) sizeBytes() (sz uint64) { 161 // Content 162 sz += sliceHeaderBytes + uint64(len(cm.Content)) 163 164 // ContentStart 165 sz += cm.ContentStart.sizeBytes() 166 167 // FileName 168 sz += 1 169 170 // Ranges 171 sz += sliceHeaderBytes 172 if len(cm.Ranges) > 0 { 173 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 174 } 175 176 // SymbolInfo 177 sz += sliceHeaderBytes 178 for _, si := range cm.SymbolInfo { 179 sz += pointerSize 180 if si != nil { 181 sz += si.sizeBytes() 182 } 183 } 184 185 // Score 186 sz += 8 187 188 // DebugScore 189 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 190 191 return 192} 193 194type Range struct { 195 // The inclusive beginning of the range. 196 Start Location 197 // The exclusive end of the range. 198 End Location 199} 200 201func (r *Range) sizeBytes() uint64 { 202 return r.Start.sizeBytes() + r.End.sizeBytes() 203} 204 205type Location struct { 206 // 0-based byte offset from the beginning of the file 207 ByteOffset uint32 208 // 1-based line number from the beginning of the file 209 LineNumber uint32 210 // 1-based column number (in runes) from the beginning of line 211 Column uint32 212} 213 214func (l *Location) sizeBytes() uint64 { 215 return 3 * 4 216} 217 218// LineMatch holds the matches within a single line in a file. 219type LineMatch struct { 220 // The line in which a match was found. 221 Line []byte 222 LineStart int 223 LineEnd int 224 LineNumber int 225 226 // Before and After are only set when SearchOptions.NumContextLines is > 0 227 Before []byte 228 After []byte 229 230 // If set, this was a match on the filename. 231 FileName bool 232 233 // The higher the better. Only ranks the quality of the match 234 // within the file, does not take rank of file into account 235 Score float64 236 DebugScore string 237 238 LineFragments []LineFragmentMatch 239} 240 241func (lm *LineMatch) sizeBytes() (sz uint64) { 242 // Line 243 sz += sliceHeaderBytes + uint64(len(lm.Line)) 244 245 // LineStart, LineEnd, LineNumber 246 sz += 3 * 8 247 248 // Before 249 sz += sliceHeaderBytes + uint64(len(lm.Before)) 250 251 // After 252 sz += sliceHeaderBytes + uint64(len(lm.After)) 253 254 // FileName 255 sz += 1 256 257 // Score 258 sz += 8 259 260 // DebugScore 261 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 262 263 // LineFragments 264 sz += sliceHeaderBytes 265 for _, lf := range lm.LineFragments { 266 sz += lf.sizeBytes() 267 } 268 269 return 270} 271 272type Symbol struct { 273 Sym string 274 Kind string 275 Parent string 276 ParentKind string 277} 278 279func (s *Symbol) sizeBytes() uint64 { 280 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 281} 282 283// LineFragmentMatch a segment of matching text within a line. 284type LineFragmentMatch struct { 285 // Offset within the line, in bytes. 286 LineOffset int 287 288 // Offset from file start, in bytes. 289 Offset uint32 290 291 // Number bytes that match. 292 MatchLength int 293 294 SymbolInfo *Symbol 295} 296 297func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 298 // LineOffset 299 sz += 8 300 301 // Offset 302 sz += 4 303 304 // MatchLength 305 sz += 8 306 307 // SymbolInfo 308 sz += pointerSize 309 if lfm.SymbolInfo != nil { 310 sz += lfm.SymbolInfo.sizeBytes() 311 } 312 313 return 314} 315 316type FlushReason uint8 317 318const ( 319 FlushReasonTimerExpired FlushReason = 1 << iota 320 FlushReasonFinalFlush 321 FlushReasonMaxSize 322) 323 324var FlushReasonStrings = map[FlushReason]string{ 325 FlushReasonTimerExpired: "timer_expired", 326 FlushReasonFinalFlush: "final_flush", 327 FlushReasonMaxSize: "max_size_reached", 328} 329 330func (fr FlushReason) String() string { 331 if v, ok := FlushReasonStrings[fr]; ok { 332 return v 333 } 334 335 return "none" 336} 337 338// Stats contains interesting numbers on the search 339type Stats struct { 340 // Amount of I/O for reading contents. 341 ContentBytesLoaded int64 342 343 // Amount of I/O for reading from index. 344 IndexBytesLoaded int64 345 346 // Number of search shards that had a crash. 347 Crashes int 348 349 // Wall clock time for this search 350 Duration time.Duration 351 352 // Number of files containing a match. 353 FileCount int 354 355 // Number of files in shards that we considered. 356 ShardFilesConsidered int 357 358 // Files that we evaluated. Equivalent to files for which all 359 // atom matches (including negations) evaluated to true. 360 FilesConsidered int 361 362 // Files for which we loaded file content to verify substring matches 363 FilesLoaded int 364 365 // Candidate files whose contents weren't examined because we 366 // gathered enough matches. 367 FilesSkipped int 368 369 // Shards that we scanned to find matches. 370 ShardsScanned int 371 372 // Shards that we did not process because a query was canceled. 373 ShardsSkipped int 374 375 // Shards that we did not process because the query was rejected by the 376 // ngram filter indicating it had no matches. 377 ShardsSkippedFilter int 378 379 // Number of non-overlapping matches 380 MatchCount int 381 382 // Number of candidate matches as a result of searching ngrams. 383 NgramMatches int 384 385 // NgramLookups is the number of times we accessed an ngram in the index. 386 NgramLookups int 387 388 // Wall clock time for queued search. 389 Wait time.Duration 390 391 // Aggregate wall clock time spent constructing and pruning the match tree. 392 // This accounts for time such as lookups in the trigram index. 393 MatchTreeConstruction time.Duration 394 395 // Aggregate wall clock time spent searching the match tree. This accounts 396 // for the bulk of search work done looking for matches. 397 MatchTreeSearch time.Duration 398 399 // Number of times regexp was called on files that we evaluated. 400 RegexpsConsidered int 401 402 // FlushReason explains why results were flushed. 403 FlushReason FlushReason 404} 405 406func (s *Stats) sizeBytes() (sz uint64) { 407 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 408 sz += 1 // FlushReason 409 410 return 411} 412 413func (s *Stats) Add(o Stats) { 414 s.ContentBytesLoaded += o.ContentBytesLoaded 415 s.IndexBytesLoaded += o.IndexBytesLoaded 416 s.Crashes += o.Crashes 417 s.FileCount += o.FileCount 418 s.FilesConsidered += o.FilesConsidered 419 s.FilesLoaded += o.FilesLoaded 420 s.FilesSkipped += o.FilesSkipped 421 s.MatchCount += o.MatchCount 422 s.NgramMatches += o.NgramMatches 423 s.NgramLookups += o.NgramLookups 424 s.ShardFilesConsidered += o.ShardFilesConsidered 425 s.ShardsScanned += o.ShardsScanned 426 s.ShardsSkipped += o.ShardsSkipped 427 s.ShardsSkippedFilter += o.ShardsSkippedFilter 428 s.Wait += o.Wait 429 s.MatchTreeConstruction += o.MatchTreeConstruction 430 s.MatchTreeSearch += o.MatchTreeSearch 431 s.RegexpsConsidered += o.RegexpsConsidered 432 433 // We want the first non-zero FlushReason to be sticky. This is a useful 434 // property when aggregating stats from several Zoekts. 435 if s.FlushReason == 0 { 436 s.FlushReason = o.FlushReason 437 } 438} 439 440// Zero returns true if stats is empty. 441func (s *Stats) Zero() bool { 442 if s == nil { 443 return true 444 } 445 446 return !(s.ContentBytesLoaded > 0 || 447 s.IndexBytesLoaded > 0 || 448 s.Crashes > 0 || 449 s.FileCount > 0 || 450 s.FilesConsidered > 0 || 451 s.FilesLoaded > 0 || 452 s.FilesSkipped > 0 || 453 s.MatchCount > 0 || 454 s.NgramMatches > 0 || 455 s.NgramLookups > 0 || 456 s.ShardFilesConsidered > 0 || 457 s.ShardsScanned > 0 || 458 s.ShardsSkipped > 0 || 459 s.ShardsSkippedFilter > 0 || 460 s.Wait > 0 || 461 s.MatchTreeConstruction > 0 || 462 s.MatchTreeSearch > 0 || 463 s.RegexpsConsidered > 0) 464} 465 466// Progress contains information about the global progress of the running search query. 467// This is used by the frontend to reorder results and emit them when stable. 468// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 469type Progress struct { 470 // Priority of the shard that was searched. 471 Priority float64 472 473 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 474 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 475 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 476 // 477 // MaxPendingPriority decreases monotonically in each SearchResult. 478 MaxPendingPriority float64 479} 480 481func (p *Progress) sizeBytes() uint64 { 482 return 2 * 8 483} 484 485// SearchResult contains search matches and extra data 486type SearchResult struct { 487 Stats 488 489 // Do not encode this as we cannot encode -Inf in JSON 490 Progress `json:"-"` 491 492 Files []FileMatch 493 494 // RepoURLs holds a repo => template string map. 495 RepoURLs map[string]string 496 497 // FragmentNames holds a repo => template string map, for 498 // the line number fragment. 499 LineFragments map[string]string 500} 501 502// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 503// The estimate does not take alignment into account. The result is a lower 504// bound on the actual size in memory. 505func (sr *SearchResult) SizeBytes() (sz uint64) { 506 sz += sr.Stats.sizeBytes() 507 sz += sr.Progress.sizeBytes() 508 509 // Files 510 sz += sliceHeaderBytes 511 for _, f := range sr.Files { 512 sz += f.sizeBytes() 513 } 514 515 // RepoURLs 516 sz += mapHeaderBytes 517 for k, v := range sr.RepoURLs { 518 sz += stringHeaderBytes + uint64(len(k)) 519 sz += stringHeaderBytes + uint64(len(v)) 520 } 521 522 // LineFragments 523 sz += mapHeaderBytes 524 for k, v := range sr.LineFragments { 525 sz += stringHeaderBytes + uint64(len(k)) 526 sz += stringHeaderBytes + uint64(len(v)) 527 } 528 529 return 530} 531 532// RepositoryBranch describes an indexed branch, which is a name 533// combined with a version. 534type RepositoryBranch struct { 535 Name string 536 Version string 537} 538 539func (r RepositoryBranch) String() string { 540 return fmt.Sprintf("%s@%s", r.Name, r.Version) 541} 542 543// Repository holds repository metadata. 544type Repository struct { 545 // Sourcegraph's repository ID 546 ID uint32 547 548 // The repository name 549 Name string 550 551 // The repository URL. 552 URL string 553 554 // The physical source where this repo came from, eg. full 555 // path to the zip filename or git repository directory. This 556 // will not be exposed in the UI, but can be used to detect 557 // orphaned index shards. 558 Source string 559 560 // The branches indexed in this repo. 561 Branches []RepositoryBranch 562 563 // Nil if this is not the super project. 564 SubRepoMap map[string]*Repository 565 566 // URL template to link to the commit of a branch 567 CommitURLTemplate string 568 569 // The repository URL for getting to a file. Has access to 570 // {{.Version}}, {{.Path}} 571 FileURLTemplate string 572 573 // The URL fragment to add to a file URL for line numbers. has 574 // access to {{.LineNumber}}. The fragment should include the 575 // separator, generally '#' or ';'. 576 LineFragmentTemplate string 577 578 // Perf optimization: priority is set when we load the shard. It corresponds to 579 // the value of "priority" stored in RawConfig. 580 priority float64 581 582 // All zoekt.* configuration settings. 583 RawConfig map[string]string 584 585 // Importance of the repository, bigger is more important 586 Rank uint16 587 588 // IndexOptions is a hash of the options used to create the index for the 589 // repo. 590 IndexOptions string 591 592 // HasSymbols is true if this repository has indexed ctags 593 // output. Sourcegraph specific: This field is more appropriate for 594 // IndexMetadata. However, we store it here since the Sourcegraph frontend 595 // can read this structure but not IndexMetadata. 596 HasSymbols bool 597 598 // Tombstone is true if we are not allowed to search this repo. 599 Tombstone bool 600 601 // LatestCommitDate is the date of the latest commit among all indexed Branches. 602 // The date might be time.Time's 0-value if the repository was last indexed 603 // before this field was added. 604 LatestCommitDate time.Time 605 606 // FileTombstones is a set of file paths that should be ignored across all branches 607 // in this shard. 608 FileTombstones map[string]struct{} `json:",omitempty"` 609} 610 611func (r *Repository) UnmarshalJSON(data []byte) error { 612 // We define a new type so that we can use json.Unmarshal 613 // without recursing into this same method. 614 type repository *Repository 615 repo := repository(r) 616 617 err := json.Unmarshal(data, repo) 618 if err != nil { 619 return err 620 } 621 622 if v, ok := repo.RawConfig["repoid"]; ok { 623 id, _ := strconv.ParseUint(v, 10, 32) 624 r.ID = uint32(id) 625 } 626 627 if v, ok := repo.RawConfig["priority"]; ok { 628 r.priority, err = strconv.ParseFloat(v, 64) 629 if err != nil { 630 r.priority = 0 631 } 632 633 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 634 // based on priority. Setting it on read instead of during indexing 635 // allows us to avoid a complete reindex. 636 if r.Rank == 0 && r.priority > 0 { 637 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 638 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 639 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 640 } 641 } 642 return nil 643} 644 645// MergeMutable will merge x into r. mutated will be true if it made any 646// changes. err is non-nil if we needed to mutate an immutable field. 647// 648// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 649// computed while indexing so can't be synthesized from x. 650// 651// Note: We ignore RawConfig fields which are duplicated into Repository: 652// name and id. 653// 654// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 655func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 656 if r.ID != x.ID { 657 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 658 return mutated, errors.New("ID is immutable") 659 } 660 if r.Name != x.Name { 661 // Name is encoded into the shard name on disk. We need to re-index if it 662 // changes. 663 return mutated, errors.New("Name is immutable") 664 } 665 if !reflect.DeepEqual(r.Branches, x.Branches) { 666 // Need a reindex if content changing. 667 return mutated, errors.New("Branches is immutable") 668 } 669 670 for k, v := range x.RawConfig { 671 // We ignore name and id since they are encoded into the repository. 672 if k == "name" || k == "id" { 673 continue 674 } 675 if r.RawConfig == nil { 676 mutated = true 677 r.RawConfig = make(map[string]string) 678 } 679 if r.RawConfig[k] != v { 680 mutated = true 681 r.RawConfig[k] = v 682 } 683 } 684 685 return mutated, nil 686} 687 688// IndexMetadata holds metadata stored in the index file. It contains 689// data generated by the core indexing library. 690type IndexMetadata struct { 691 IndexFormatVersion int 692 IndexFeatureVersion int 693 IndexMinReaderVersion int 694 IndexTime time.Time 695 PlainASCII bool 696 LanguageMap map[string]uint16 697 ZoektVersion string 698 ID string 699} 700 701// Statistics of a (collection of) repositories. 702type RepoStats struct { 703 // Repos is used for aggregrating the number of repositories. 704 // 705 // Note: This field is not populated on RepoListEntry.Stats (individual) but 706 // only for RepoList.Stats (aggregate). 707 Repos int 708 709 // Shards is the total number of search shards. 710 Shards int 711 712 // Documents holds the number of documents or files. 713 Documents int 714 715 // IndexBytes is the amount of RAM used for index overhead. 716 IndexBytes int64 717 718 // ContentBytes is the amount of RAM used for raw content. 719 ContentBytes int64 720 721 // Sourcegraph specific stats below. These are not as efficient to calculate 722 // as the above statistics. We experimentally measured about a 10% slower 723 // shard load time. However, we find these values very useful to track and 724 // computing them outside of load time introduces a lot of complexity. 725 726 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 727 // indexed documents. This is not exactly the same as line count, since it 728 // will not include lines not terminated by "\n" (eg a file with no "\n", or 729 // a final line without "\n"). Note: Zoekt deduplicates documents across 730 // branches, so if a path has the same contents on multiple branches, there 731 // is only one document for it. As such that document's newlines is only 732 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 733 // for counts which do not deduplicate. 734 NewLinesCount uint64 735 736 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 737 // branch. 738 DefaultBranchNewLinesCount uint64 739 740 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 741 // except the default branch. 742 OtherBranchesNewLinesCount uint64 743} 744 745func (s *RepoStats) Add(o *RepoStats) { 746 // can't update Repos, since one repo may have multiple 747 // shards. 748 s.Shards += o.Shards 749 s.IndexBytes += o.IndexBytes 750 s.Documents += o.Documents 751 s.ContentBytes += o.ContentBytes 752 753 // Sourcegraph specific 754 s.NewLinesCount += o.NewLinesCount 755 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 756 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 757} 758 759type RepoListEntry struct { 760 Repository Repository 761 IndexMetadata IndexMetadata 762 Stats RepoStats 763} 764 765// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 766// performance profiling of sourcegraph.com revealed that querying this 767// information from Zoekt was causing lots of CPU and memory usage. Note: we 768// can revisit this, how we store and query this information has changed a lot 769// since this was introduced. 770type MinimalRepoListEntry struct { 771 // HasSymbols is exported since Sourcegraph uses this information at search 772 // planning time to decide between Zoekt and an unindexed symbol search. 773 // 774 // Note: it pretty much is always true in practice. 775 HasSymbols bool 776 777 // Branches is used by Sourcegraphs query planner to decided if it can use 778 // zoekt or go via an unindexed code path. 779 Branches []RepositoryBranch 780 781 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 782 // since the epoch). This is to make it clear we are not transporting the 783 // full fidelty timestamp (ie with milliseconds and location). Additionally 784 // it saves 16 bytes in this struct. 785 // 786 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 787 // how many repositories need updating after a ranking change/etc. 788 // 789 // TODO(keegancsmith) audit updates to IndexTime and document how and when 790 // it changes. Concerned about things like metadata updates or compound 791 // shards leading to untrustworthy data here. 792 IndexTimeUnix int64 793} 794 795type ReposMap map[uint32]MinimalRepoListEntry 796 797// MarshalBinary implements a specialized encoder for ReposMap. 798func (q *ReposMap) MarshalBinary() ([]byte, error) { 799 return reposMapEncode(*q) 800} 801 802// UnmarshalBinary implements a specialized decoder for ReposMap. 803func (q *ReposMap) UnmarshalBinary(b []byte) error { 804 var err error 805 (*q), err = reposMapDecode(b) 806 return err 807} 808 809// RepoList holds a set of Repository metadata. 810type RepoList struct { 811 // Returned when ListOptions.Field is RepoListFieldRepos. 812 Repos []*RepoListEntry 813 814 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 815 ReposMap ReposMap 816 817 Crashes int 818 819 // Stats response to a List request. 820 // This is the aggregate RepoStats of all repos matching the input query. 821 Stats RepoStats 822} 823 824type Searcher interface { 825 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 826 827 // List lists repositories. The query `q` can only contain 828 // query.Repo atoms. 829 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 830 Close() 831 832 // Describe the searcher for debug messages. 833 String() string 834} 835 836type RepoListField int 837 838const ( 839 RepoListFieldRepos RepoListField = 0 840 RepoListFieldReposMap = 2 841) 842 843type ListOptions struct { 844 // Field decides which field to populate in RepoList response. 845 Field RepoListField 846} 847 848func (o *ListOptions) GetField() (RepoListField, error) { 849 if o == nil { 850 return RepoListFieldRepos, nil 851 } 852 switch o.Field { 853 case RepoListFieldRepos, RepoListFieldReposMap: 854 return o.Field, nil 855 case 1: 856 return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field) 857 default: 858 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 859 } 860} 861 862func (o *ListOptions) String() string { 863 return fmt.Sprintf("%#v", o) 864} 865 866type SearchOptions struct { 867 // Return an upper-bound estimate of eligible documents in 868 // stats.ShardFilesConsidered. 869 EstimateDocCount bool 870 871 // Return the whole file. 872 Whole bool 873 874 // Maximum number of matches: skip all processing an index 875 // shard after we found this many non-overlapping matches. 876 ShardMaxMatchCount int 877 878 // Maximum number of matches: stop looking for more matches 879 // once we have this many matches across shards. 880 TotalMaxMatchCount int 881 882 // Maximum number of matches: skip processing documents for a repository in 883 // a shard once we have found ShardRepoMaxMatchCount. 884 // 885 // A compound shard may contain multiple repositories. This will most often 886 // be set to 1 to find all repositories containing a result. 887 ShardRepoMaxMatchCount int 888 889 // Deprecated: this field is not read anymore. 890 ShardMaxImportantMatch int 891 892 // Deprecated: this field is not read anymore. 893 TotalMaxImportantMatch int 894 895 // Abort the search after this much time has passed. 896 MaxWallTime time.Duration 897 898 // FlushWallTime if non-zero will stop streaming behaviour at first and 899 // instead will collate and sort results. At FlushWallTime the results will 900 // be sent and then the behaviour will revert to the normal streaming. 901 FlushWallTime time.Duration 902 903 // Truncates the number of documents (i.e. files) after collating and 904 // sorting the results. 905 MaxDocDisplayCount int 906 907 // Truncates the number of matchs after collating and sorting the results. 908 MaxMatchDisplayCount int 909 910 // If set to a number greater than zero then up to this many number 911 // of context lines will be added before and after each matched line. 912 // Note that the included context lines might contain matches and 913 // it's up to the consumer of the result to remove those lines. 914 NumContextLines int 915 916 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 917 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 918 ChunkMatches bool 919 920 // EXPERIMENTAL. If true, document ranks are used as additional input for 921 // sorting matches. 922 UseDocumentRanks bool 923 924 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 925 // their weight in the file match score. If the value is <= 0.0, the default weight value 926 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 927 DocumentRanksWeight float64 928 929 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 930 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 931 // When enabled, all other scoring signals are ignored, including document ranks. 932 UseKeywordScoring bool 933 934 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 935 // a command-line flag 936 Trace bool 937 938 // If set, the search results will contain debug information for scoring. 939 DebugScore bool 940 941 // SpanContext is the opentracing span context, if it exists, from the zoekt client 942 SpanContext map[string]string 943} 944 945func (s *SearchOptions) String() string { 946 return fmt.Sprintf("%#v", s) 947} 948 949// Sender is the interface that wraps the basic Send method. 950type Sender interface { 951 Send(*SearchResult) 952} 953 954// Streamer adds the method StreamSearch to the Searcher interface. 955type Streamer interface { 956 Searcher 957 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 958}