fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "strings" 25 "time" 26 27 "github.com/sourcegraph/zoekt/query" 28) 29 30const mapHeaderBytes uint64 = 48 31const sliceHeaderBytes uint64 = 24 32const stringHeaderBytes uint64 = 16 33const pointerSize uint64 = 8 34const interfaceBytes uint64 = 16 35 36// FileMatch contains all the matches within a file. 37type FileMatch struct { 38 FileName string 39 40 // Repository is the globally unique name of the repo of the 41 // match 42 Repository string 43 44 // SubRepositoryName is the globally unique name of the repo, 45 // if it came from a subrepository 46 SubRepositoryName string `json:",omitempty"` 47 48 // SubRepositoryPath holds the prefix where the subrepository 49 // was mounted. 50 SubRepositoryPath string `json:",omitempty"` 51 52 // Commit SHA1 (hex) of the (sub)repo holding the file. 53 Version string `json:",omitempty"` 54 55 // Detected language of the result. 56 Language string 57 58 // For debugging. Needs DebugScore set, but public so tests in 59 // other packages can print some diagnostics. 60 Debug string `json:",omitempty"` 61 62 Branches []string `json:",omitempty"` 63 64 // One of LineMatches or ChunkMatches will be returned depending on whether 65 // the SearchOptions.ChunkMatches is set. 66 LineMatches []LineMatch `json:",omitempty"` 67 ChunkMatches []ChunkMatch `json:",omitempty"` 68 69 // Only set if requested 70 Content []byte `json:",omitempty"` 71 72 // Checksum of the content. 73 Checksum []byte 74 75 // Ranking; the higher, the better. 76 Score float64 `json:",omitempty"` 77 78 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 79 // order results from different repositories relative to each other. 80 RepositoryPriority float64 `json:",omitempty"` 81 82 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 83 // Sourcegraph. 84 RepositoryID uint32 `json:",omitempty"` 85} 86 87func (m *FileMatch) sizeBytes() (sz uint64) { 88 // Score 89 sz += 8 90 91 for _, s := range []string{ 92 m.Debug, 93 m.FileName, 94 m.Repository, 95 m.Language, 96 m.SubRepositoryName, 97 m.SubRepositoryPath, 98 m.Version, 99 } { 100 sz += stringHeaderBytes + uint64(len(s)) 101 } 102 103 // Branches 104 sz += sliceHeaderBytes 105 for _, s := range m.Branches { 106 sz += stringHeaderBytes + uint64(len(s)) 107 } 108 109 // LineMatches 110 sz += sliceHeaderBytes 111 for _, lm := range m.LineMatches { 112 sz += lm.sizeBytes() 113 } 114 115 // ChunkMatches 116 sz += sliceHeaderBytes 117 for _, cm := range m.ChunkMatches { 118 sz += cm.sizeBytes() 119 } 120 121 // RepositoryID 122 sz += 4 123 124 // RepositoryPriority 125 sz += 8 126 127 // Content 128 sz += sliceHeaderBytes + uint64(len(m.Content)) 129 130 // Checksum 131 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 132 133 return 134} 135 136// ChunkMatch is a set of non-overlapping matches within a contiguous range of 137// lines in the file. 138type ChunkMatch struct { 139 DebugScore string 140 141 // Content is a contiguous range of complete lines that fully contains Ranges. 142 Content []byte 143 144 // Ranges is a set of matching ranges within this chunk. Each range is relative 145 // to the beginning of the file (not the beginning of Content). 146 Ranges []Range 147 148 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 149 // its length will equal that of Ranges. Any of its elements may be nil. 150 SymbolInfo []*Symbol 151 152 // FileName indicates whether this match is a match on the file name, in 153 // which case Content will contain the file name. 154 FileName bool 155 156 // ContentStart is the location (inclusive) of the beginning of content 157 // relative to the beginning of the file. It will always be at the 158 // beginning of a line (Column will always be 1). 159 ContentStart Location 160 161 Score float64 162} 163 164func (cm *ChunkMatch) sizeBytes() (sz uint64) { 165 // Content 166 sz += sliceHeaderBytes + uint64(len(cm.Content)) 167 168 // ContentStart 169 sz += cm.ContentStart.sizeBytes() 170 171 // FileName 172 sz += 1 173 174 // Ranges 175 sz += sliceHeaderBytes 176 if len(cm.Ranges) > 0 { 177 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 178 } 179 180 // SymbolInfo 181 sz += sliceHeaderBytes 182 for _, si := range cm.SymbolInfo { 183 sz += pointerSize 184 if si != nil { 185 sz += si.sizeBytes() 186 } 187 } 188 189 // Score 190 sz += 8 191 192 // DebugScore 193 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 194 195 return 196} 197 198type Range struct { 199 // The inclusive beginning of the range. 200 Start Location 201 // The exclusive end of the range. 202 End Location 203} 204 205func (r *Range) sizeBytes() uint64 { 206 return r.Start.sizeBytes() + r.End.sizeBytes() 207} 208 209type Location struct { 210 // 0-based byte offset from the beginning of the file 211 ByteOffset uint32 212 // 1-based line number from the beginning of the file 213 LineNumber uint32 214 // 1-based column number (in runes) from the beginning of line 215 Column uint32 216} 217 218func (l *Location) sizeBytes() uint64 { 219 return 3 * 4 220} 221 222// LineMatch holds the matches within a single line in a file. 223type LineMatch struct { 224 // The line in which a match was found. 225 Line []byte 226 LineStart int 227 LineEnd int 228 LineNumber int 229 230 // Before and After are only set when SearchOptions.NumContextLines is > 0 231 Before []byte 232 After []byte 233 234 // If set, this was a match on the filename. 235 FileName bool 236 237 // The higher the better. Only ranks the quality of the match 238 // within the file, does not take rank of file into account 239 Score float64 240 DebugScore string 241 242 LineFragments []LineFragmentMatch 243} 244 245func (lm *LineMatch) sizeBytes() (sz uint64) { 246 // Line 247 sz += sliceHeaderBytes + uint64(len(lm.Line)) 248 249 // LineStart, LineEnd, LineNumber 250 sz += 3 * 8 251 252 // Before 253 sz += sliceHeaderBytes + uint64(len(lm.Before)) 254 255 // After 256 sz += sliceHeaderBytes + uint64(len(lm.After)) 257 258 // FileName 259 sz += 1 260 261 // Score 262 sz += 8 263 264 // DebugScore 265 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 266 267 // LineFragments 268 sz += sliceHeaderBytes 269 for _, lf := range lm.LineFragments { 270 sz += lf.sizeBytes() 271 } 272 273 return 274} 275 276type Symbol struct { 277 Sym string 278 Kind string 279 Parent string 280 ParentKind string 281} 282 283func (s *Symbol) sizeBytes() uint64 { 284 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 285} 286 287// LineFragmentMatch a segment of matching text within a line. 288type LineFragmentMatch struct { 289 // Offset within the line, in bytes. 290 LineOffset int 291 292 // Offset from file start, in bytes. 293 Offset uint32 294 295 // Number bytes that match. 296 MatchLength int 297 298 SymbolInfo *Symbol 299} 300 301func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 302 // LineOffset 303 sz += 8 304 305 // Offset 306 sz += 4 307 308 // MatchLength 309 sz += 8 310 311 // SymbolInfo 312 sz += pointerSize 313 if lfm.SymbolInfo != nil { 314 sz += lfm.SymbolInfo.sizeBytes() 315 } 316 317 return 318} 319 320type FlushReason uint8 321 322const ( 323 FlushReasonTimerExpired FlushReason = 1 << iota 324 FlushReasonFinalFlush 325 FlushReasonMaxSize 326) 327 328var FlushReasonStrings = map[FlushReason]string{ 329 FlushReasonTimerExpired: "timer_expired", 330 FlushReasonFinalFlush: "final_flush", 331 FlushReasonMaxSize: "max_size_reached", 332} 333 334func (fr FlushReason) String() string { 335 if v, ok := FlushReasonStrings[fr]; ok { 336 return v 337 } 338 339 return "none" 340} 341 342// Stats contains interesting numbers on the search 343type Stats struct { 344 // Amount of I/O for reading contents. 345 ContentBytesLoaded int64 346 347 // Amount of I/O for reading from index. 348 IndexBytesLoaded int64 349 350 // Number of search shards that had a crash. 351 Crashes int 352 353 // Wall clock time for this search 354 Duration time.Duration 355 356 // Number of files containing a match. 357 FileCount int 358 359 // Number of files in shards that we considered. 360 ShardFilesConsidered int 361 362 // Files that we evaluated. Equivalent to files for which all 363 // atom matches (including negations) evaluated to true. 364 FilesConsidered int 365 366 // Files for which we loaded file content to verify substring matches 367 FilesLoaded int 368 369 // Candidate files whose contents weren't examined because we 370 // gathered enough matches. 371 FilesSkipped int 372 373 // Shards that we scanned to find matches. 374 ShardsScanned int 375 376 // Shards that we did not process because a query was canceled. 377 ShardsSkipped int 378 379 // Shards that we did not process because the query was rejected by the 380 // ngram filter indicating it had no matches. 381 ShardsSkippedFilter int 382 383 // Number of non-overlapping matches 384 MatchCount int 385 386 // Number of candidate matches as a result of searching ngrams. 387 NgramMatches int 388 389 // NgramLookups is the number of times we accessed an ngram in the index. 390 NgramLookups int 391 392 // Wall clock time for queued search. 393 Wait time.Duration 394 395 // Aggregate wall clock time spent constructing and pruning the match tree. 396 // This accounts for time such as lookups in the trigram index. 397 MatchTreeConstruction time.Duration 398 399 // Aggregate wall clock time spent searching the match tree. This accounts 400 // for the bulk of search work done looking for matches. 401 MatchTreeSearch time.Duration 402 403 // Number of times regexp was called on files that we evaluated. 404 RegexpsConsidered int 405 406 // FlushReason explains why results were flushed. 407 FlushReason FlushReason 408} 409 410func (s *Stats) sizeBytes() (sz uint64) { 411 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 412 sz += 1 // FlushReason 413 414 return 415} 416 417func (s *Stats) Add(o Stats) { 418 s.ContentBytesLoaded += o.ContentBytesLoaded 419 s.IndexBytesLoaded += o.IndexBytesLoaded 420 s.Crashes += o.Crashes 421 s.FileCount += o.FileCount 422 s.FilesConsidered += o.FilesConsidered 423 s.FilesLoaded += o.FilesLoaded 424 s.FilesSkipped += o.FilesSkipped 425 s.MatchCount += o.MatchCount 426 s.NgramMatches += o.NgramMatches 427 s.NgramLookups += o.NgramLookups 428 s.ShardFilesConsidered += o.ShardFilesConsidered 429 s.ShardsScanned += o.ShardsScanned 430 s.ShardsSkipped += o.ShardsSkipped 431 s.ShardsSkippedFilter += o.ShardsSkippedFilter 432 s.Wait += o.Wait 433 s.MatchTreeConstruction += o.MatchTreeConstruction 434 s.MatchTreeSearch += o.MatchTreeSearch 435 s.RegexpsConsidered += o.RegexpsConsidered 436 437 // We want the first non-zero FlushReason to be sticky. This is a useful 438 // property when aggregating stats from several Zoekts. 439 if s.FlushReason == 0 { 440 s.FlushReason = o.FlushReason 441 } 442} 443 444// Zero returns true if stats is empty. 445func (s *Stats) Zero() bool { 446 if s == nil { 447 return true 448 } 449 450 return !(s.ContentBytesLoaded > 0 || 451 s.IndexBytesLoaded > 0 || 452 s.Crashes > 0 || 453 s.FileCount > 0 || 454 s.FilesConsidered > 0 || 455 s.FilesLoaded > 0 || 456 s.FilesSkipped > 0 || 457 s.MatchCount > 0 || 458 s.NgramMatches > 0 || 459 s.NgramLookups > 0 || 460 s.ShardFilesConsidered > 0 || 461 s.ShardsScanned > 0 || 462 s.ShardsSkipped > 0 || 463 s.ShardsSkippedFilter > 0 || 464 s.Wait > 0 || 465 s.MatchTreeConstruction > 0 || 466 s.MatchTreeSearch > 0 || 467 s.RegexpsConsidered > 0) 468} 469 470// Progress contains information about the global progress of the running search query. 471// This is used by the frontend to reorder results and emit them when stable. 472// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 473type Progress struct { 474 // Priority of the shard that was searched. 475 Priority float64 476 477 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 478 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 479 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 480 // 481 // MaxPendingPriority decreases monotonically in each SearchResult. 482 MaxPendingPriority float64 483} 484 485func (p *Progress) sizeBytes() uint64 { 486 return 2 * 8 487} 488 489// SearchResult contains search matches and extra data 490type SearchResult struct { 491 Stats 492 493 // Do not encode this as we cannot encode -Inf in JSON 494 Progress `json:"-"` 495 496 Files []FileMatch 497 498 // RepoURLs holds a repo => template string map. 499 RepoURLs map[string]string 500 501 // FragmentNames holds a repo => template string map, for 502 // the line number fragment. 503 LineFragments map[string]string 504} 505 506// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 507// The estimate does not take alignment into account. The result is a lower 508// bound on the actual size in memory. 509func (sr *SearchResult) SizeBytes() (sz uint64) { 510 sz += sr.Stats.sizeBytes() 511 sz += sr.Progress.sizeBytes() 512 513 // Files 514 sz += sliceHeaderBytes 515 for _, f := range sr.Files { 516 sz += f.sizeBytes() 517 } 518 519 // RepoURLs 520 sz += mapHeaderBytes 521 for k, v := range sr.RepoURLs { 522 sz += stringHeaderBytes + uint64(len(k)) 523 sz += stringHeaderBytes + uint64(len(v)) 524 } 525 526 // LineFragments 527 sz += mapHeaderBytes 528 for k, v := range sr.LineFragments { 529 sz += stringHeaderBytes + uint64(len(k)) 530 sz += stringHeaderBytes + uint64(len(v)) 531 } 532 533 return 534} 535 536// RepositoryBranch describes an indexed branch, which is a name 537// combined with a version. 538type RepositoryBranch struct { 539 Name string 540 Version string 541} 542 543func (r RepositoryBranch) String() string { 544 return fmt.Sprintf("%s@%s", r.Name, r.Version) 545} 546 547// Repository holds repository metadata. 548type Repository struct { 549 // Sourcegraph's repository ID 550 ID uint32 551 552 // The repository name 553 Name string 554 555 // The repository URL. 556 URL string 557 558 // The physical source where this repo came from, eg. full 559 // path to the zip filename or git repository directory. This 560 // will not be exposed in the UI, but can be used to detect 561 // orphaned index shards. 562 Source string 563 564 // The branches indexed in this repo. 565 Branches []RepositoryBranch 566 567 // Nil if this is not the super project. 568 SubRepoMap map[string]*Repository 569 570 // URL template to link to the commit of a branch 571 CommitURLTemplate string 572 573 // The repository URL for getting to a file. Has access to 574 // {{.Version}}, {{.Path}} 575 FileURLTemplate string 576 577 // The URL fragment to add to a file URL for line numbers. has 578 // access to {{.LineNumber}}. The fragment should include the 579 // separator, generally '#' or ';'. 580 LineFragmentTemplate string 581 582 // Perf optimization: priority is set when we load the shard. It corresponds to 583 // the value of "priority" stored in RawConfig. 584 priority float64 585 586 // All zoekt.* configuration settings. 587 RawConfig map[string]string 588 589 // Importance of the repository, bigger is more important 590 Rank uint16 591 592 // IndexOptions is a hash of the options used to create the index for the 593 // repo. 594 IndexOptions string 595 596 // HasSymbols is true if this repository has indexed ctags 597 // output. Sourcegraph specific: This field is more appropriate for 598 // IndexMetadata. However, we store it here since the Sourcegraph frontend 599 // can read this structure but not IndexMetadata. 600 HasSymbols bool 601 602 // Tombstone is true if we are not allowed to search this repo. 603 Tombstone bool 604 605 // LatestCommitDate is the date of the latest commit among all indexed Branches. 606 // The date might be time.Time's 0-value if the repository was last indexed 607 // before this field was added. 608 LatestCommitDate time.Time 609 610 // FileTombstones is a set of file paths that should be ignored across all branches 611 // in this shard. 612 FileTombstones map[string]struct{} `json:",omitempty"` 613} 614 615func (r *Repository) UnmarshalJSON(data []byte) error { 616 // We define a new type so that we can use json.Unmarshal 617 // without recursing into this same method. 618 type repository *Repository 619 repo := repository(r) 620 621 err := json.Unmarshal(data, repo) 622 if err != nil { 623 return err 624 } 625 626 if v, ok := repo.RawConfig["repoid"]; ok { 627 id, _ := strconv.ParseUint(v, 10, 32) 628 r.ID = uint32(id) 629 } 630 631 if v, ok := repo.RawConfig["priority"]; ok { 632 r.priority, err = strconv.ParseFloat(v, 64) 633 if err != nil { 634 r.priority = 0 635 } 636 637 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 638 // based on priority. Setting it on read instead of during indexing 639 // allows us to avoid a complete reindex. 640 if r.Rank == 0 && r.priority > 0 { 641 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 642 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 643 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 644 } 645 } 646 return nil 647} 648 649// MergeMutable will merge x into r. mutated will be true if it made any 650// changes. err is non-nil if we needed to mutate an immutable field. 651// 652// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 653// computed while indexing so can't be synthesized from x. 654// 655// Note: We ignore RawConfig fields which are duplicated into Repository: 656// name and id. 657// 658// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 659func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 660 if r.ID != x.ID { 661 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 662 return mutated, errors.New("ID is immutable") 663 } 664 if r.Name != x.Name { 665 // Name is encoded into the shard name on disk. We need to re-index if it 666 // changes. 667 return mutated, errors.New("Name is immutable") 668 } 669 if !reflect.DeepEqual(r.Branches, x.Branches) { 670 // Need a reindex if content changing. 671 return mutated, errors.New("Branches is immutable") 672 } 673 674 for k, v := range x.RawConfig { 675 // We ignore name and id since they are encoded into the repository. 676 if k == "name" || k == "id" { 677 continue 678 } 679 if r.RawConfig == nil { 680 mutated = true 681 r.RawConfig = make(map[string]string) 682 } 683 if r.RawConfig[k] != v { 684 mutated = true 685 r.RawConfig[k] = v 686 } 687 } 688 689 return mutated, nil 690} 691 692// IndexMetadata holds metadata stored in the index file. It contains 693// data generated by the core indexing library. 694type IndexMetadata struct { 695 IndexFormatVersion int 696 IndexFeatureVersion int 697 IndexMinReaderVersion int 698 IndexTime time.Time 699 PlainASCII bool 700 LanguageMap map[string]uint16 701 ZoektVersion string 702 ID string 703} 704 705// Statistics of a (collection of) repositories. 706type RepoStats struct { 707 // Repos is used for aggregrating the number of repositories. 708 // 709 // Note: This field is not populated on RepoListEntry.Stats (individual) but 710 // only for RepoList.Stats (aggregate). 711 Repos int 712 713 // Shards is the total number of search shards. 714 Shards int 715 716 // Documents holds the number of documents or files. 717 Documents int 718 719 // IndexBytes is the amount of RAM used for index overhead. 720 IndexBytes int64 721 722 // ContentBytes is the amount of RAM used for raw content. 723 ContentBytes int64 724 725 // Sourcegraph specific stats below. These are not as efficient to calculate 726 // as the above statistics. We experimentally measured about a 10% slower 727 // shard load time. However, we find these values very useful to track and 728 // computing them outside of load time introduces a lot of complexity. 729 730 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 731 // indexed documents. This is not exactly the same as line count, since it 732 // will not include lines not terminated by "\n" (eg a file with no "\n", or 733 // a final line without "\n"). Note: Zoekt deduplicates documents across 734 // branches, so if a path has the same contents on multiple branches, there 735 // is only one document for it. As such that document's newlines is only 736 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 737 // for counts which do not deduplicate. 738 NewLinesCount uint64 739 740 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 741 // branch. 742 DefaultBranchNewLinesCount uint64 743 744 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 745 // except the default branch. 746 OtherBranchesNewLinesCount uint64 747} 748 749func (s *RepoStats) Add(o *RepoStats) { 750 // can't update Repos, since one repo may have multiple 751 // shards. 752 s.Shards += o.Shards 753 s.IndexBytes += o.IndexBytes 754 s.Documents += o.Documents 755 s.ContentBytes += o.ContentBytes 756 757 // Sourcegraph specific 758 s.NewLinesCount += o.NewLinesCount 759 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 760 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 761} 762 763type RepoListEntry struct { 764 Repository Repository 765 IndexMetadata IndexMetadata 766 Stats RepoStats 767} 768 769// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 770// performance profiling of sourcegraph.com revealed that querying this 771// information from Zoekt was causing lots of CPU and memory usage. Note: we 772// can revisit this, how we store and query this information has changed a lot 773// since this was introduced. 774type MinimalRepoListEntry struct { 775 // HasSymbols is exported since Sourcegraph uses this information at search 776 // planning time to decide between Zoekt and an unindexed symbol search. 777 // 778 // Note: it pretty much is always true in practice. 779 HasSymbols bool 780 781 // Branches is used by Sourcegraphs query planner to decided if it can use 782 // zoekt or go via an unindexed code path. 783 Branches []RepositoryBranch 784 785 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 786 // since the epoch). This is to make it clear we are not transporting the 787 // full fidelty timestamp (ie with milliseconds and location). Additionally 788 // it saves 16 bytes in this struct. 789 // 790 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 791 // how many repositories need updating after a ranking change/etc. 792 // 793 // TODO(keegancsmith) audit updates to IndexTime and document how and when 794 // it changes. Concerned about things like metadata updates or compound 795 // shards leading to untrustworthy data here. 796 IndexTimeUnix int64 797} 798 799type ReposMap map[uint32]MinimalRepoListEntry 800 801// MarshalBinary implements a specialized encoder for ReposMap. 802func (q *ReposMap) MarshalBinary() ([]byte, error) { 803 return reposMapEncode(*q) 804} 805 806// UnmarshalBinary implements a specialized decoder for ReposMap. 807func (q *ReposMap) UnmarshalBinary(b []byte) error { 808 var err error 809 (*q), err = reposMapDecode(b) 810 return err 811} 812 813// RepoList holds a set of Repository metadata. 814type RepoList struct { 815 // Returned when ListOptions.Field is RepoListFieldRepos. 816 Repos []*RepoListEntry 817 818 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 819 ReposMap ReposMap 820 821 Crashes int 822 823 // Stats response to a List request. 824 // This is the aggregate RepoStats of all repos matching the input query. 825 Stats RepoStats 826} 827 828type Searcher interface { 829 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 830 831 // List lists repositories. The query `q` can only contain 832 // query.Repo atoms. 833 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 834 Close() 835 836 // Describe the searcher for debug messages. 837 String() string 838} 839 840type RepoListField int 841 842const ( 843 RepoListFieldRepos RepoListField = 0 844 RepoListFieldReposMap = 2 845) 846 847type ListOptions struct { 848 // Field decides which field to populate in RepoList response. 849 Field RepoListField 850} 851 852func (o *ListOptions) GetField() (RepoListField, error) { 853 if o == nil { 854 return RepoListFieldRepos, nil 855 } 856 switch o.Field { 857 case RepoListFieldRepos, RepoListFieldReposMap: 858 return o.Field, nil 859 case 1: 860 return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field) 861 default: 862 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 863 } 864} 865 866func (o *ListOptions) String() string { 867 return fmt.Sprintf("%#v", o) 868} 869 870type SearchOptions struct { 871 // Return an upper-bound estimate of eligible documents in 872 // stats.ShardFilesConsidered. 873 EstimateDocCount bool 874 875 // Return the whole file. 876 Whole bool 877 878 // Maximum number of matches: skip all processing an index 879 // shard after we found this many non-overlapping matches. 880 ShardMaxMatchCount int 881 882 // Maximum number of matches: stop looking for more matches 883 // once we have this many matches across shards. 884 TotalMaxMatchCount int 885 886 // Maximum number of matches: skip processing documents for a repository in 887 // a shard once we have found ShardRepoMaxMatchCount. 888 // 889 // A compound shard may contain multiple repositories. This will most often 890 // be set to 1 to find all repositories containing a result. 891 ShardRepoMaxMatchCount int 892 893 // Deprecated: this field is not read anymore. 894 ShardMaxImportantMatch int 895 896 // Deprecated: this field is not read anymore. 897 TotalMaxImportantMatch int 898 899 // Abort the search after this much time has passed. 900 MaxWallTime time.Duration 901 902 // FlushWallTime if non-zero will stop streaming behaviour at first and 903 // instead will collate and sort results. At FlushWallTime the results will 904 // be sent and then the behaviour will revert to the normal streaming. 905 FlushWallTime time.Duration 906 907 // Truncates the number of documents (i.e. files) after collating and 908 // sorting the results. 909 MaxDocDisplayCount int 910 911 // Truncates the number of matchs after collating and sorting the results. 912 MaxMatchDisplayCount int 913 914 // If set to a number greater than zero then up to this many number 915 // of context lines will be added before and after each matched line. 916 // Note that the included context lines might contain matches and 917 // it's up to the consumer of the result to remove those lines. 918 NumContextLines int 919 920 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 921 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 922 ChunkMatches bool 923 924 // EXPERIMENTAL. If true, document ranks are used as additional input for 925 // sorting matches. 926 UseDocumentRanks bool 927 928 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 929 // their weight in the file match score. If the value is <= 0.0, the default weight value 930 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 931 DocumentRanksWeight float64 932 933 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 934 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 935 // When enabled, all other scoring signals are ignored, including document ranks. 936 UseKeywordScoring bool 937 938 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 939 // a command-line flag 940 Trace bool 941 942 // If set, the search results will contain debug information for scoring. 943 DebugScore bool 944 945 // SpanContext is the opentracing span context, if it exists, from the zoekt client 946 SpanContext map[string]string 947} 948 949// String returns a succinct representation of the options. This is meant for 950// human consumption in logs and traces. 951// 952// Note: some tracing systems have limits on length of values, so we take care 953// to try and make this small, and include the important information near the 954// front incase of truncation. 955func (s *SearchOptions) String() string { 956 var b strings.Builder 957 958 add := func(name, value string) { 959 b.WriteString(name) 960 b.WriteByte('=') 961 b.WriteString(value) 962 b.WriteByte(' ') 963 } 964 addInt := func(name string, value int) { 965 if value != 0 { 966 add(name, strconv.Itoa(value)) 967 } 968 } 969 addDuration := func(name string, value time.Duration) { 970 if value != 0 { 971 add(name, value.String()) 972 } 973 } 974 addBool := func(name string, value bool) { 975 if !value { 976 return 977 } 978 b.WriteString(name) 979 b.WriteByte(' ') 980 } 981 982 b.WriteString("zoekt.SearchOptions{ ") 983 984 addInt("ShardMaxMatchCount", s.ShardMaxMatchCount) 985 addInt("TotalMaxMatchCount", s.TotalMaxMatchCount) 986 addInt("ShardRepoMaxMatchCount", s.ShardRepoMaxMatchCount) 987 addInt("ShardMaxImportantMatch", s.ShardMaxImportantMatch) 988 addInt("TotalMaxImportantMatch", s.TotalMaxImportantMatch) 989 addInt("MaxDocDisplayCount", s.MaxDocDisplayCount) 990 addInt("MaxMatchDisplayCount", s.MaxMatchDisplayCount) 991 addInt("NumContextLines", s.NumContextLines) 992 993 addDuration("MaxWallTime", s.MaxWallTime) 994 addDuration("FlushWallTime", s.FlushWallTime) 995 996 if s.DocumentRanksWeight > 0 { 997 add("DocumentRanksWeight", strconv.FormatFloat(s.DocumentRanksWeight, 'g', -1, 64)) 998 } 999 1000 addBool("EstimateDocCount", s.EstimateDocCount) 1001 addBool("Whole", s.Whole) 1002 addBool("ChunkMatches", s.ChunkMatches) 1003 addBool("UseDocumentRanks", s.UseDocumentRanks) 1004 addBool("UseKeywordScoring", s.UseKeywordScoring) 1005 addBool("Trace", s.Trace) 1006 addBool("DebugScore", s.DebugScore) 1007 1008 for k, v := range s.SpanContext { 1009 add("SpanContext."+k, strconv.Quote(v)) 1010 } 1011 1012 b.WriteByte('}') 1013 return b.String() 1014} 1015 1016// Sender is the interface that wraps the basic Send method. 1017type Sender interface { 1018 Send(*SearchResult) 1019} 1020 1021// Streamer adds the method StreamSearch to the Searcher interface. 1022type Streamer interface { 1023 Searcher 1024 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 1025}