fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "strings" 25 "time" 26 27 "github.com/sourcegraph/zoekt/query" 28) 29 30const ( 31 mapHeaderBytes uint64 = 48 32 sliceHeaderBytes uint64 = 24 33 stringHeaderBytes uint64 = 16 34 pointerSize uint64 = 8 35 interfaceBytes uint64 = 16 36) 37 38// FileMatch contains all the matches within a file. 39type FileMatch struct { 40 FileName string 41 42 // Repository is the globally unique name of the repo of the 43 // match 44 Repository string 45 46 // SubRepositoryName is the globally unique name of the repo, 47 // if it came from a subrepository 48 SubRepositoryName string `json:",omitempty"` 49 50 // SubRepositoryPath holds the prefix where the subrepository 51 // was mounted. 52 SubRepositoryPath string `json:",omitempty"` 53 54 // Commit SHA1 (hex) of the (sub)repo holding the file. 55 Version string `json:",omitempty"` 56 57 // Detected language of the result. 58 Language string 59 60 // For debugging. Needs DebugScore set, but public so tests in 61 // other packages can print some diagnostics. 62 Debug string `json:",omitempty"` 63 64 Branches []string `json:",omitempty"` 65 66 // One of LineMatches or ChunkMatches will be returned depending on whether 67 // the SearchOptions.ChunkMatches is set. 68 LineMatches []LineMatch `json:",omitempty"` 69 ChunkMatches []ChunkMatch `json:",omitempty"` 70 71 // Only set if requested 72 Content []byte `json:",omitempty"` 73 74 // Checksum of the content. 75 Checksum []byte 76 77 // Ranking; the higher, the better. 78 Score float64 `json:",omitempty"` 79 80 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 81 // order results from different repositories relative to each other. 82 RepositoryPriority float64 `json:",omitempty"` 83 84 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 85 // Sourcegraph. 86 RepositoryID uint32 `json:",omitempty"` 87} 88 89func (m *FileMatch) sizeBytes() (sz uint64) { 90 // Score 91 sz += 8 92 93 for _, s := range []string{ 94 m.Debug, 95 m.FileName, 96 m.Repository, 97 m.Language, 98 m.SubRepositoryName, 99 m.SubRepositoryPath, 100 m.Version, 101 } { 102 sz += stringHeaderBytes + uint64(len(s)) 103 } 104 105 // Branches 106 sz += sliceHeaderBytes 107 for _, s := range m.Branches { 108 sz += stringHeaderBytes + uint64(len(s)) 109 } 110 111 // LineMatches 112 sz += sliceHeaderBytes 113 for _, lm := range m.LineMatches { 114 sz += lm.sizeBytes() 115 } 116 117 // ChunkMatches 118 sz += sliceHeaderBytes 119 for _, cm := range m.ChunkMatches { 120 sz += cm.sizeBytes() 121 } 122 123 // RepositoryID 124 sz += 4 125 126 // RepositoryPriority 127 sz += 8 128 129 // Content 130 sz += sliceHeaderBytes + uint64(len(m.Content)) 131 132 // Checksum 133 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 134 135 return 136} 137 138// ChunkMatch is a set of non-overlapping matches within a contiguous range of 139// lines in the file. 140type ChunkMatch struct { 141 DebugScore string 142 143 // Content is a contiguous range of complete lines that fully contains Ranges. 144 Content []byte 145 146 // Ranges is a set of matching ranges within this chunk. Each range is relative 147 // to the beginning of the file (not the beginning of Content). 148 Ranges []Range 149 150 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 151 // its length will equal that of Ranges. Any of its elements may be nil. 152 SymbolInfo []*Symbol 153 154 // FileName indicates whether this match is a match on the file name, in 155 // which case Content will contain the file name. 156 FileName bool 157 158 // ContentStart is the location (inclusive) of the beginning of content 159 // relative to the beginning of the file. It will always be at the 160 // beginning of a line (Column will always be 1). 161 ContentStart Location 162 163 Score float64 164} 165 166func (cm *ChunkMatch) sizeBytes() (sz uint64) { 167 // Content 168 sz += sliceHeaderBytes + uint64(len(cm.Content)) 169 170 // ContentStart 171 sz += cm.ContentStart.sizeBytes() 172 173 // FileName 174 sz += 1 175 176 // Ranges 177 sz += sliceHeaderBytes 178 if len(cm.Ranges) > 0 { 179 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 180 } 181 182 // SymbolInfo 183 sz += sliceHeaderBytes 184 for _, si := range cm.SymbolInfo { 185 sz += pointerSize 186 if si != nil { 187 sz += si.sizeBytes() 188 } 189 } 190 191 // Score 192 sz += 8 193 194 // DebugScore 195 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 196 197 return 198} 199 200type Range struct { 201 // The inclusive beginning of the range. 202 Start Location 203 // The exclusive end of the range. 204 End Location 205} 206 207func (r *Range) sizeBytes() uint64 { 208 return r.Start.sizeBytes() + r.End.sizeBytes() 209} 210 211type Location struct { 212 // 0-based byte offset from the beginning of the file 213 ByteOffset uint32 214 // 1-based line number from the beginning of the file 215 LineNumber uint32 216 // 1-based column number (in runes) from the beginning of line 217 Column uint32 218} 219 220func (l *Location) sizeBytes() uint64 { 221 return 3 * 4 222} 223 224// LineMatch holds the matches within a single line in a file. 225type LineMatch struct { 226 // The line in which a match was found. 227 Line []byte 228 LineStart int 229 LineEnd int 230 LineNumber int 231 232 // Before and After are only set when SearchOptions.NumContextLines is > 0 233 Before []byte 234 After []byte 235 236 // If set, this was a match on the filename. 237 FileName bool 238 239 // The higher the better. Only ranks the quality of the match 240 // within the file, does not take rank of file into account 241 Score float64 242 DebugScore string 243 244 LineFragments []LineFragmentMatch 245} 246 247func (lm *LineMatch) sizeBytes() (sz uint64) { 248 // Line 249 sz += sliceHeaderBytes + uint64(len(lm.Line)) 250 251 // LineStart, LineEnd, LineNumber 252 sz += 3 * 8 253 254 // Before 255 sz += sliceHeaderBytes + uint64(len(lm.Before)) 256 257 // After 258 sz += sliceHeaderBytes + uint64(len(lm.After)) 259 260 // FileName 261 sz += 1 262 263 // Score 264 sz += 8 265 266 // DebugScore 267 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 268 269 // LineFragments 270 sz += sliceHeaderBytes 271 for _, lf := range lm.LineFragments { 272 sz += lf.sizeBytes() 273 } 274 275 return 276} 277 278type Symbol struct { 279 Sym string 280 Kind string 281 Parent string 282 ParentKind string 283} 284 285func (s *Symbol) sizeBytes() uint64 { 286 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 287} 288 289// LineFragmentMatch a segment of matching text within a line. 290type LineFragmentMatch struct { 291 // Offset within the line, in bytes. 292 LineOffset int 293 294 // Offset from file start, in bytes. 295 Offset uint32 296 297 // Number bytes that match. 298 MatchLength int 299 300 SymbolInfo *Symbol 301} 302 303func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 304 // LineOffset 305 sz += 8 306 307 // Offset 308 sz += 4 309 310 // MatchLength 311 sz += 8 312 313 // SymbolInfo 314 sz += pointerSize 315 if lfm.SymbolInfo != nil { 316 sz += lfm.SymbolInfo.sizeBytes() 317 } 318 319 return 320} 321 322type FlushReason uint8 323 324const ( 325 FlushReasonTimerExpired FlushReason = 1 << iota 326 FlushReasonFinalFlush 327 FlushReasonMaxSize 328) 329 330var FlushReasonStrings = map[FlushReason]string{ 331 FlushReasonTimerExpired: "timer_expired", 332 FlushReasonFinalFlush: "final_flush", 333 FlushReasonMaxSize: "max_size_reached", 334} 335 336func (fr FlushReason) String() string { 337 if v, ok := FlushReasonStrings[fr]; ok { 338 return v 339 } 340 341 return "none" 342} 343 344// Stats contains interesting numbers on the search 345type Stats struct { 346 // Amount of I/O for reading contents. 347 ContentBytesLoaded int64 348 349 // Amount of I/O for reading from index. 350 IndexBytesLoaded int64 351 352 // Number of search shards that had a crash. 353 Crashes int 354 355 // Wall clock time for this search 356 Duration time.Duration 357 358 // Number of files containing a match. 359 FileCount int 360 361 // Number of files in shards that we considered. 362 ShardFilesConsidered int 363 364 // Files that we evaluated. Equivalent to files for which all 365 // atom matches (including negations) evaluated to true. 366 FilesConsidered int 367 368 // Files for which we loaded file content to verify substring matches 369 FilesLoaded int 370 371 // Candidate files whose contents weren't examined because we 372 // gathered enough matches. 373 FilesSkipped int 374 375 // Shards that we scanned to find matches. 376 ShardsScanned int 377 378 // Shards that we did not process because a query was canceled. 379 ShardsSkipped int 380 381 // Shards that we did not process because the query was rejected by the 382 // ngram filter indicating it had no matches. 383 ShardsSkippedFilter int 384 385 // Number of non-overlapping matches 386 MatchCount int 387 388 // Number of candidate matches as a result of searching ngrams. 389 NgramMatches int 390 391 // NgramLookups is the number of times we accessed an ngram in the index. 392 NgramLookups int 393 394 // Wall clock time for queued search. 395 Wait time.Duration 396 397 // Aggregate wall clock time spent constructing and pruning the match tree. 398 // This accounts for time such as lookups in the trigram index. 399 MatchTreeConstruction time.Duration 400 401 // Aggregate wall clock time spent searching the match tree. This accounts 402 // for the bulk of search work done looking for matches. 403 MatchTreeSearch time.Duration 404 405 // Number of times regexp was called on files that we evaluated. 406 RegexpsConsidered int 407 408 // FlushReason explains why results were flushed. 409 FlushReason FlushReason 410} 411 412func (s *Stats) sizeBytes() (sz uint64) { 413 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 414 sz += 1 // FlushReason 415 416 return 417} 418 419func (s *Stats) Add(o Stats) { 420 s.ContentBytesLoaded += o.ContentBytesLoaded 421 s.IndexBytesLoaded += o.IndexBytesLoaded 422 s.Crashes += o.Crashes 423 s.FileCount += o.FileCount 424 s.FilesConsidered += o.FilesConsidered 425 s.FilesLoaded += o.FilesLoaded 426 s.FilesSkipped += o.FilesSkipped 427 s.MatchCount += o.MatchCount 428 s.NgramMatches += o.NgramMatches 429 s.NgramLookups += o.NgramLookups 430 s.ShardFilesConsidered += o.ShardFilesConsidered 431 s.ShardsScanned += o.ShardsScanned 432 s.ShardsSkipped += o.ShardsSkipped 433 s.ShardsSkippedFilter += o.ShardsSkippedFilter 434 s.Wait += o.Wait 435 s.MatchTreeConstruction += o.MatchTreeConstruction 436 s.MatchTreeSearch += o.MatchTreeSearch 437 s.RegexpsConsidered += o.RegexpsConsidered 438 439 // We want the first non-zero FlushReason to be sticky. This is a useful 440 // property when aggregating stats from several Zoekts. 441 if s.FlushReason == 0 { 442 s.FlushReason = o.FlushReason 443 } 444} 445 446// Zero returns true if stats is empty. 447func (s *Stats) Zero() bool { 448 if s == nil { 449 return true 450 } 451 452 return !(s.ContentBytesLoaded > 0 || 453 s.IndexBytesLoaded > 0 || 454 s.Crashes > 0 || 455 s.FileCount > 0 || 456 s.FilesConsidered > 0 || 457 s.FilesLoaded > 0 || 458 s.FilesSkipped > 0 || 459 s.MatchCount > 0 || 460 s.NgramMatches > 0 || 461 s.NgramLookups > 0 || 462 s.ShardFilesConsidered > 0 || 463 s.ShardsScanned > 0 || 464 s.ShardsSkipped > 0 || 465 s.ShardsSkippedFilter > 0 || 466 s.Wait > 0 || 467 s.MatchTreeConstruction > 0 || 468 s.MatchTreeSearch > 0 || 469 s.RegexpsConsidered > 0) 470} 471 472// Progress contains information about the global progress of the running search query. 473// This is used by the frontend to reorder results and emit them when stable. 474// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 475type Progress struct { 476 // Priority of the shard that was searched. 477 Priority float64 478 479 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 480 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 481 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 482 // 483 // MaxPendingPriority decreases monotonically in each SearchResult. 484 MaxPendingPriority float64 485} 486 487func (p *Progress) sizeBytes() uint64 { 488 return 2 * 8 489} 490 491// SearchResult contains search matches and extra data 492type SearchResult struct { 493 Stats 494 495 // Do not encode this as we cannot encode -Inf in JSON 496 Progress `json:"-"` 497 498 Files []FileMatch 499 500 // RepoURLs holds a repo => template string map. 501 RepoURLs map[string]string 502 503 // FragmentNames holds a repo => template string map, for 504 // the line number fragment. 505 LineFragments map[string]string 506} 507 508// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 509// The estimate does not take alignment into account. The result is a lower 510// bound on the actual size in memory. 511func (sr *SearchResult) SizeBytes() (sz uint64) { 512 sz += sr.Stats.sizeBytes() 513 sz += sr.Progress.sizeBytes() 514 515 // Files 516 sz += sliceHeaderBytes 517 for _, f := range sr.Files { 518 sz += f.sizeBytes() 519 } 520 521 // RepoURLs 522 sz += mapHeaderBytes 523 for k, v := range sr.RepoURLs { 524 sz += stringHeaderBytes + uint64(len(k)) 525 sz += stringHeaderBytes + uint64(len(v)) 526 } 527 528 // LineFragments 529 sz += mapHeaderBytes 530 for k, v := range sr.LineFragments { 531 sz += stringHeaderBytes + uint64(len(k)) 532 sz += stringHeaderBytes + uint64(len(v)) 533 } 534 535 return 536} 537 538// RepositoryBranch describes an indexed branch, which is a name 539// combined with a version. 540type RepositoryBranch struct { 541 Name string 542 Version string 543} 544 545func (r RepositoryBranch) String() string { 546 return fmt.Sprintf("%s@%s", r.Name, r.Version) 547} 548 549// Repository holds repository metadata. 550type Repository struct { 551 // Sourcegraph's repository ID 552 ID uint32 553 554 // The repository name 555 Name string 556 557 // The repository URL. 558 URL string 559 560 // The physical source where this repo came from, eg. full 561 // path to the zip filename or git repository directory. This 562 // will not be exposed in the UI, but can be used to detect 563 // orphaned index shards. 564 Source string 565 566 // The branches indexed in this repo. 567 Branches []RepositoryBranch 568 569 // Nil if this is not the super project. 570 SubRepoMap map[string]*Repository 571 572 // URL template to link to the commit of a branch 573 CommitURLTemplate string 574 575 // The repository URL for getting to a file. Has access to 576 // {{.Version}}, {{.Path}} 577 FileURLTemplate string 578 579 // The URL fragment to add to a file URL for line numbers. has 580 // access to {{.LineNumber}}. The fragment should include the 581 // separator, generally '#' or ';'. 582 LineFragmentTemplate string 583 584 // Perf optimization: priority is set when we load the shard. It corresponds to 585 // the value of "priority" stored in RawConfig. 586 priority float64 587 588 // All zoekt.* configuration settings. 589 RawConfig map[string]string 590 591 // Importance of the repository, bigger is more important 592 Rank uint16 593 594 // IndexOptions is a hash of the options used to create the index for the 595 // repo. 596 IndexOptions string 597 598 // HasSymbols is true if this repository has indexed ctags 599 // output. Sourcegraph specific: This field is more appropriate for 600 // IndexMetadata. However, we store it here since the Sourcegraph frontend 601 // can read this structure but not IndexMetadata. 602 HasSymbols bool 603 604 // Tombstone is true if we are not allowed to search this repo. 605 Tombstone bool 606 607 // LatestCommitDate is the date of the latest commit among all indexed Branches. 608 // The date might be time.Time's 0-value if the repository was last indexed 609 // before this field was added. 610 LatestCommitDate time.Time 611 612 // FileTombstones is a set of file paths that should be ignored across all branches 613 // in this shard. 614 FileTombstones map[string]struct{} `json:",omitempty"` 615} 616 617func (r *Repository) UnmarshalJSON(data []byte) error { 618 // We define a new type so that we can use json.Unmarshal 619 // without recursing into this same method. 620 type repository *Repository 621 repo := repository(r) 622 623 err := json.Unmarshal(data, repo) 624 if err != nil { 625 return err 626 } 627 628 if v, ok := repo.RawConfig["repoid"]; ok { 629 id, _ := strconv.ParseUint(v, 10, 32) 630 r.ID = uint32(id) 631 } 632 633 if v, ok := repo.RawConfig["priority"]; ok { 634 r.priority, err = strconv.ParseFloat(v, 64) 635 if err != nil { 636 r.priority = 0 637 } 638 639 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 640 // based on priority. Setting it on read instead of during indexing 641 // allows us to avoid a complete reindex. 642 if r.Rank == 0 && r.priority > 0 { 643 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 644 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 645 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 646 } 647 } 648 return nil 649} 650 651// MergeMutable will merge x into r. mutated will be true if it made any 652// changes. err is non-nil if we needed to mutate an immutable field. 653// 654// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 655// computed while indexing so can't be synthesized from x. 656// 657// Note: We ignore RawConfig fields which are duplicated into Repository: 658// name and id. 659// 660// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 661func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 662 if r.ID != x.ID { 663 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 664 return mutated, errors.New("ID is immutable") 665 } 666 if r.Name != x.Name { 667 // Name is encoded into the shard name on disk. We need to re-index if it 668 // changes. 669 return mutated, errors.New("Name is immutable") 670 } 671 if !reflect.DeepEqual(r.Branches, x.Branches) { 672 // Need a reindex if content changing. 673 return mutated, errors.New("Branches is immutable") 674 } 675 676 for k, v := range x.RawConfig { 677 // We ignore name and id since they are encoded into the repository. 678 if k == "name" || k == "id" { 679 continue 680 } 681 if r.RawConfig == nil { 682 mutated = true 683 r.RawConfig = make(map[string]string) 684 } 685 if r.RawConfig[k] != v { 686 mutated = true 687 r.RawConfig[k] = v 688 } 689 } 690 691 return mutated, nil 692} 693 694// IndexMetadata holds metadata stored in the index file. It contains 695// data generated by the core indexing library. 696type IndexMetadata struct { 697 IndexFormatVersion int 698 IndexFeatureVersion int 699 IndexMinReaderVersion int 700 IndexTime time.Time 701 PlainASCII bool 702 LanguageMap map[string]uint16 703 ZoektVersion string 704 ID string 705} 706 707// Statistics of a (collection of) repositories. 708type RepoStats struct { 709 // Repos is used for aggregrating the number of repositories. 710 // 711 // Note: This field is not populated on RepoListEntry.Stats (individual) but 712 // only for RepoList.Stats (aggregate). 713 Repos int 714 715 // Shards is the total number of search shards. 716 Shards int 717 718 // Documents holds the number of documents or files. 719 Documents int 720 721 // IndexBytes is the amount of RAM used for index overhead. 722 IndexBytes int64 723 724 // ContentBytes is the amount of RAM used for raw content. 725 ContentBytes int64 726 727 // Sourcegraph specific stats below. These are not as efficient to calculate 728 // as the above statistics. We experimentally measured about a 10% slower 729 // shard load time. However, we find these values very useful to track and 730 // computing them outside of load time introduces a lot of complexity. 731 732 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 733 // indexed documents. This is not exactly the same as line count, since it 734 // will not include lines not terminated by "\n" (eg a file with no "\n", or 735 // a final line without "\n"). Note: Zoekt deduplicates documents across 736 // branches, so if a path has the same contents on multiple branches, there 737 // is only one document for it. As such that document's newlines is only 738 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 739 // for counts which do not deduplicate. 740 NewLinesCount uint64 741 742 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 743 // branch. 744 DefaultBranchNewLinesCount uint64 745 746 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 747 // except the default branch. 748 OtherBranchesNewLinesCount uint64 749} 750 751func (s *RepoStats) Add(o *RepoStats) { 752 // can't update Repos, since one repo may have multiple 753 // shards. 754 s.Shards += o.Shards 755 s.IndexBytes += o.IndexBytes 756 s.Documents += o.Documents 757 s.ContentBytes += o.ContentBytes 758 759 // Sourcegraph specific 760 s.NewLinesCount += o.NewLinesCount 761 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 762 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 763} 764 765type RepoListEntry struct { 766 Repository Repository 767 IndexMetadata IndexMetadata 768 Stats RepoStats 769} 770 771// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 772// performance profiling of sourcegraph.com revealed that querying this 773// information from Zoekt was causing lots of CPU and memory usage. Note: we 774// can revisit this, how we store and query this information has changed a lot 775// since this was introduced. 776type MinimalRepoListEntry struct { 777 // HasSymbols is exported since Sourcegraph uses this information at search 778 // planning time to decide between Zoekt and an unindexed symbol search. 779 // 780 // Note: it pretty much is always true in practice. 781 HasSymbols bool 782 783 // Branches is used by Sourcegraphs query planner to decided if it can use 784 // zoekt or go via an unindexed code path. 785 Branches []RepositoryBranch 786 787 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 788 // since the epoch). This is to make it clear we are not transporting the 789 // full fidelty timestamp (ie with milliseconds and location). Additionally 790 // it saves 16 bytes in this struct. 791 // 792 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 793 // how many repositories need updating after a ranking change/etc. 794 // 795 // TODO(keegancsmith) audit updates to IndexTime and document how and when 796 // it changes. Concerned about things like metadata updates or compound 797 // shards leading to untrustworthy data here. 798 IndexTimeUnix int64 799} 800 801type ReposMap map[uint32]MinimalRepoListEntry 802 803// MarshalBinary implements a specialized encoder for ReposMap. 804func (q *ReposMap) MarshalBinary() ([]byte, error) { 805 return reposMapEncode(*q) 806} 807 808// UnmarshalBinary implements a specialized decoder for ReposMap. 809func (q *ReposMap) UnmarshalBinary(b []byte) error { 810 var err error 811 (*q), err = reposMapDecode(b) 812 return err 813} 814 815// RepoList holds a set of Repository metadata. 816type RepoList struct { 817 // Returned when ListOptions.Field is RepoListFieldRepos. 818 Repos []*RepoListEntry 819 820 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 821 ReposMap ReposMap 822 823 Crashes int 824 825 // Stats response to a List request. 826 // This is the aggregate RepoStats of all repos matching the input query. 827 Stats RepoStats 828} 829 830type Searcher interface { 831 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 832 833 // List lists repositories. The query `q` can only contain 834 // query.Repo atoms. 835 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 836 Close() 837 838 // Describe the searcher for debug messages. 839 String() string 840} 841 842type RepoListField int 843 844const ( 845 RepoListFieldRepos RepoListField = 0 846 RepoListFieldReposMap = 2 847) 848 849type ListOptions struct { 850 // Field decides which field to populate in RepoList response. 851 Field RepoListField 852} 853 854func (o *ListOptions) GetField() (RepoListField, error) { 855 if o == nil { 856 return RepoListFieldRepos, nil 857 } 858 switch o.Field { 859 case RepoListFieldRepos, RepoListFieldReposMap: 860 return o.Field, nil 861 case 1: 862 return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field) 863 default: 864 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 865 } 866} 867 868func (o *ListOptions) String() string { 869 return fmt.Sprintf("%#v", o) 870} 871 872type SearchOptions struct { 873 // Return an upper-bound estimate of eligible documents in 874 // stats.ShardFilesConsidered. 875 EstimateDocCount bool 876 877 // Return the whole file. 878 Whole bool 879 880 // Maximum number of matches: skip all processing an index 881 // shard after we found this many non-overlapping matches. 882 ShardMaxMatchCount int 883 884 // Maximum number of matches: stop looking for more matches 885 // once we have this many matches across shards. 886 TotalMaxMatchCount int 887 888 // Maximum number of matches: skip processing documents for a repository in 889 // a shard once we have found ShardRepoMaxMatchCount. 890 // 891 // A compound shard may contain multiple repositories. This will most often 892 // be set to 1 to find all repositories containing a result. 893 ShardRepoMaxMatchCount int 894 895 // Deprecated: this field is not read anymore. 896 ShardMaxImportantMatch int 897 898 // Deprecated: this field is not read anymore. 899 TotalMaxImportantMatch int 900 901 // Abort the search after this much time has passed. 902 MaxWallTime time.Duration 903 904 // FlushWallTime if non-zero will stop streaming behaviour at first and 905 // instead will collate and sort results. At FlushWallTime the results will 906 // be sent and then the behaviour will revert to the normal streaming. 907 FlushWallTime time.Duration 908 909 // Truncates the number of documents (i.e. files) after collating and 910 // sorting the results. 911 MaxDocDisplayCount int 912 913 // Truncates the number of matchs after collating and sorting the results. 914 MaxMatchDisplayCount int 915 916 // If set to a number greater than zero then up to this many number 917 // of context lines will be added before and after each matched line. 918 // Note that the included context lines might contain matches and 919 // it's up to the consumer of the result to remove those lines. 920 NumContextLines int 921 922 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 923 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 924 ChunkMatches bool 925 926 // EXPERIMENTAL. If true, document ranks are used as additional input for 927 // sorting matches. 928 UseDocumentRanks bool 929 930 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 931 // their weight in the file match score. If the value is <= 0.0, the default weight value 932 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 933 DocumentRanksWeight float64 934 935 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 936 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 937 // When enabled, all other scoring signals are ignored, including document ranks. 938 UseKeywordScoring bool 939 940 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 941 // a command-line flag 942 Trace bool 943 944 // If set, the search results will contain debug information for scoring. 945 DebugScore bool 946 947 // SpanContext is the opentracing span context, if it exists, from the zoekt client 948 SpanContext map[string]string 949} 950 951// String returns a succinct representation of the options. This is meant for 952// human consumption in logs and traces. 953// 954// Note: some tracing systems have limits on length of values, so we take care 955// to try and make this small, and include the important information near the 956// front incase of truncation. 957func (s *SearchOptions) String() string { 958 var b strings.Builder 959 960 add := func(name, value string) { 961 b.WriteString(name) 962 b.WriteByte('=') 963 b.WriteString(value) 964 b.WriteByte(' ') 965 } 966 addInt := func(name string, value int) { 967 if value != 0 { 968 add(name, strconv.Itoa(value)) 969 } 970 } 971 addDuration := func(name string, value time.Duration) { 972 if value != 0 { 973 add(name, value.String()) 974 } 975 } 976 addBool := func(name string, value bool) { 977 if !value { 978 return 979 } 980 b.WriteString(name) 981 b.WriteByte(' ') 982 } 983 984 b.WriteString("zoekt.SearchOptions{ ") 985 986 addInt("ShardMaxMatchCount", s.ShardMaxMatchCount) 987 addInt("TotalMaxMatchCount", s.TotalMaxMatchCount) 988 addInt("ShardRepoMaxMatchCount", s.ShardRepoMaxMatchCount) 989 addInt("ShardMaxImportantMatch", s.ShardMaxImportantMatch) 990 addInt("TotalMaxImportantMatch", s.TotalMaxImportantMatch) 991 addInt("MaxDocDisplayCount", s.MaxDocDisplayCount) 992 addInt("MaxMatchDisplayCount", s.MaxMatchDisplayCount) 993 addInt("NumContextLines", s.NumContextLines) 994 995 addDuration("MaxWallTime", s.MaxWallTime) 996 addDuration("FlushWallTime", s.FlushWallTime) 997 998 if s.DocumentRanksWeight > 0 { 999 add("DocumentRanksWeight", strconv.FormatFloat(s.DocumentRanksWeight, 'g', -1, 64)) 1000 } 1001 1002 addBool("EstimateDocCount", s.EstimateDocCount) 1003 addBool("Whole", s.Whole) 1004 addBool("ChunkMatches", s.ChunkMatches) 1005 addBool("UseDocumentRanks", s.UseDocumentRanks) 1006 addBool("UseKeywordScoring", s.UseKeywordScoring) 1007 addBool("Trace", s.Trace) 1008 addBool("DebugScore", s.DebugScore) 1009 1010 for k, v := range s.SpanContext { 1011 add("SpanContext."+k, strconv.Quote(v)) 1012 } 1013 1014 b.WriteByte('}') 1015 return b.String() 1016} 1017 1018// Sender is the interface that wraps the basic Send method. 1019type Sender interface { 1020 Send(*SearchResult) 1021} 1022 1023// Streamer adds the method StreamSearch to the Searcher interface. 1024type Streamer interface { 1025 Searcher 1026 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 1027}