fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "strings" 25 "time" 26 27 "github.com/sourcegraph/zoekt/query" 28) 29 30const ( 31 mapHeaderBytes uint64 = 48 32 sliceHeaderBytes uint64 = 24 33 stringHeaderBytes uint64 = 16 34 pointerSize uint64 = 8 35 interfaceBytes uint64 = 16 36) 37 38// FileMatch contains all the matches within a file. 39type FileMatch struct { 40 FileName string 41 42 // Repository is the globally unique name of the repo of the 43 // match 44 Repository string 45 46 // SubRepositoryName is the globally unique name of the repo, 47 // if it came from a subrepository 48 SubRepositoryName string `json:",omitempty"` 49 50 // SubRepositoryPath holds the prefix where the subrepository 51 // was mounted. 52 SubRepositoryPath string `json:",omitempty"` 53 54 // Commit SHA1 (hex) of the (sub)repo holding the file. 55 Version string `json:",omitempty"` 56 57 // Detected language of the result. 58 Language string 59 60 // For debugging. Needs DebugScore set, but public so tests in 61 // other packages can print some diagnostics. 62 Debug string `json:",omitempty"` 63 64 Branches []string `json:",omitempty"` 65 66 // One of LineMatches or ChunkMatches will be returned depending on whether 67 // the SearchOptions.ChunkMatches is set. 68 LineMatches []LineMatch `json:",omitempty"` 69 ChunkMatches []ChunkMatch `json:",omitempty"` 70 71 // Only set if requested 72 Content []byte `json:",omitempty"` 73 74 // Checksum of the content. 75 Checksum []byte 76 77 // Ranking; the higher, the better. 78 Score float64 `json:",omitempty"` 79 80 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 81 // order results from different repositories relative to each other. 82 RepositoryPriority float64 `json:",omitempty"` 83 84 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 85 // Sourcegraph. 86 RepositoryID uint32 `json:",omitempty"` 87} 88 89func (m *FileMatch) sizeBytes() (sz uint64) { 90 // Score 91 sz += 8 92 93 for _, s := range []string{ 94 m.Debug, 95 m.FileName, 96 m.Repository, 97 m.Language, 98 m.SubRepositoryName, 99 m.SubRepositoryPath, 100 m.Version, 101 } { 102 sz += stringHeaderBytes + uint64(len(s)) 103 } 104 105 // Branches 106 sz += sliceHeaderBytes 107 for _, s := range m.Branches { 108 sz += stringHeaderBytes + uint64(len(s)) 109 } 110 111 // LineMatches 112 sz += sliceHeaderBytes 113 for _, lm := range m.LineMatches { 114 sz += lm.sizeBytes() 115 } 116 117 // ChunkMatches 118 sz += sliceHeaderBytes 119 for _, cm := range m.ChunkMatches { 120 sz += cm.sizeBytes() 121 } 122 123 // RepositoryID 124 sz += 4 125 126 // RepositoryPriority 127 sz += 8 128 129 // Content 130 sz += sliceHeaderBytes + uint64(len(m.Content)) 131 132 // Checksum 133 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 134 135 return 136} 137 138// ChunkMatch is a set of non-overlapping matches within a contiguous range of 139// lines in the file. 140type ChunkMatch struct { 141 DebugScore string 142 143 // Content is a contiguous range of complete lines that fully contains Ranges. 144 Content []byte 145 146 // Ranges is a set of matching ranges within this chunk. Each range is relative 147 // to the beginning of the file (not the beginning of Content). 148 Ranges []Range 149 150 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 151 // its length will equal that of Ranges. Any of its elements may be nil. 152 SymbolInfo []*Symbol 153 154 // FileName indicates whether this match is a match on the file name, in 155 // which case Content will contain the file name. 156 FileName bool 157 158 // ContentStart is the location (inclusive) of the beginning of content 159 // relative to the beginning of the file. It will always be at the 160 // beginning of a line (Column will always be 1). 161 ContentStart Location 162 163 Score float64 164} 165 166func (cm *ChunkMatch) sizeBytes() (sz uint64) { 167 // Content 168 sz += sliceHeaderBytes + uint64(len(cm.Content)) 169 170 // ContentStart 171 sz += cm.ContentStart.sizeBytes() 172 173 // FileName 174 sz += 1 175 176 // Ranges 177 sz += sliceHeaderBytes 178 if len(cm.Ranges) > 0 { 179 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 180 } 181 182 // SymbolInfo 183 sz += sliceHeaderBytes 184 for _, si := range cm.SymbolInfo { 185 sz += pointerSize 186 if si != nil { 187 sz += si.sizeBytes() 188 } 189 } 190 191 // Score 192 sz += 8 193 194 // DebugScore 195 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 196 197 return 198} 199 200type Range struct { 201 // The inclusive beginning of the range. 202 Start Location 203 // The exclusive end of the range. 204 End Location 205} 206 207func (r *Range) sizeBytes() uint64 { 208 return r.Start.sizeBytes() + r.End.sizeBytes() 209} 210 211type Location struct { 212 // 0-based byte offset from the beginning of the file 213 ByteOffset uint32 214 // 1-based line number from the beginning of the file 215 LineNumber uint32 216 // 1-based column number (in runes) from the beginning of line 217 Column uint32 218} 219 220func (l *Location) sizeBytes() uint64 { 221 return 3 * 4 222} 223 224// LineMatch holds the matches within a single line in a file. 225type LineMatch struct { 226 // The line in which a match was found. 227 Line []byte 228 LineStart int 229 LineEnd int 230 LineNumber int 231 232 // Before and After are only set when SearchOptions.NumContextLines is > 0 233 Before []byte 234 After []byte 235 236 // If set, this was a match on the filename. 237 FileName bool 238 239 // The higher the better. Only ranks the quality of the match 240 // within the file, does not take rank of file into account 241 Score float64 242 DebugScore string 243 244 LineFragments []LineFragmentMatch 245} 246 247func (lm *LineMatch) sizeBytes() (sz uint64) { 248 // Line 249 sz += sliceHeaderBytes + uint64(len(lm.Line)) 250 251 // LineStart, LineEnd, LineNumber 252 sz += 3 * 8 253 254 // Before 255 sz += sliceHeaderBytes + uint64(len(lm.Before)) 256 257 // After 258 sz += sliceHeaderBytes + uint64(len(lm.After)) 259 260 // FileName 261 sz += 1 262 263 // Score 264 sz += 8 265 266 // DebugScore 267 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 268 269 // LineFragments 270 sz += sliceHeaderBytes 271 for _, lf := range lm.LineFragments { 272 sz += lf.sizeBytes() 273 } 274 275 return 276} 277 278type Symbol struct { 279 Sym string 280 Kind string 281 Parent string 282 ParentKind string 283} 284 285func (s *Symbol) sizeBytes() uint64 { 286 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 287} 288 289// LineFragmentMatch a segment of matching text within a line. 290type LineFragmentMatch struct { 291 // Offset within the line, in bytes. 292 LineOffset int 293 294 // Offset from file start, in bytes. 295 Offset uint32 296 297 // Number bytes that match. 298 MatchLength int 299 300 SymbolInfo *Symbol 301} 302 303func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 304 // LineOffset 305 sz += 8 306 307 // Offset 308 sz += 4 309 310 // MatchLength 311 sz += 8 312 313 // SymbolInfo 314 sz += pointerSize 315 if lfm.SymbolInfo != nil { 316 sz += lfm.SymbolInfo.sizeBytes() 317 } 318 319 return 320} 321 322type FlushReason uint8 323 324const ( 325 FlushReasonTimerExpired FlushReason = 1 << iota 326 FlushReasonFinalFlush 327 FlushReasonMaxSize 328) 329 330var FlushReasonStrings = map[FlushReason]string{ 331 FlushReasonTimerExpired: "timer_expired", 332 FlushReasonFinalFlush: "final_flush", 333 FlushReasonMaxSize: "max_size_reached", 334} 335 336func (fr FlushReason) String() string { 337 if v, ok := FlushReasonStrings[fr]; ok { 338 return v 339 } 340 341 return "none" 342} 343 344// Stats contains interesting numbers on the search 345type Stats struct { 346 // Amount of I/O for reading contents. 347 ContentBytesLoaded int64 348 349 // Amount of I/O for reading from index. 350 IndexBytesLoaded int64 351 352 // Number of search shards that had a crash. 353 Crashes int 354 355 // Wall clock time for this search 356 Duration time.Duration 357 358 // Number of files containing a match. 359 FileCount int 360 361 // Number of files in shards that we considered. 362 ShardFilesConsidered int 363 364 // Files that we evaluated. Equivalent to files for which all 365 // atom matches (including negations) evaluated to true. 366 FilesConsidered int 367 368 // Files for which we loaded file content to verify substring matches 369 FilesLoaded int 370 371 // Candidate files whose contents weren't examined because we 372 // gathered enough matches. 373 FilesSkipped int 374 375 // Shards that we scanned to find matches. 376 ShardsScanned int 377 378 // Shards that we did not process because a query was canceled. 379 ShardsSkipped int 380 381 // Shards that we did not process because the query was rejected by the 382 // ngram filter indicating it had no matches. 383 ShardsSkippedFilter int 384 385 // Number of non-overlapping matches 386 MatchCount int 387 388 // Number of candidate matches as a result of searching ngrams. 389 NgramMatches int 390 391 // NgramLookups is the number of times we accessed an ngram in the index. 392 NgramLookups int 393 394 // Wall clock time for queued search. 395 Wait time.Duration 396 397 // Aggregate wall clock time spent constructing and pruning the match tree. 398 // This accounts for time such as lookups in the trigram index. 399 MatchTreeConstruction time.Duration 400 401 // Aggregate wall clock time spent searching the match tree. This accounts 402 // for the bulk of search work done looking for matches. 403 MatchTreeSearch time.Duration 404 405 // Number of times regexp was called on files that we evaluated. 406 RegexpsConsidered int 407 408 // FlushReason explains why results were flushed. 409 FlushReason FlushReason 410} 411 412func (s *Stats) sizeBytes() (sz uint64) { 413 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 414 sz += 1 // FlushReason 415 416 return 417} 418 419func (s *Stats) Add(o Stats) { 420 s.ContentBytesLoaded += o.ContentBytesLoaded 421 s.IndexBytesLoaded += o.IndexBytesLoaded 422 s.Crashes += o.Crashes 423 s.FileCount += o.FileCount 424 s.FilesConsidered += o.FilesConsidered 425 s.FilesLoaded += o.FilesLoaded 426 s.FilesSkipped += o.FilesSkipped 427 s.MatchCount += o.MatchCount 428 s.NgramMatches += o.NgramMatches 429 s.NgramLookups += o.NgramLookups 430 s.ShardFilesConsidered += o.ShardFilesConsidered 431 s.ShardsScanned += o.ShardsScanned 432 s.ShardsSkipped += o.ShardsSkipped 433 s.ShardsSkippedFilter += o.ShardsSkippedFilter 434 s.Wait += o.Wait 435 s.MatchTreeConstruction += o.MatchTreeConstruction 436 s.MatchTreeSearch += o.MatchTreeSearch 437 s.RegexpsConsidered += o.RegexpsConsidered 438 439 // We want the first non-zero FlushReason to be sticky. This is a useful 440 // property when aggregating stats from several Zoekts. 441 if s.FlushReason == 0 { 442 s.FlushReason = o.FlushReason 443 } 444} 445 446// Zero returns true if stats is empty. 447func (s *Stats) Zero() bool { 448 if s == nil { 449 return true 450 } 451 452 return !(s.ContentBytesLoaded > 0 || 453 s.IndexBytesLoaded > 0 || 454 s.Crashes > 0 || 455 s.FileCount > 0 || 456 s.FilesConsidered > 0 || 457 s.FilesLoaded > 0 || 458 s.FilesSkipped > 0 || 459 s.MatchCount > 0 || 460 s.NgramMatches > 0 || 461 s.NgramLookups > 0 || 462 s.ShardFilesConsidered > 0 || 463 s.ShardsScanned > 0 || 464 s.ShardsSkipped > 0 || 465 s.ShardsSkippedFilter > 0 || 466 s.Wait > 0 || 467 s.MatchTreeConstruction > 0 || 468 s.MatchTreeSearch > 0 || 469 s.RegexpsConsidered > 0) 470} 471 472// Progress contains information about the global progress of the running search query. 473// This is used by the frontend to reorder results and emit them when stable. 474// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 475type Progress struct { 476 // Priority of the shard that was searched. 477 Priority float64 478 479 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 480 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 481 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 482 // 483 // MaxPendingPriority decreases monotonically in each SearchResult. 484 MaxPendingPriority float64 485} 486 487func (p *Progress) sizeBytes() uint64 { 488 return 2 * 8 489} 490 491// SearchResult contains search matches and extra data 492type SearchResult struct { 493 Stats 494 495 // Do not encode this as we cannot encode -Inf in JSON 496 Progress `json:"-"` 497 498 Files []FileMatch 499 500 // RepoURLs holds a repo => template string map. 501 RepoURLs map[string]string 502 503 // FragmentNames holds a repo => template string map, for 504 // the line number fragment. 505 LineFragments map[string]string 506} 507 508// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 509// The estimate does not take alignment into account. The result is a lower 510// bound on the actual size in memory. 511func (sr *SearchResult) SizeBytes() (sz uint64) { 512 sz += sr.Stats.sizeBytes() 513 sz += sr.Progress.sizeBytes() 514 515 // Files 516 sz += sliceHeaderBytes 517 for _, f := range sr.Files { 518 sz += f.sizeBytes() 519 } 520 521 // RepoURLs 522 sz += mapHeaderBytes 523 for k, v := range sr.RepoURLs { 524 sz += stringHeaderBytes + uint64(len(k)) 525 sz += stringHeaderBytes + uint64(len(v)) 526 } 527 528 // LineFragments 529 sz += mapHeaderBytes 530 for k, v := range sr.LineFragments { 531 sz += stringHeaderBytes + uint64(len(k)) 532 sz += stringHeaderBytes + uint64(len(v)) 533 } 534 535 return 536} 537 538// RepositoryBranch describes an indexed branch, which is a name 539// combined with a version. 540type RepositoryBranch struct { 541 Name string 542 Version string 543} 544 545func (r RepositoryBranch) String() string { 546 return fmt.Sprintf("%s@%s", r.Name, r.Version) 547} 548 549// Repository holds repository metadata. 550type Repository struct { 551 // Sourcegraph's repository ID 552 ID uint32 553 554 // The repository name 555 Name string 556 557 // The repository URL. 558 URL string 559 560 // The physical source where this repo came from, eg. full 561 // path to the zip filename or git repository directory. This 562 // will not be exposed in the UI, but can be used to detect 563 // orphaned index shards. 564 Source string 565 566 // The branches indexed in this repo. 567 Branches []RepositoryBranch 568 569 // Nil if this is not the super project. 570 SubRepoMap map[string]*Repository 571 572 // URL template to link to the commit of a branch 573 CommitURLTemplate string 574 575 // The repository URL for getting to a file. Has access to 576 // {{.Version}}, {{.Path}} 577 FileURLTemplate string 578 579 // The URL fragment to add to a file URL for line numbers. has 580 // access to {{.LineNumber}}. The fragment should include the 581 // separator, generally '#' or ';'. 582 LineFragmentTemplate string 583 584 // Perf optimization: priority is set when we load the shard. It corresponds to 585 // the value of "priority" stored in RawConfig. 586 priority float64 587 588 // All zoekt.* configuration settings. 589 RawConfig map[string]string 590 591 // Importance of the repository, bigger is more important 592 Rank uint16 593 594 // IndexOptions is a hash of the options used to create the index for the 595 // repo. 596 IndexOptions string 597 598 // HasSymbols is true if this repository has indexed ctags 599 // output. Sourcegraph specific: This field is more appropriate for 600 // IndexMetadata. However, we store it here since the Sourcegraph frontend 601 // can read this structure but not IndexMetadata. 602 HasSymbols bool 603 604 // Tombstone is true if we are not allowed to search this repo. 605 Tombstone bool 606 607 // LatestCommitDate is the date of the latest commit among all indexed Branches. 608 // The date might be time.Time's 0-value if the repository was last indexed 609 // before this field was added. 610 LatestCommitDate time.Time 611 612 // FileTombstones is a set of file paths that should be ignored across all branches 613 // in this shard. 614 FileTombstones map[string]struct{} `json:",omitempty"` 615} 616 617func (r *Repository) UnmarshalJSON(data []byte) error { 618 // We define a new type so that we can use json.Unmarshal 619 // without recursing into this same method. 620 type repository *Repository 621 repo := repository(r) 622 623 err := json.Unmarshal(data, repo) 624 if err != nil { 625 return err 626 } 627 628 if v, ok := repo.RawConfig["repoid"]; ok { 629 id, _ := strconv.ParseUint(v, 10, 32) 630 r.ID = uint32(id) 631 } 632 633 if v, ok := repo.RawConfig["priority"]; ok { 634 r.priority, err = strconv.ParseFloat(v, 64) 635 if err != nil { 636 r.priority = 0 637 } 638 639 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 640 // based on priority. Setting it on read instead of during indexing 641 // allows us to avoid a complete reindex. 642 if r.Rank == 0 && r.priority > 0 { 643 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 644 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 645 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 646 } 647 } 648 return nil 649} 650 651// MergeMutable will merge x into r. mutated will be true if it made any 652// changes. err is non-nil if we needed to mutate an immutable field. 653// 654// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 655// computed while indexing so can't be synthesized from x. 656// 657// Note: We ignore RawConfig fields which are duplicated into Repository: 658// name and id. 659// 660// Note: URL, *Template fields are ignored. They are not used by Sourcegraph. 661func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 662 if r.ID != x.ID { 663 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 664 return mutated, errors.New("ID is immutable") 665 } 666 if r.Name != x.Name { 667 // Name is encoded into the shard name on disk. We need to re-index if it 668 // changes. 669 return mutated, errors.New("Name is immutable") 670 } 671 if !reflect.DeepEqual(r.Branches, x.Branches) { 672 // Need a reindex if content changing. 673 return mutated, errors.New("Branches is immutable") 674 } 675 676 for k, v := range x.RawConfig { 677 // We ignore name and id since they are encoded into the repository. 678 if k == "name" || k == "id" { 679 continue 680 } 681 if r.RawConfig == nil { 682 mutated = true 683 r.RawConfig = make(map[string]string) 684 } 685 if r.RawConfig[k] != v { 686 mutated = true 687 r.RawConfig[k] = v 688 } 689 } 690 691 return mutated, nil 692} 693 694// IndexMetadata holds metadata stored in the index file. It contains 695// data generated by the core indexing library. 696type IndexMetadata struct { 697 IndexFormatVersion int 698 IndexFeatureVersion int 699 IndexMinReaderVersion int 700 IndexTime time.Time 701 PlainASCII bool 702 LanguageMap map[string]uint16 703 ZoektVersion string 704 ID string 705} 706 707// Statistics of a (collection of) repositories. 708type RepoStats struct { 709 // Repos is used for aggregrating the number of repositories. 710 // 711 // Note: This field is not populated on RepoListEntry.Stats (individual) but 712 // only for RepoList.Stats (aggregate). 713 Repos int 714 715 // Shards is the total number of search shards. 716 Shards int 717 718 // Documents holds the number of documents or files. 719 Documents int 720 721 // IndexBytes is the amount of RAM used for index overhead. 722 IndexBytes int64 723 724 // ContentBytes is the amount of RAM used for raw content. 725 ContentBytes int64 726 727 // Sourcegraph specific stats below. These are not as efficient to calculate 728 // as the above statistics. We experimentally measured about a 10% slower 729 // shard load time. However, we find these values very useful to track and 730 // computing them outside of load time introduces a lot of complexity. 731 732 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 733 // indexed documents. This is not exactly the same as line count, since it 734 // will not include lines not terminated by "\n" (eg a file with no "\n", or 735 // a final line without "\n"). Note: Zoekt deduplicates documents across 736 // branches, so if a path has the same contents on multiple branches, there 737 // is only one document for it. As such that document's newlines is only 738 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 739 // for counts which do not deduplicate. 740 NewLinesCount uint64 741 742 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 743 // branch. 744 DefaultBranchNewLinesCount uint64 745 746 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 747 // except the default branch. 748 OtherBranchesNewLinesCount uint64 749} 750 751func (s *RepoStats) Add(o *RepoStats) { 752 // can't update Repos, since one repo may have multiple 753 // shards. 754 s.Shards += o.Shards 755 s.IndexBytes += o.IndexBytes 756 s.Documents += o.Documents 757 s.ContentBytes += o.ContentBytes 758 759 // Sourcegraph specific 760 s.NewLinesCount += o.NewLinesCount 761 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 762 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 763} 764 765type RepoListEntry struct { 766 Repository Repository 767 IndexMetadata IndexMetadata 768 Stats RepoStats 769} 770 771// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 772// performance profiling of sourcegraph.com revealed that querying this 773// information from Zoekt was causing lots of CPU and memory usage. Note: we 774// can revisit this, how we store and query this information has changed a lot 775// since this was introduced. 776type MinimalRepoListEntry struct { 777 // HasSymbols is exported since Sourcegraph uses this information at search 778 // planning time to decide between Zoekt and an unindexed symbol search. 779 // 780 // Note: it pretty much is always true in practice. 781 HasSymbols bool 782 783 // Branches is used by Sourcegraphs query planner to decided if it can use 784 // zoekt or go via an unindexed code path. 785 Branches []RepositoryBranch 786 787 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 788 // since the epoch). This is to make it clear we are not transporting the 789 // full fidelty timestamp (ie with milliseconds and location). Additionally 790 // it saves 16 bytes in this struct. 791 // 792 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 793 // how many repositories need updating after a ranking change/etc. 794 // 795 // TODO(keegancsmith) audit updates to IndexTime and document how and when 796 // it changes. Concerned about things like metadata updates or compound 797 // shards leading to untrustworthy data here. 798 IndexTimeUnix int64 799} 800 801type ReposMap map[uint32]MinimalRepoListEntry 802 803// MarshalBinary implements a specialized encoder for ReposMap. 804func (q *ReposMap) MarshalBinary() ([]byte, error) { 805 return reposMapEncode(*q) 806} 807 808// UnmarshalBinary implements a specialized decoder for ReposMap. 809func (q *ReposMap) UnmarshalBinary(b []byte) error { 810 var err error 811 (*q), err = reposMapDecode(b) 812 return err 813} 814 815// RepoList holds a set of Repository metadata. 816type RepoList struct { 817 // Returned when ListOptions.Field is RepoListFieldRepos. 818 Repos []*RepoListEntry 819 820 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 821 ReposMap ReposMap 822 823 Crashes int 824 825 // Stats response to a List request. 826 // This is the aggregate RepoStats of all repos matching the input query. 827 Stats RepoStats 828} 829 830type Searcher interface { 831 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 832 833 // List lists repositories. The query `q` can only contain 834 // query.Repo atoms. 835 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 836 Close() 837 838 // Describe the searcher for debug messages. 839 String() string 840} 841 842type RepoListField int 843 844const ( 845 RepoListFieldRepos RepoListField = 0 846 RepoListFieldReposMap = 2 847) 848 849type ListOptions struct { 850 // Field decides which field to populate in RepoList response. 851 Field RepoListField 852} 853 854func (o *ListOptions) GetField() (RepoListField, error) { 855 if o == nil { 856 return RepoListFieldRepos, nil 857 } 858 switch o.Field { 859 case RepoListFieldRepos, RepoListFieldReposMap: 860 return o.Field, nil 861 case 1: 862 return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field) 863 default: 864 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 865 } 866} 867 868func (o *ListOptions) String() string { 869 return fmt.Sprintf("%#v", o) 870} 871 872type SearchOptions struct { 873 // Return an upper-bound estimate of eligible documents in 874 // stats.ShardFilesConsidered. 875 EstimateDocCount bool 876 877 // Return the whole file. 878 Whole bool 879 880 // Maximum number of matches: skip all processing an index 881 // shard after we found this many non-overlapping matches. 882 ShardMaxMatchCount int 883 884 // Maximum number of matches: stop looking for more matches 885 // once we have this many matches across shards. 886 TotalMaxMatchCount int 887 888 // Maximum number of matches: skip processing documents for a repository in 889 // a shard once we have found ShardRepoMaxMatchCount. 890 // 891 // A compound shard may contain multiple repositories. This will most often 892 // be set to 1 to find all repositories containing a result. 893 ShardRepoMaxMatchCount int 894 895 // Abort the search after this much time has passed. 896 MaxWallTime time.Duration 897 898 // FlushWallTime if non-zero will stop streaming behaviour at first and 899 // instead will collate and sort results. At FlushWallTime the results will 900 // be sent and then the behaviour will revert to the normal streaming. 901 FlushWallTime time.Duration 902 903 // Truncates the number of documents (i.e. files) after collating and 904 // sorting the results. 905 MaxDocDisplayCount int 906 907 // Truncates the number of matchs after collating and sorting the results. 908 MaxMatchDisplayCount int 909 910 // If set to a number greater than zero then up to this many number 911 // of context lines will be added before and after each matched line. 912 // Note that the included context lines might contain matches and 913 // it's up to the consumer of the result to remove those lines. 914 NumContextLines int 915 916 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 917 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 918 ChunkMatches bool 919 920 // EXPERIMENTAL. If true, document ranks are used as additional input for 921 // sorting matches. 922 UseDocumentRanks bool 923 924 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 925 // their weight in the file match score. If the value is <= 0.0, the default weight value 926 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 927 DocumentRanksWeight float64 928 929 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 930 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 931 // When enabled, all other scoring signals are ignored, including document ranks. 932 UseKeywordScoring bool 933 934 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 935 // a command-line flag 936 Trace bool 937 938 // If set, the search results will contain debug information for scoring. 939 DebugScore bool 940 941 // SpanContext is the opentracing span context, if it exists, from the zoekt client 942 SpanContext map[string]string 943} 944 945// String returns a succinct representation of the options. This is meant for 946// human consumption in logs and traces. 947// 948// Note: some tracing systems have limits on length of values, so we take care 949// to try and make this small, and include the important information near the 950// front incase of truncation. 951func (s *SearchOptions) String() string { 952 var b strings.Builder 953 954 add := func(name, value string) { 955 b.WriteString(name) 956 b.WriteByte('=') 957 b.WriteString(value) 958 b.WriteByte(' ') 959 } 960 addInt := func(name string, value int) { 961 if value != 0 { 962 add(name, strconv.Itoa(value)) 963 } 964 } 965 addDuration := func(name string, value time.Duration) { 966 if value != 0 { 967 add(name, value.String()) 968 } 969 } 970 addBool := func(name string, value bool) { 971 if !value { 972 return 973 } 974 b.WriteString(name) 975 b.WriteByte(' ') 976 } 977 978 b.WriteString("zoekt.SearchOptions{ ") 979 980 addInt("ShardMaxMatchCount", s.ShardMaxMatchCount) 981 addInt("TotalMaxMatchCount", s.TotalMaxMatchCount) 982 addInt("ShardRepoMaxMatchCount", s.ShardRepoMaxMatchCount) 983 addInt("MaxDocDisplayCount", s.MaxDocDisplayCount) 984 addInt("MaxMatchDisplayCount", s.MaxMatchDisplayCount) 985 addInt("NumContextLines", s.NumContextLines) 986 987 addDuration("MaxWallTime", s.MaxWallTime) 988 addDuration("FlushWallTime", s.FlushWallTime) 989 990 if s.DocumentRanksWeight > 0 { 991 add("DocumentRanksWeight", strconv.FormatFloat(s.DocumentRanksWeight, 'g', -1, 64)) 992 } 993 994 addBool("EstimateDocCount", s.EstimateDocCount) 995 addBool("Whole", s.Whole) 996 addBool("ChunkMatches", s.ChunkMatches) 997 addBool("UseDocumentRanks", s.UseDocumentRanks) 998 addBool("UseKeywordScoring", s.UseKeywordScoring) 999 addBool("Trace", s.Trace) 1000 addBool("DebugScore", s.DebugScore) 1001 1002 for k, v := range s.SpanContext { 1003 add("SpanContext."+k, strconv.Quote(v)) 1004 } 1005 1006 b.WriteByte('}') 1007 return b.String() 1008} 1009 1010// Sender is the interface that wraps the basic Send method. 1011type Sender interface { 1012 Send(*SearchResult) 1013} 1014 1015// Streamer adds the method StreamSearch to the Searcher interface. 1016type Streamer interface { 1017 Searcher 1018 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 1019}