fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt // import "github.com/sourcegraph/zoekt" 16 17import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "fmt" 22 "reflect" 23 "strconv" 24 "strings" 25 "time" 26 27 "github.com/sourcegraph/zoekt/query" 28) 29 30const ( 31 mapHeaderBytes uint64 = 48 32 sliceHeaderBytes uint64 = 24 33 stringHeaderBytes uint64 = 16 34 pointerSize uint64 = 8 35 interfaceBytes uint64 = 16 36) 37 38// FileMatch contains all the matches within a file. 39type FileMatch struct { 40 FileName string 41 42 // Repository is the globally unique name of the repo of the 43 // match 44 Repository string 45 46 // SubRepositoryName is the globally unique name of the repo, 47 // if it came from a subrepository 48 SubRepositoryName string `json:",omitempty"` 49 50 // SubRepositoryPath holds the prefix where the subrepository 51 // was mounted. 52 SubRepositoryPath string `json:",omitempty"` 53 54 // Commit SHA1 (hex) of the (sub)repo holding the file. 55 Version string `json:",omitempty"` 56 57 // Detected language of the result. 58 Language string 59 60 // For debugging. Needs DebugScore set, but public so tests in 61 // other packages can print some diagnostics. 62 Debug string `json:",omitempty"` 63 64 Branches []string `json:",omitempty"` 65 66 // One of LineMatches or ChunkMatches will be returned depending on whether 67 // the SearchOptions.ChunkMatches is set. 68 LineMatches []LineMatch `json:",omitempty"` 69 ChunkMatches []ChunkMatch `json:",omitempty"` 70 71 // Only set if requested 72 Content []byte `json:",omitempty"` 73 74 // Checksum of the content. 75 Checksum []byte 76 77 // Ranking; the higher, the better. 78 Score float64 `json:",omitempty"` 79 80 // RepositoryPriority is a Sourcegraph extension. It is used by Sourcegraph to 81 // order results from different repositories relative to each other. 82 RepositoryPriority float64 `json:",omitempty"` 83 84 // RepositoryID is a Sourcegraph extension. This is the ID of Repository in 85 // Sourcegraph. 86 RepositoryID uint32 `json:",omitempty"` 87} 88 89func (m *FileMatch) sizeBytes() (sz uint64) { 90 // Score 91 sz += 8 92 93 for _, s := range []string{ 94 m.Debug, 95 m.FileName, 96 m.Repository, 97 m.Language, 98 m.SubRepositoryName, 99 m.SubRepositoryPath, 100 m.Version, 101 } { 102 sz += stringHeaderBytes + uint64(len(s)) 103 } 104 105 // Branches 106 sz += sliceHeaderBytes 107 for _, s := range m.Branches { 108 sz += stringHeaderBytes + uint64(len(s)) 109 } 110 111 // LineMatches 112 sz += sliceHeaderBytes 113 for _, lm := range m.LineMatches { 114 sz += lm.sizeBytes() 115 } 116 117 // ChunkMatches 118 sz += sliceHeaderBytes 119 for _, cm := range m.ChunkMatches { 120 sz += cm.sizeBytes() 121 } 122 123 // RepositoryID 124 sz += 4 125 126 // RepositoryPriority 127 sz += 8 128 129 // Content 130 sz += sliceHeaderBytes + uint64(len(m.Content)) 131 132 // Checksum 133 sz += sliceHeaderBytes + uint64(len(m.Checksum)) 134 135 return 136} 137 138// ChunkMatch is a set of non-overlapping matches within a contiguous range of 139// lines in the file. 140type ChunkMatch struct { 141 DebugScore string 142 143 // Content is a contiguous range of complete lines that fully contains Ranges. 144 Content []byte 145 146 // Ranges is a set of matching ranges within this chunk. Each range is relative 147 // to the beginning of the file (not the beginning of Content). 148 Ranges []Range 149 150 // SymbolInfo is the symbol information associated with Ranges. If it is non-nil, 151 // its length will equal that of Ranges. Any of its elements may be nil. 152 SymbolInfo []*Symbol 153 154 // FileName indicates whether this match is a match on the file name, in 155 // which case Content will contain the file name. 156 FileName bool 157 158 // ContentStart is the location (inclusive) of the beginning of content 159 // relative to the beginning of the file. It will always be at the 160 // beginning of a line (Column will always be 1). 161 ContentStart Location 162 163 Score float64 164} 165 166func (cm *ChunkMatch) sizeBytes() (sz uint64) { 167 // Content 168 sz += sliceHeaderBytes + uint64(len(cm.Content)) 169 170 // ContentStart 171 sz += cm.ContentStart.sizeBytes() 172 173 // FileName 174 sz += 1 175 176 // Ranges 177 sz += sliceHeaderBytes 178 if len(cm.Ranges) > 0 { 179 sz += uint64(len(cm.Ranges)) * cm.Ranges[0].sizeBytes() 180 } 181 182 // SymbolInfo 183 sz += sliceHeaderBytes 184 for _, si := range cm.SymbolInfo { 185 sz += pointerSize 186 if si != nil { 187 sz += si.sizeBytes() 188 } 189 } 190 191 // Score 192 sz += 8 193 194 // DebugScore 195 sz += stringHeaderBytes + uint64(len(cm.DebugScore)) 196 197 return 198} 199 200type Range struct { 201 // The inclusive beginning of the range. 202 Start Location 203 // The exclusive end of the range. 204 End Location 205} 206 207func (r *Range) sizeBytes() uint64 { 208 return r.Start.sizeBytes() + r.End.sizeBytes() 209} 210 211type Location struct { 212 // 0-based byte offset from the beginning of the file 213 ByteOffset uint32 214 // 1-based line number from the beginning of the file 215 LineNumber uint32 216 // 1-based column number (in runes) from the beginning of line 217 Column uint32 218} 219 220func (l *Location) sizeBytes() uint64 { 221 return 3 * 4 222} 223 224// LineMatch holds the matches within a single line in a file. 225type LineMatch struct { 226 // The line in which a match was found. 227 Line []byte 228 LineStart int 229 LineEnd int 230 LineNumber int 231 232 // Before and After are only set when SearchOptions.NumContextLines is > 0 233 Before []byte 234 After []byte 235 236 // If set, this was a match on the filename. 237 FileName bool 238 239 // The higher the better. Only ranks the quality of the match 240 // within the file, does not take rank of file into account 241 Score float64 242 DebugScore string 243 244 LineFragments []LineFragmentMatch 245} 246 247func (lm *LineMatch) sizeBytes() (sz uint64) { 248 // Line 249 sz += sliceHeaderBytes + uint64(len(lm.Line)) 250 251 // LineStart, LineEnd, LineNumber 252 sz += 3 * 8 253 254 // Before 255 sz += sliceHeaderBytes + uint64(len(lm.Before)) 256 257 // After 258 sz += sliceHeaderBytes + uint64(len(lm.After)) 259 260 // FileName 261 sz += 1 262 263 // Score 264 sz += 8 265 266 // DebugScore 267 sz += stringHeaderBytes + uint64(len(lm.DebugScore)) 268 269 // LineFragments 270 sz += sliceHeaderBytes 271 for _, lf := range lm.LineFragments { 272 sz += lf.sizeBytes() 273 } 274 275 return 276} 277 278type Symbol struct { 279 Sym string 280 Kind string 281 Parent string 282 ParentKind string 283} 284 285func (s *Symbol) sizeBytes() uint64 { 286 return 4*stringHeaderBytes + uint64(len(s.Sym)+len(s.Kind)+len(s.Parent)+len(s.ParentKind)) 287} 288 289// LineFragmentMatch a segment of matching text within a line. 290type LineFragmentMatch struct { 291 // Offset within the line, in bytes. 292 LineOffset int 293 294 // Offset from file start, in bytes. 295 Offset uint32 296 297 // Number bytes that match. 298 MatchLength int 299 300 SymbolInfo *Symbol 301} 302 303func (lfm *LineFragmentMatch) sizeBytes() (sz uint64) { 304 // LineOffset 305 sz += 8 306 307 // Offset 308 sz += 4 309 310 // MatchLength 311 sz += 8 312 313 // SymbolInfo 314 sz += pointerSize 315 if lfm.SymbolInfo != nil { 316 sz += lfm.SymbolInfo.sizeBytes() 317 } 318 319 return 320} 321 322type FlushReason uint8 323 324const ( 325 FlushReasonTimerExpired FlushReason = 1 << iota 326 FlushReasonFinalFlush 327 FlushReasonMaxSize 328) 329 330var FlushReasonStrings = map[FlushReason]string{ 331 FlushReasonTimerExpired: "timer_expired", 332 FlushReasonFinalFlush: "final_flush", 333 FlushReasonMaxSize: "max_size_reached", 334} 335 336func (fr FlushReason) String() string { 337 if v, ok := FlushReasonStrings[fr]; ok { 338 return v 339 } 340 341 return "none" 342} 343 344// Stats contains interesting numbers on the search 345type Stats struct { 346 // Amount of I/O for reading contents. 347 ContentBytesLoaded int64 348 349 // Amount of I/O for reading from index. 350 IndexBytesLoaded int64 351 352 // Number of search shards that had a crash. 353 Crashes int 354 355 // Wall clock time for this search 356 Duration time.Duration 357 358 // Number of files containing a match. 359 FileCount int 360 361 // Number of files in shards that we considered. 362 ShardFilesConsidered int 363 364 // Files that we evaluated. Equivalent to files for which all 365 // atom matches (including negations) evaluated to true. 366 FilesConsidered int 367 368 // Files for which we loaded file content to verify substring matches 369 FilesLoaded int 370 371 // Candidate files whose contents weren't examined because we 372 // gathered enough matches. 373 FilesSkipped int 374 375 // Shards that we scanned to find matches. 376 ShardsScanned int 377 378 // Shards that we did not process because a query was canceled. 379 ShardsSkipped int 380 381 // Shards that we did not process because the query was rejected by the 382 // ngram filter indicating it had no matches. 383 ShardsSkippedFilter int 384 385 // Number of non-overlapping matches 386 MatchCount int 387 388 // Number of candidate matches as a result of searching ngrams. 389 NgramMatches int 390 391 // NgramLookups is the number of times we accessed an ngram in the index. 392 NgramLookups int 393 394 // Wall clock time for queued search. 395 Wait time.Duration 396 397 // Aggregate wall clock time spent constructing and pruning the match tree. 398 // This accounts for time such as lookups in the trigram index. 399 MatchTreeConstruction time.Duration 400 401 // Aggregate wall clock time spent searching the match tree. This accounts 402 // for the bulk of search work done looking for matches. 403 MatchTreeSearch time.Duration 404 405 // Number of times regexp was called on files that we evaluated. 406 RegexpsConsidered int 407 408 // FlushReason explains why results were flushed. 409 FlushReason FlushReason 410} 411 412func (s *Stats) sizeBytes() (sz uint64) { 413 sz = 16 * 8 // This assumes we are running on a 64-bit architecture 414 sz += 1 // FlushReason 415 416 return 417} 418 419func (s *Stats) Add(o Stats) { 420 s.ContentBytesLoaded += o.ContentBytesLoaded 421 s.IndexBytesLoaded += o.IndexBytesLoaded 422 s.Crashes += o.Crashes 423 s.FileCount += o.FileCount 424 s.FilesConsidered += o.FilesConsidered 425 s.FilesLoaded += o.FilesLoaded 426 s.FilesSkipped += o.FilesSkipped 427 s.MatchCount += o.MatchCount 428 s.NgramMatches += o.NgramMatches 429 s.NgramLookups += o.NgramLookups 430 s.ShardFilesConsidered += o.ShardFilesConsidered 431 s.ShardsScanned += o.ShardsScanned 432 s.ShardsSkipped += o.ShardsSkipped 433 s.ShardsSkippedFilter += o.ShardsSkippedFilter 434 s.Wait += o.Wait 435 s.MatchTreeConstruction += o.MatchTreeConstruction 436 s.MatchTreeSearch += o.MatchTreeSearch 437 s.RegexpsConsidered += o.RegexpsConsidered 438 439 // We want the first non-zero FlushReason to be sticky. This is a useful 440 // property when aggregating stats from several Zoekts. 441 if s.FlushReason == 0 { 442 s.FlushReason = o.FlushReason 443 } 444} 445 446// Zero returns true if stats is empty. 447func (s *Stats) Zero() bool { 448 if s == nil { 449 return true 450 } 451 452 return !(s.ContentBytesLoaded > 0 || 453 s.IndexBytesLoaded > 0 || 454 s.Crashes > 0 || 455 s.FileCount > 0 || 456 s.FilesConsidered > 0 || 457 s.FilesLoaded > 0 || 458 s.FilesSkipped > 0 || 459 s.MatchCount > 0 || 460 s.NgramMatches > 0 || 461 s.NgramLookups > 0 || 462 s.ShardFilesConsidered > 0 || 463 s.ShardsScanned > 0 || 464 s.ShardsSkipped > 0 || 465 s.ShardsSkippedFilter > 0 || 466 s.Wait > 0 || 467 s.MatchTreeConstruction > 0 || 468 s.MatchTreeSearch > 0 || 469 s.RegexpsConsidered > 0) 470} 471 472// Progress contains information about the global progress of the running search query. 473// This is used by the frontend to reorder results and emit them when stable. 474// Sourcegraph specific: this is used when querying multiple zoekt-webserver instances. 475type Progress struct { 476 // Priority of the shard that was searched. 477 Priority float64 478 479 // MaxPendingPriority is the maximum priority of pending result that is being searched in parallel. 480 // This is used to reorder results when the result set is known to be stable-- that is, when a result's 481 // Priority is greater than the max(MaxPendingPriority) from the latest results of each backend, it can be returned to the user. 482 // 483 // MaxPendingPriority decreases monotonically in each SearchResult. 484 MaxPendingPriority float64 485} 486 487func (p *Progress) sizeBytes() uint64 { 488 return 2 * 8 489} 490 491// SearchResult contains search matches and extra data 492type SearchResult struct { 493 Stats 494 495 // Do not encode this as we cannot encode -Inf in JSON 496 Progress `json:"-"` 497 498 Files []FileMatch 499 500 // RepoURLs holds a repo => template string map. 501 RepoURLs map[string]string 502 503 // FragmentNames holds a repo => template string map, for 504 // the line number fragment. 505 LineFragments map[string]string 506} 507 508// SizeBytes is a best-effort estimate of the size of SearchResult in memory. 509// The estimate does not take alignment into account. The result is a lower 510// bound on the actual size in memory. 511func (sr *SearchResult) SizeBytes() (sz uint64) { 512 sz += sr.Stats.sizeBytes() 513 sz += sr.Progress.sizeBytes() 514 515 // Files 516 sz += sliceHeaderBytes 517 for _, f := range sr.Files { 518 sz += f.sizeBytes() 519 } 520 521 // RepoURLs 522 sz += mapHeaderBytes 523 for k, v := range sr.RepoURLs { 524 sz += stringHeaderBytes + uint64(len(k)) 525 sz += stringHeaderBytes + uint64(len(v)) 526 } 527 528 // LineFragments 529 sz += mapHeaderBytes 530 for k, v := range sr.LineFragments { 531 sz += stringHeaderBytes + uint64(len(k)) 532 sz += stringHeaderBytes + uint64(len(v)) 533 } 534 535 return 536} 537 538// RepositoryBranch describes an indexed branch, which is a name 539// combined with a version. 540type RepositoryBranch struct { 541 Name string 542 Version string 543} 544 545func (r RepositoryBranch) String() string { 546 return fmt.Sprintf("%s@%s", r.Name, r.Version) 547} 548 549// Repository holds repository metadata. 550type Repository struct { 551 // Sourcegraph's repository ID 552 ID uint32 553 554 // The repository name 555 Name string 556 557 // The repository URL. 558 URL string 559 560 // The physical source where this repo came from, eg. full 561 // path to the zip filename or git repository directory. This 562 // will not be exposed in the UI, but can be used to detect 563 // orphaned index shards. 564 Source string 565 566 // The branches indexed in this repo. 567 Branches []RepositoryBranch 568 569 // Nil if this is not the super project. 570 SubRepoMap map[string]*Repository 571 572 // URL template to link to the commit of a branch 573 CommitURLTemplate string 574 575 // The repository URL for getting to a file. Has access to 576 // {{.Version}}, {{.Path}} 577 FileURLTemplate string 578 579 // The URL fragment to add to a file URL for line numbers. has 580 // access to {{.LineNumber}}. The fragment should include the 581 // separator, generally '#' or ';'. 582 LineFragmentTemplate string 583 584 // Perf optimization: priority is set when we load the shard. It corresponds to 585 // the value of "priority" stored in RawConfig. 586 priority float64 587 588 // All zoekt.* configuration settings. 589 RawConfig map[string]string 590 591 // Importance of the repository, bigger is more important 592 Rank uint16 593 594 // IndexOptions is a hash of the options used to create the index for the 595 // repo. 596 IndexOptions string 597 598 // HasSymbols is true if this repository has indexed ctags 599 // output. Sourcegraph specific: This field is more appropriate for 600 // IndexMetadata. However, we store it here since the Sourcegraph frontend 601 // can read this structure but not IndexMetadata. 602 HasSymbols bool 603 604 // Tombstone is true if we are not allowed to search this repo. 605 Tombstone bool 606 607 // LatestCommitDate is the date of the latest commit among all indexed Branches. 608 // The date might be time.Time's 0-value if the repository was last indexed 609 // before this field was added. 610 LatestCommitDate time.Time 611 612 // FileTombstones is a set of file paths that should be ignored across all branches 613 // in this shard. 614 FileTombstones map[string]struct{} `json:",omitempty"` 615} 616 617func (r *Repository) UnmarshalJSON(data []byte) error { 618 // We define a new type so that we can use json.Unmarshal 619 // without recursing into this same method. 620 type repository *Repository 621 repo := repository(r) 622 623 err := json.Unmarshal(data, repo) 624 if err != nil { 625 return err 626 } 627 628 if v, ok := repo.RawConfig["repoid"]; ok { 629 id, _ := strconv.ParseUint(v, 10, 32) 630 r.ID = uint32(id) 631 } 632 633 if v, ok := repo.RawConfig["priority"]; ok { 634 r.priority, err = strconv.ParseFloat(v, 64) 635 if err != nil { 636 r.priority = 0 637 } 638 639 // Sourcegraph indexserver doesn't set repo.Rank, so we set it here 640 // based on priority. Setting it on read instead of during indexing 641 // allows us to avoid a complete reindex. 642 if r.Rank == 0 && r.priority > 0 { 643 // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 644 // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 645 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 646 } 647 } 648 return nil 649} 650 651// MergeMutable will merge x into r. mutated will be true if it made any 652// changes. err is non-nil if we needed to mutate an immutable field. 653// 654// Note: SubRepoMap, IndexOptions and HasSymbol fields are ignored. They are 655// computed while indexing so can't be synthesized from x. 656// 657// Note: We ignore RawConfig fields which are duplicated into Repository: 658// name and id. 659func (r *Repository) MergeMutable(x *Repository) (mutated bool, err error) { 660 if r.ID != x.ID { 661 // Sourcegraph: strange behaviour may occur if ID changes but names don't. 662 return mutated, errors.New("ID is immutable") 663 } 664 if r.Name != x.Name { 665 // Name is encoded into the shard name on disk. We need to re-index if it 666 // changes. 667 return mutated, errors.New("Name is immutable") 668 } 669 if !reflect.DeepEqual(r.Branches, x.Branches) { 670 // Need a reindex if content changing. 671 return mutated, errors.New("Branches is immutable") 672 } 673 674 for k, v := range x.RawConfig { 675 // We ignore name and id since they are encoded into the repository. 676 if k == "name" || k == "id" { 677 continue 678 } 679 if r.RawConfig == nil { 680 mutated = true 681 r.RawConfig = make(map[string]string) 682 } 683 if r.RawConfig[k] != v { 684 mutated = true 685 r.RawConfig[k] = v 686 } 687 } 688 689 if r.URL != x.URL { 690 mutated = true 691 r.URL = x.URL 692 } 693 if r.CommitURLTemplate != x.CommitURLTemplate { 694 mutated = true 695 r.CommitURLTemplate = x.CommitURLTemplate 696 } 697 if r.FileURLTemplate != x.FileURLTemplate { 698 mutated = true 699 r.FileURLTemplate = x.FileURLTemplate 700 } 701 if r.LineFragmentTemplate != x.LineFragmentTemplate { 702 mutated = true 703 r.LineFragmentTemplate = x.LineFragmentTemplate 704 } 705 706 return mutated, nil 707} 708 709// IndexMetadata holds metadata stored in the index file. It contains 710// data generated by the core indexing library. 711type IndexMetadata struct { 712 IndexFormatVersion int 713 IndexFeatureVersion int 714 IndexMinReaderVersion int 715 IndexTime time.Time 716 PlainASCII bool 717 LanguageMap map[string]uint16 718 ZoektVersion string 719 ID string 720} 721 722// Statistics of a (collection of) repositories. 723type RepoStats struct { 724 // Repos is used for aggregrating the number of repositories. 725 // 726 // Note: This field is not populated on RepoListEntry.Stats (individual) but 727 // only for RepoList.Stats (aggregate). 728 Repos int 729 730 // Shards is the total number of search shards. 731 Shards int 732 733 // Documents holds the number of documents or files. 734 Documents int 735 736 // IndexBytes is the amount of RAM used for index overhead. 737 IndexBytes int64 738 739 // ContentBytes is the amount of RAM used for raw content. 740 ContentBytes int64 741 742 // Sourcegraph specific stats below. These are not as efficient to calculate 743 // as the above statistics. We experimentally measured about a 10% slower 744 // shard load time. However, we find these values very useful to track and 745 // computing them outside of load time introduces a lot of complexity. 746 747 // NewLinesCount is the number of newlines "\n" that appear in the zoekt 748 // indexed documents. This is not exactly the same as line count, since it 749 // will not include lines not terminated by "\n" (eg a file with no "\n", or 750 // a final line without "\n"). Note: Zoekt deduplicates documents across 751 // branches, so if a path has the same contents on multiple branches, there 752 // is only one document for it. As such that document's newlines is only 753 // counted once. See DefaultBranchNewLinesCount and AllBranchesNewLinesCount 754 // for counts which do not deduplicate. 755 NewLinesCount uint64 756 757 // DefaultBranchNewLinesCount is the number of newlines "\n" in the default 758 // branch. 759 DefaultBranchNewLinesCount uint64 760 761 // OtherBranchesNewLinesCount is the number of newlines "\n" in all branches 762 // except the default branch. 763 OtherBranchesNewLinesCount uint64 764} 765 766func (s *RepoStats) Add(o *RepoStats) { 767 // can't update Repos, since one repo may have multiple 768 // shards. 769 s.Shards += o.Shards 770 s.IndexBytes += o.IndexBytes 771 s.Documents += o.Documents 772 s.ContentBytes += o.ContentBytes 773 774 // Sourcegraph specific 775 s.NewLinesCount += o.NewLinesCount 776 s.DefaultBranchNewLinesCount += o.DefaultBranchNewLinesCount 777 s.OtherBranchesNewLinesCount += o.OtherBranchesNewLinesCount 778} 779 780type RepoListEntry struct { 781 Repository Repository 782 IndexMetadata IndexMetadata 783 Stats RepoStats 784} 785 786// MinimalRepoListEntry is a subset of RepoListEntry. It was added after 787// performance profiling of sourcegraph.com revealed that querying this 788// information from Zoekt was causing lots of CPU and memory usage. Note: we 789// can revisit this, how we store and query this information has changed a lot 790// since this was introduced. 791type MinimalRepoListEntry struct { 792 // HasSymbols is exported since Sourcegraph uses this information at search 793 // planning time to decide between Zoekt and an unindexed symbol search. 794 // 795 // Note: it pretty much is always true in practice. 796 HasSymbols bool 797 798 // Branches is used by Sourcegraphs query planner to decided if it can use 799 // zoekt or go via an unindexed code path. 800 Branches []RepositoryBranch 801 802 // IndexTimeUnix is the IndexTime converted to unix time (number of seconds 803 // since the epoch). This is to make it clear we are not transporting the 804 // full fidelty timestamp (ie with milliseconds and location). Additionally 805 // it saves 16 bytes in this struct. 806 // 807 // IndexTime is used as a heuristic in Sourcegraph to decide in aggregate 808 // how many repositories need updating after a ranking change/etc. 809 // 810 // TODO(keegancsmith) audit updates to IndexTime and document how and when 811 // it changes. Concerned about things like metadata updates or compound 812 // shards leading to untrustworthy data here. 813 IndexTimeUnix int64 814} 815 816type ReposMap map[uint32]MinimalRepoListEntry 817 818// MarshalBinary implements a specialized encoder for ReposMap. 819func (q *ReposMap) MarshalBinary() ([]byte, error) { 820 return reposMapEncode(*q) 821} 822 823// UnmarshalBinary implements a specialized decoder for ReposMap. 824func (q *ReposMap) UnmarshalBinary(b []byte) error { 825 var err error 826 (*q), err = reposMapDecode(b) 827 return err 828} 829 830// RepoList holds a set of Repository metadata. 831type RepoList struct { 832 // Returned when ListOptions.Field is RepoListFieldRepos. 833 Repos []*RepoListEntry 834 835 // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 836 ReposMap ReposMap 837 838 Crashes int 839 840 // Stats response to a List request. 841 // This is the aggregate RepoStats of all repos matching the input query. 842 Stats RepoStats 843} 844 845type Searcher interface { 846 Search(ctx context.Context, q query.Q, opts *SearchOptions) (*SearchResult, error) 847 848 // List lists repositories. The query `q` can only contain 849 // query.Repo atoms. 850 List(ctx context.Context, q query.Q, opts *ListOptions) (*RepoList, error) 851 Close() 852 853 // Describe the searcher for debug messages. 854 String() string 855} 856 857type RepoListField int 858 859const ( 860 RepoListFieldRepos RepoListField = 0 861 RepoListFieldReposMap = 2 862) 863 864type ListOptions struct { 865 // Field decides which field to populate in RepoList response. 866 Field RepoListField 867} 868 869func (o *ListOptions) GetField() (RepoListField, error) { 870 if o == nil { 871 return RepoListFieldRepos, nil 872 } 873 switch o.Field { 874 case RepoListFieldRepos, RepoListFieldReposMap: 875 return o.Field, nil 876 case 1: 877 return 0, fmt.Errorf("RepoListFieldMinimal (%d) is no longer supported", o.Field) 878 default: 879 return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 880 } 881} 882 883func (o *ListOptions) String() string { 884 return fmt.Sprintf("%#v", o) 885} 886 887type SearchOptions struct { 888 // Return an upper-bound estimate of eligible documents in 889 // stats.ShardFilesConsidered. 890 EstimateDocCount bool 891 892 // Return the whole file. 893 Whole bool 894 895 // Maximum number of matches: skip all processing an index 896 // shard after we found this many non-overlapping matches. 897 ShardMaxMatchCount int 898 899 // Maximum number of matches: stop looking for more matches 900 // once we have this many matches across shards. 901 TotalMaxMatchCount int 902 903 // Maximum number of matches: skip processing documents for a repository in 904 // a shard once we have found ShardRepoMaxMatchCount. 905 // 906 // A compound shard may contain multiple repositories. This will most often 907 // be set to 1 to find all repositories containing a result. 908 ShardRepoMaxMatchCount int 909 910 // Abort the search after this much time has passed. 911 MaxWallTime time.Duration 912 913 // FlushWallTime if non-zero will stop streaming behaviour at first and 914 // instead will collate and sort results. At FlushWallTime the results will 915 // be sent and then the behaviour will revert to the normal streaming. 916 FlushWallTime time.Duration 917 918 // Truncates the number of documents (i.e. files) after collating and 919 // sorting the results. 920 MaxDocDisplayCount int 921 922 // Truncates the number of matchs after collating and sorting the results. 923 MaxMatchDisplayCount int 924 925 // If set to a number greater than zero then up to this many number 926 // of context lines will be added before and after each matched line. 927 // Note that the included context lines might contain matches and 928 // it's up to the consumer of the result to remove those lines. 929 NumContextLines int 930 931 // If true, ChunkMatches will be returned in each FileMatch rather than LineMatches 932 // EXPERIMENTAL: the behavior of this flag may be changed in future versions. 933 ChunkMatches bool 934 935 // EXPERIMENTAL. If true, document ranks are used as additional input for 936 // sorting matches. 937 UseDocumentRanks bool 938 939 // EXPERIMENTAL. When UseDocumentRanks is enabled, this can be optionally set to adjust 940 // their weight in the file match score. If the value is <= 0.0, the default weight value 941 // will be used. This option is temporary and is only exposed for testing/ tuning purposes. 942 DocumentRanksWeight float64 943 944 // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula. 945 // Currently, this treats each match in a file as a term and computes an approximation to BM25. 946 // When enabled, all other scoring signals are ignored, including document ranks. 947 UseKeywordScoring bool 948 949 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as 950 // a command-line flag 951 Trace bool 952 953 // If set, the search results will contain debug information for scoring. 954 DebugScore bool 955 956 // SpanContext is the opentracing span context, if it exists, from the zoekt client 957 SpanContext map[string]string 958} 959 960// String returns a succinct representation of the options. This is meant for 961// human consumption in logs and traces. 962// 963// Note: some tracing systems have limits on length of values, so we take care 964// to try and make this small, and include the important information near the 965// front incase of truncation. 966func (s *SearchOptions) String() string { 967 var b strings.Builder 968 969 add := func(name, value string) { 970 b.WriteString(name) 971 b.WriteByte('=') 972 b.WriteString(value) 973 b.WriteByte(' ') 974 } 975 addInt := func(name string, value int) { 976 if value != 0 { 977 add(name, strconv.Itoa(value)) 978 } 979 } 980 addDuration := func(name string, value time.Duration) { 981 if value != 0 { 982 add(name, value.String()) 983 } 984 } 985 addBool := func(name string, value bool) { 986 if !value { 987 return 988 } 989 b.WriteString(name) 990 b.WriteByte(' ') 991 } 992 993 b.WriteString("zoekt.SearchOptions{ ") 994 995 addInt("ShardMaxMatchCount", s.ShardMaxMatchCount) 996 addInt("TotalMaxMatchCount", s.TotalMaxMatchCount) 997 addInt("ShardRepoMaxMatchCount", s.ShardRepoMaxMatchCount) 998 addInt("MaxDocDisplayCount", s.MaxDocDisplayCount) 999 addInt("MaxMatchDisplayCount", s.MaxMatchDisplayCount) 1000 addInt("NumContextLines", s.NumContextLines) 1001 1002 addDuration("MaxWallTime", s.MaxWallTime) 1003 addDuration("FlushWallTime", s.FlushWallTime) 1004 1005 if s.DocumentRanksWeight > 0 { 1006 add("DocumentRanksWeight", strconv.FormatFloat(s.DocumentRanksWeight, 'g', -1, 64)) 1007 } 1008 1009 addBool("EstimateDocCount", s.EstimateDocCount) 1010 addBool("Whole", s.Whole) 1011 addBool("ChunkMatches", s.ChunkMatches) 1012 addBool("UseDocumentRanks", s.UseDocumentRanks) 1013 addBool("UseKeywordScoring", s.UseKeywordScoring) 1014 addBool("Trace", s.Trace) 1015 addBool("DebugScore", s.DebugScore) 1016 1017 for k, v := range s.SpanContext { 1018 add("SpanContext."+k, strconv.Quote(v)) 1019 } 1020 1021 b.WriteByte('}') 1022 return b.String() 1023} 1024 1025// Sender is the interface that wraps the basic Send method. 1026type Sender interface { 1027 Send(*SearchResult) 1028} 1029 1030// Streamer adds the method StreamSearch to the Searcher interface. 1031type Streamer interface { 1032 Searcher 1033 StreamSearch(ctx context.Context, q query.Q, opts *SearchOptions, sender Sender) (err error) 1034}