fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "context" 19 "fmt" 20 "log" 21 "math" 22 "regexp/syntax" 23 "sort" 24 "strings" 25 "time" 26 27 enry_data "github.com/go-enry/go-enry/v2/data" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33const maxUInt16 = 0xffff 34 35func (m *FileMatch) addScore(what string, s float64, debugScore bool) { 36 if s != 0 && debugScore { 37 m.Debug += fmt.Sprintf("%s:%.2f, ", what, s) 38 } 39 m.Score += s 40} 41 42func (m *FileMatch) addKeywordScore(score float64, sumTf float64, L float64, debugScore bool) { 43 if debugScore { 44 m.Debug += fmt.Sprintf("keyword-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L) 45 } 46 m.Score += score 47} 48 49// simplifyMultiRepo takes a query and a predicate. It returns Const(true) if all 50// repository names fulfill the predicate, Const(false) if none of them do, and q 51// otherwise. 52func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*Repository) bool) query.Q { 53 count := 0 54 alive := len(d.repoMetaData) 55 for i := range d.repoMetaData { 56 if d.repoMetaData[i].Tombstone { 57 alive-- 58 } else if predicate(&d.repoMetaData[i]) { 59 count++ 60 } 61 } 62 if count == alive { 63 return &query.Const{Value: true} 64 } 65 if count > 0 { 66 return q 67 } 68 return &query.Const{Value: false} 69} 70 71func (d *indexData) simplify(in query.Q) query.Q { 72 eval := query.Map(in, func(q query.Q) query.Q { 73 switch r := q.(type) { 74 case *query.Repo: 75 return d.simplifyMultiRepo(q, func(repo *Repository) bool { 76 return r.Regexp.MatchString(repo.Name) 77 }) 78 case *query.RepoRegexp: 79 return d.simplifyMultiRepo(q, func(repo *Repository) bool { 80 return r.Regexp.MatchString(repo.Name) 81 }) 82 case *query.BranchesRepos: 83 for i := range d.repoMetaData { 84 for _, br := range r.List { 85 if br.Repos.Contains(d.repoMetaData[i].ID) { 86 return q 87 } 88 } 89 } 90 return &query.Const{Value: false} 91 case *query.RepoSet: 92 return d.simplifyMultiRepo(q, func(repo *Repository) bool { 93 return r.Set[repo.Name] 94 }) 95 case *query.RepoIDs: 96 return d.simplifyMultiRepo(q, func(repo *Repository) bool { 97 return r.Repos.Contains(repo.ID) 98 }) 99 case *query.Language: 100 _, has := d.metaData.LanguageMap[r.Language] 101 if !has && d.metaData.IndexFeatureVersion < 12 { 102 // For index files that haven't been re-indexed by go-enry, 103 // fall back to file-based matching and continue even if this 104 // repo doesn't have the specific language present. 105 extsForLang := enry_data.ExtensionsByLanguage[r.Language] 106 if extsForLang != nil { 107 extFrags := make([]string, 0, len(extsForLang)) 108 for _, ext := range extsForLang { 109 extFrags = append(extFrags, regexp.QuoteMeta(ext)) 110 } 111 if len(extFrags) > 0 { 112 pattern := fmt.Sprintf("(?i)(%s)$", strings.Join(extFrags, "|")) 113 // inlined copy of query.regexpQuery 114 re, err := syntax.Parse(pattern, syntax.Perl) 115 if err != nil { 116 return &query.Const{Value: false} 117 } 118 if re.Op == syntax.OpLiteral { 119 return &query.Substring{ 120 Pattern: string(re.Rune), 121 FileName: true, 122 } 123 } 124 return &query.Regexp{ 125 Regexp: re, 126 FileName: true, 127 } 128 } 129 } 130 } 131 if !has { 132 return &query.Const{Value: false} 133 } 134 } 135 return q 136 }) 137 return query.Simplify(eval) 138} 139 140func (o *SearchOptions) SetDefaults() { 141 if o.ShardMaxMatchCount == 0 { 142 // We cap the total number of matches, so overly broad 143 // searches don't crash the machine. 144 o.ShardMaxMatchCount = 100000 145 } 146 if o.TotalMaxMatchCount == 0 { 147 o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount 148 } 149} 150 151func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) (sr *SearchResult, err error) { 152 timer := newTimer() 153 154 copyOpts := *opts 155 opts = &copyOpts 156 opts.SetDefaults() 157 158 var res SearchResult 159 if len(d.fileNameIndex) == 0 { 160 return &res, nil 161 } 162 163 select { 164 case <-ctx.Done(): 165 res.Stats.ShardsSkipped++ 166 return &res, nil 167 default: 168 } 169 170 q = d.simplify(q) 171 if c, ok := q.(*query.Const); ok && !c.Value { 172 return &res, nil 173 } 174 175 if opts.EstimateDocCount { 176 res.Stats.ShardFilesConsidered = len(d.fileBranchMasks) 177 return &res, nil 178 } 179 180 q = query.Map(q, query.ExpandFileContent) 181 182 mt, err := d.newMatchTree(q, matchTreeOpt{}) 183 if err != nil { 184 return nil, err 185 } 186 187 // Capture the costs of construction before pruning 188 updateMatchTreeStats(mt, &res.Stats) 189 190 mt, err = pruneMatchTree(mt) 191 if err != nil { 192 return nil, err 193 } 194 res.Stats.MatchTreeConstruction = timer.Elapsed() 195 if mt == nil { 196 res.Stats.ShardsSkippedFilter++ 197 return &res, nil 198 } 199 200 res.Stats.ShardsScanned++ 201 202 cp := &contentProvider{ 203 id: d, 204 stats: &res.Stats, 205 } 206 207 // Track the number of documents found in a repository for 208 // ShardRepoMaxMatchCount 209 var ( 210 lastRepoID uint16 211 repoMatchCount int 212 ) 213 214 docCount := uint32(len(d.fileBranchMasks)) 215 lastDoc := int(-1) 216 217nextFileMatch: 218 for { 219 canceled := false 220 select { 221 case <-ctx.Done(): 222 canceled = true 223 default: 224 } 225 226 nextDoc := mt.nextDoc() 227 if int(nextDoc) <= lastDoc { 228 nextDoc = uint32(lastDoc + 1) 229 } 230 231 for ; nextDoc < docCount; nextDoc++ { 232 repoID := d.repos[nextDoc] 233 repoMetadata := &d.repoMetaData[repoID] 234 235 // Skip tombstoned repositories 236 if repoMetadata.Tombstone { 237 continue 238 } 239 240 // Skip documents that are tombstoned 241 if len(repoMetadata.FileTombstones) > 0 { 242 if _, tombstoned := repoMetadata.FileTombstones[string(d.fileName(nextDoc))]; tombstoned { 243 continue 244 } 245 } 246 247 // Skip documents over ShardRepoMaxMatchCount if specified. 248 if opts.ShardRepoMaxMatchCount > 0 { 249 if repoMatchCount >= opts.ShardRepoMaxMatchCount && repoID == lastRepoID { 250 res.Stats.FilesSkipped++ 251 continue 252 } 253 } 254 255 break 256 } 257 258 if nextDoc >= docCount { 259 break 260 } 261 262 lastDoc = int(nextDoc) 263 264 // We track lastRepoID for ShardRepoMaxMatchCount 265 if lastRepoID != d.repos[nextDoc] { 266 lastRepoID = d.repos[nextDoc] 267 repoMatchCount = 0 268 } 269 270 if canceled || (res.Stats.MatchCount >= opts.ShardMaxMatchCount && opts.ShardMaxMatchCount > 0) { 271 res.Stats.FilesSkipped += int(docCount - nextDoc) 272 break 273 } 274 275 res.Stats.FilesConsidered++ 276 mt.prepare(nextDoc) 277 278 cp.setDocument(nextDoc) 279 280 known := make(map[matchTree]bool) 281 md := d.repoMetaData[d.repos[nextDoc]] 282 283 for cost := costMin; cost <= costMax; cost++ { 284 v, ok := mt.matches(cp, cost, known) 285 if ok && !v { 286 continue nextFileMatch 287 } 288 289 if cost == costMax && !ok { 290 log.Panicf("did not decide. Repo %s, doc %d, known %v", 291 md.Name, nextDoc, known) 292 } 293 } 294 295 fileMatch := FileMatch{ 296 Repository: md.Name, 297 RepositoryID: md.ID, 298 RepositoryPriority: md.priority, 299 FileName: string(d.fileName(nextDoc)), 300 Checksum: d.getChecksum(nextDoc), 301 Language: d.languageMap[d.getLanguage(nextDoc)], 302 } 303 304 if s := d.subRepos[nextDoc]; s > 0 { 305 if s >= uint32(len(d.subRepoPaths[d.repos[nextDoc]])) { 306 log.Panicf("corrupt index: subrepo %d beyond %v", s, d.subRepoPaths) 307 } 308 path := d.subRepoPaths[d.repos[nextDoc]][s] 309 fileMatch.SubRepositoryPath = path 310 sr := md.SubRepoMap[path] 311 fileMatch.SubRepositoryName = sr.Name 312 if idx := d.branchIndex(nextDoc); idx >= 0 { 313 fileMatch.Version = sr.Branches[idx].Version 314 } 315 } else { 316 idx := d.branchIndex(nextDoc) 317 if idx >= 0 { 318 fileMatch.Version = md.Branches[idx].Version 319 } 320 } 321 322 shouldMergeMatches := !opts.ChunkMatches 323 finalCands := gatherMatches(mt, known, shouldMergeMatches) 324 325 if len(finalCands) == 0 { 326 nm := d.fileName(nextDoc) 327 finalCands = append(finalCands, 328 &candidateMatch{ 329 caseSensitive: false, 330 fileName: true, 331 substrBytes: nm, 332 substrLowered: nm, 333 file: nextDoc, 334 runeOffset: 0, 335 byteOffset: 0, 336 byteMatchSz: uint32(len(nm)), 337 }) 338 } 339 340 if opts.ChunkMatches { 341 fileMatch.ChunkMatches = cp.fillChunkMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) 342 } else { 343 fileMatch.LineMatches = cp.fillMatches(finalCands, opts.NumContextLines, fileMatch.Language, opts.DebugScore) 344 } 345 346 if opts.UseKeywordScoring { 347 d.scoreFileUsingBM25(&fileMatch, nextDoc, finalCands, opts) 348 } else { 349 // Use the standard, non-experimental scoring method by default 350 d.scoreFile(&fileMatch, nextDoc, mt, known, opts) 351 } 352 353 fileMatch.Branches = d.gatherBranches(nextDoc, mt, known) 354 sortMatchesByScore(fileMatch.LineMatches) 355 sortChunkMatchesByScore(fileMatch.ChunkMatches) 356 if opts.Whole { 357 fileMatch.Content = cp.data(false) 358 } 359 360 matchedChunkRanges := 0 361 for _, cm := range fileMatch.ChunkMatches { 362 matchedChunkRanges += len(cm.Ranges) 363 } 364 365 repoMatchCount += len(fileMatch.LineMatches) 366 repoMatchCount += matchedChunkRanges 367 368 if opts.DebugScore { 369 fileMatch.Debug = fmt.Sprintf("score:%.2f <- %s", fileMatch.Score, fileMatch.Debug) 370 } 371 372 res.Files = append(res.Files, fileMatch) 373 res.Stats.MatchCount += len(fileMatch.LineMatches) 374 res.Stats.MatchCount += matchedChunkRanges 375 res.Stats.FileCount++ 376 } 377 378 // We do not sort Files here, instead we rely on the shards pkg to do file 379 // ranking. If we sorted now, we would break the assumption that results 380 // from the same repo in a shard appear next to each other. 381 382 for _, md := range d.repoMetaData { 383 r := md 384 addRepo(&res, &r) 385 for _, v := range r.SubRepoMap { 386 addRepo(&res, v) 387 } 388 } 389 390 // Update stats based on work done during document search. 391 updateMatchTreeStats(mt, &res.Stats) 392 393 // If document ranking is enabled, then we can rank and truncate the files to save memory. 394 if opts.UseDocumentRanks { 395 res.Files = SortAndTruncateFiles(res.Files, opts) 396 } 397 398 res.Stats.MatchTreeSearch = timer.Elapsed() 399 400 return &res, nil 401} 402 403// scoreFile computes a score for the file match using various scoring signals, like 404// whether there's an exact match on a symbol, the number of query clauses that matched, etc. 405func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *SearchOptions) { 406 atomMatchCount := 0 407 visitMatches(mt, known, func(mt matchTree) { 408 atomMatchCount++ 409 }) 410 411 // atom-count boosts files with matches from more than 1 atom. The 412 // maximum boost is scoreFactorAtomMatch. 413 if atomMatchCount > 0 { 414 fileMatch.addScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, opts.DebugScore) 415 } 416 417 maxFileScore := 0.0 418 repetitions := 0 419 for i := range fileMatch.LineMatches { 420 if maxFileScore < fileMatch.LineMatches[i].Score { 421 maxFileScore = fileMatch.LineMatches[i].Score 422 repetitions = 0 423 } else if maxFileScore == fileMatch.LineMatches[i].Score { 424 repetitions += 1 425 } 426 427 // Order by ordering in file. 428 fileMatch.LineMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.LineMatches)))) 429 } 430 431 for i := range fileMatch.ChunkMatches { 432 if maxFileScore < fileMatch.ChunkMatches[i].Score { 433 maxFileScore = fileMatch.ChunkMatches[i].Score 434 } 435 436 // Order by ordering in file. 437 fileMatch.ChunkMatches[i].Score += scoreLineOrderFactor * (1.0 - (float64(i) / float64(len(fileMatch.ChunkMatches)))) 438 } 439 440 // Maintain ordering of input files. This 441 // strictly dominates the in-file ordering of 442 // the matches. 443 fileMatch.addScore("fragment", maxFileScore, opts.DebugScore) 444 445 // Prefer docs with several top-scored matches. 446 fileMatch.addScore("repetition-boost", scoreRepetitionFactor*float64(repetitions), opts.DebugScore) 447 448 if opts.UseDocumentRanks && len(d.ranks) > int(doc) { 449 weight := scoreFileRankFactor 450 if opts.DocumentRanksWeight > 0.0 { 451 weight = opts.DocumentRanksWeight 452 } 453 454 ranks := d.ranks[doc] 455 // The ranks slice always contains one entry representing the file rank (unless it's empty since the 456 // file doesn't have a rank). This is left over from when documents could have multiple rank signals, 457 // and we plan to clean this up. 458 if len(ranks) > 0 { 459 // The file rank represents a log (base 2) count. The log ranks should be bounded at 32, but we 460 // cap it just in case to ensure it falls in the range [0, 1]. 461 normalized := math.Min(1.0, ranks[0]/32.0) 462 fileMatch.addScore("file-rank", weight*normalized, opts.DebugScore) 463 } 464 } 465 466 md := d.repoMetaData[d.repos[doc]] 467 fileMatch.addScore("doc-order", scoreFileOrderFactor*(1.0-float64(doc)/float64(len(d.boundaries))), opts.DebugScore) 468 fileMatch.addScore("repo-rank", scoreRepoRankFactor*float64(md.Rank)/maxUInt16, opts.DebugScore) 469 470 if opts.DebugScore { 471 fileMatch.Debug = strings.TrimSuffix(fileMatch.Debug, ", ") 472 } 473} 474 475// scoreFileUsingBM25 computes a score for the file match using an approximation to BM25, the most common scoring 476// algorithm for keyword search: https://en.wikipedia.org/wiki/Okapi_BM25. It implements all parts of the formula 477// except inverse document frequency (idf), since we don't have access to global term frequency statistics. 478// 479// This scoring strategy ignores all other signals including document ranks. This keeps things simple for now, 480// since BM25 is not normalized and can be tricky to combine with other scoring signals. 481func (d *indexData) scoreFileUsingBM25(fileMatch *FileMatch, doc uint32, cands []*candidateMatch, opts *SearchOptions) { 482 // Treat each candidate match as a term and compute the frequencies. For now, ignore case 483 // sensitivity and treat filenames and symbols the same as content. 484 termFreqs := map[string]int{} 485 for _, cand := range cands { 486 term := string(cand.substrLowered) 487 termFreqs[term]++ 488 } 489 490 // Compute the file length ratio. Usually the calculation would be based on terms, but using 491 // bytes should work fine, as we're just computing a ratio. 492 fileLength := float64(d.boundaries[doc+1] - d.boundaries[doc]) 493 numFiles := len(d.boundaries) 494 averageFileLength := float64(d.boundaries[numFiles-1]) / float64(numFiles) 495 L := fileLength / averageFileLength 496 497 // Use standard parameter defaults (used in Lucene and academic papers) 498 k, b := 1.2, 0.75 499 sumTf := 0.0 // Just for debugging 500 score := 0.0 501 for _, freq := range termFreqs { 502 tf := float64(freq) 503 sumTf += tf 504 score += ((k + 1.0) * tf) / (k*(1.0-b+b*L) + tf) 505 } 506 507 fileMatch.addKeywordScore(score, sumTf, L, opts.DebugScore) 508} 509 510func addRepo(res *SearchResult, repo *Repository) { 511 if res.RepoURLs == nil { 512 res.RepoURLs = map[string]string{} 513 } 514 res.RepoURLs[repo.Name] = repo.FileURLTemplate 515 516 if res.LineFragments == nil { 517 res.LineFragments = map[string]string{} 518 } 519 res.LineFragments[repo.Name] = repo.LineFragmentTemplate 520} 521 522type sortByOffsetSlice []*candidateMatch 523 524func (m sortByOffsetSlice) Len() int { return len(m) } 525func (m sortByOffsetSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 526func (m sortByOffsetSlice) Less(i, j int) bool { 527 return m[i].byteOffset < m[j].byteOffset 528} 529 530// Gather matches from this document. This never returns a mixture of 531// filename/content matches: if there are content matches, all 532// filename matches are trimmed from the result. The matches are 533// returned in document order and are non-overlapping. 534// 535// If `merge` is set, overlapping and adjacent matches will be merged 536// into a single match. Otherwise, overlapping matches will be removed, 537// but adjacent matches will remain. 538func gatherMatches(mt matchTree, known map[matchTree]bool, merge bool) []*candidateMatch { 539 var cands []*candidateMatch 540 visitMatches(mt, known, func(mt matchTree) { 541 if smt, ok := mt.(*substrMatchTree); ok { 542 cands = append(cands, smt.current...) 543 } 544 if rmt, ok := mt.(*regexpMatchTree); ok { 545 cands = append(cands, rmt.found...) 546 } 547 if rmt, ok := mt.(*wordMatchTree); ok { 548 cands = append(cands, rmt.found...) 549 } 550 if smt, ok := mt.(*symbolRegexpMatchTree); ok { 551 cands = append(cands, smt.found...) 552 } 553 }) 554 555 foundContentMatch := false 556 for _, c := range cands { 557 if !c.fileName { 558 foundContentMatch = true 559 break 560 } 561 } 562 563 res := cands[:0] 564 for _, c := range cands { 565 if !foundContentMatch || !c.fileName { 566 res = append(res, c) 567 } 568 } 569 cands = res 570 571 if merge { 572 // Merge adjacent candidates. This guarantees that the matches 573 // are non-overlapping. 574 sort.Sort((sortByOffsetSlice)(cands)) 575 res = cands[:0] 576 for i, c := range cands { 577 if i == 0 { 578 res = append(res, c) 579 continue 580 } 581 last := res[len(res)-1] 582 lastEnd := last.byteOffset + last.byteMatchSz 583 end := c.byteOffset + c.byteMatchSz 584 if lastEnd >= c.byteOffset { 585 if end > lastEnd { 586 last.byteMatchSz = end - last.byteOffset 587 } 588 continue 589 } 590 591 res = append(res, c) 592 } 593 } else { 594 // Remove overlapping candidates. This guarantees that the matches 595 // are non-overlapping, but also preserves expected match counts. 596 sort.Sort((sortByOffsetSlice)(cands)) 597 res = cands[:0] 598 for i, c := range cands { 599 if i == 0 { 600 res = append(res, c) 601 continue 602 } 603 last := res[len(res)-1] 604 lastEnd := last.byteOffset + last.byteMatchSz 605 if lastEnd > c.byteOffset { 606 continue 607 } 608 609 res = append(res, c) 610 } 611 } 612 613 return res 614} 615 616func (d *indexData) branchIndex(docID uint32) int { 617 mask := d.fileBranchMasks[docID] 618 idx := 0 619 for mask != 0 { 620 if mask&0x1 != 0 { 621 return idx 622 } 623 idx++ 624 mask >>= 1 625 } 626 return -1 627} 628 629// gatherBranches returns a list of branch names taking into account any branch 630// filters in the query. If the query contains a branch filter, it returns all 631// branches containing the docID and matching the branch filter. Otherwise, it 632// returns all branches containing docID. 633func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTree]bool) []string { 634 var mask uint64 635 visitMatches(mt, known, func(mt matchTree) { 636 bq, ok := mt.(*branchQueryMatchTree) 637 if !ok { 638 return 639 } 640 641 mask = mask | bq.branchMask() 642 }) 643 644 if mask == 0 { 645 mask = d.fileBranchMasks[docID] 646 } 647 648 var branches []string 649 id := uint32(1) 650 branchNames := d.branchNames[d.repos[docID]] 651 for mask != 0 { 652 if mask&0x1 != 0 { 653 branches = append(branches, branchNames[uint(id)]) 654 } 655 id <<= 1 656 mask >>= 1 657 } 658 659 return branches 660} 661 662func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl *RepoList, err error) { 663 var include func(rle *RepoListEntry) bool 664 665 q = d.simplify(q) 666 if c, ok := q.(*query.Const); ok { 667 if !c.Value { 668 return &RepoList{}, nil 669 } 670 include = func(rle *RepoListEntry) bool { 671 return true 672 } 673 } else { 674 sr, err := d.Search(ctx, q, &SearchOptions{ 675 ShardRepoMaxMatchCount: 1, 676 }) 677 if err != nil { 678 return nil, err 679 } 680 681 foundRepos := make(map[string]struct{}, len(sr.Files)) 682 for _, file := range sr.Files { 683 foundRepos[file.Repository] = struct{}{} 684 } 685 686 include = func(rle *RepoListEntry) bool { 687 _, ok := foundRepos[rle.Repository.Name] 688 return ok 689 } 690 } 691 692 var l RepoList 693 694 field, err := opts.GetField() 695 if err != nil { 696 return nil, err 697 } 698 switch field { 699 case RepoListFieldRepos: 700 l.Repos = make([]*RepoListEntry, 0, len(d.repoListEntry)) 701 case RepoListFieldReposMap: 702 l.ReposMap = make(ReposMap, len(d.repoListEntry)) 703 } 704 705 for i := range d.repoListEntry { 706 if d.repoMetaData[i].Tombstone { 707 continue 708 } 709 rle := &d.repoListEntry[i] 710 if !include(rle) { 711 continue 712 } 713 714 l.Stats.Add(&rle.Stats) 715 716 // Backwards compat for when ID is missing 717 if rle.Repository.ID == 0 { 718 l.Repos = append(l.Repos, rle) 719 continue 720 } 721 722 switch field { 723 case RepoListFieldRepos: 724 l.Repos = append(l.Repos, rle) 725 case RepoListFieldReposMap: 726 l.ReposMap[rle.Repository.ID] = MinimalRepoListEntry{ 727 HasSymbols: rle.Repository.HasSymbols, 728 Branches: rle.Repository.Branches, 729 IndexTimeUnix: rle.IndexMetadata.IndexTime.Unix(), 730 } 731 } 732 733 } 734 735 // Only one of these fields is populated and in all cases the size of that 736 // field is the number of Repos in this shard. 737 l.Stats.Repos = len(l.Repos) + len(l.ReposMap) 738 739 return &l, nil 740} 741 742// regexpToMatchTreeRecursive converts a regular expression to a matchTree mt. If 743// mt is equivalent to the input r, isEqual = true and the matchTree can be used 744// in place of the regex r. If singleLine = true, then the matchTree and all 745// its children only match terms on the same line. singleLine is used during 746// recursion to decide whether to return an andLineMatchTree (singleLine = true) 747// or a andMatchTree (singleLine = false). 748func (d *indexData) regexpToMatchTreeRecursive(r *syntax.Regexp, minTextSize int, fileName bool, caseSensitive bool) (mt matchTree, isEqual bool, singleLine bool, err error) { 749 // TODO - we could perhaps transform Begin/EndText in '\n'? 750 // TODO - we could perhaps transform CharClass in (OrQuery ) 751 // if there are just a few runes, and part of a OpConcat? 752 switch r.Op { 753 case syntax.OpLiteral: 754 s := string(r.Rune) 755 if len(s) >= minTextSize { 756 mt, err := d.newSubstringMatchTree(&query.Substring{Pattern: s, FileName: fileName, CaseSensitive: caseSensitive}) 757 return mt, true, !strings.Contains(s, "\n"), err 758 } 759 case syntax.OpCapture: 760 return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive) 761 762 case syntax.OpPlus: 763 return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive) 764 765 case syntax.OpRepeat: 766 if r.Min == 1 { 767 return d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive) 768 } else if r.Min > 1 { 769 // (x){2,} can't be expressed precisely by the matchTree 770 mt, _, singleLine, err := d.regexpToMatchTreeRecursive(r.Sub[0], minTextSize, fileName, caseSensitive) 771 return mt, false, singleLine, err 772 } 773 case syntax.OpConcat, syntax.OpAlternate: 774 var qs []matchTree 775 isEq := true 776 singleLine = true 777 for _, sr := range r.Sub { 778 if sq, subIsEq, subSingleLine, err := d.regexpToMatchTreeRecursive(sr, minTextSize, fileName, caseSensitive); sq != nil { 779 if err != nil { 780 return nil, false, false, err 781 } 782 isEq = isEq && subIsEq 783 singleLine = singleLine && subSingleLine 784 qs = append(qs, sq) 785 } 786 } 787 if r.Op == syntax.OpConcat { 788 if len(qs) > 1 { 789 isEq = false 790 } 791 newQs := make([]matchTree, 0, len(qs)) 792 for _, q := range qs { 793 if _, ok := q.(*bruteForceMatchTree); ok { 794 continue 795 } 796 newQs = append(newQs, q) 797 } 798 if len(newQs) == 1 { 799 return newQs[0], isEq, singleLine, nil 800 } 801 if len(newQs) == 0 { 802 return &bruteForceMatchTree{}, isEq, singleLine, nil 803 } 804 if singleLine { 805 return &andLineMatchTree{andMatchTree{children: newQs}}, isEq, singleLine, nil 806 } 807 return &andMatchTree{newQs}, isEq, singleLine, nil 808 } 809 for _, q := range qs { 810 if _, ok := q.(*bruteForceMatchTree); ok { 811 return q, isEq, false, nil 812 } 813 } 814 if len(qs) == 0 { 815 return &noMatchTree{Why: "const"}, isEq, false, nil 816 } 817 return &orMatchTree{qs}, isEq, false, nil 818 case syntax.OpStar: 819 if r.Sub[0].Op == syntax.OpAnyCharNotNL { 820 return &bruteForceMatchTree{}, false, true, nil 821 } 822 } 823 return &bruteForceMatchTree{}, false, false, nil 824} 825 826type timer struct { 827 last time.Time 828} 829 830func newTimer() *timer { 831 return &timer{ 832 last: time.Now(), 833 } 834} 835 836func (t *timer) Elapsed() time.Duration { 837 now := time.Now() 838 d := now.Sub(t.last) 839 t.last = now 840 return d 841}