fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

ranking: allow partial overlap with symbol (#742)

With this update, we score any match that overlaps a symbol, even partially. Previously, we only added a symbol score if the match was fully contained in a symbol.

Test plan:
- new unit tests
- updated scoring tests

+187 -18
+32
build/scoring_test.go
··· 162 162 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order) 163 163 wantScore: 8710, 164 164 }, 165 + { 166 + fileName: "example.java", 167 + content: exampleJava, 168 + query: &query.Substring{Content: true, Pattern: "unInnerInterface("}, 169 + language: "Java", 170 + // 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word) + 10 (file order) 171 + wantScore: 4760, 172 + }, 173 + { 174 + fileName: "example.java", 175 + content: exampleJava, 176 + query: &query.Substring{Content: true, Pattern: "InnerEnum"}, 177 + language: "Java", 178 + // 7000 (Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 179 + wantScore: 8410, 180 + }, 181 + { 182 + fileName: "example.java", 183 + content: exampleJava, 184 + query: &query.Substring{Content: true, Pattern: "enum InnerEnum"}, 185 + language: "Java", 186 + // 5500 (edge Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 187 + wantScore: 6910, 188 + }, 189 + { 190 + fileName: "example.java", 191 + content: exampleJava, 192 + query: &query.Substring{Content: true, Pattern: "public enum InnerEnum {"}, 193 + language: "Java", 194 + // 4000 (overlap Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 195 + wantScore: 5410, 196 + }, 165 197 } 166 198 167 199 for _, c := range cases {
+48 -3
contentprovider.go
··· 552 552 return 0, false 553 553 } 554 554 555 + // findMaxOverlappingSection returns the index of the section in secs that 556 + // overlaps the most with the area defined by off and sz, relative to the size 557 + // of the section. If no section overlaps, it returns 0, false. If multiple 558 + // sections overlap the same amount, the first one is returned. 559 + // 560 + // The implementation assumes that sections do not overlap and are sorted by 561 + // DocumentSection.Start. 562 + func findMaxOverlappingSection(secs []DocumentSection, off, sz uint32) (uint32, bool) { 563 + start := off 564 + end := off + sz 565 + 566 + // Find the first section that might overlap 567 + j := sort.Search(len(secs), func(i int) bool { return secs[i].End > start }) 568 + 569 + if j == len(secs) || secs[j].Start >= end { 570 + // No overlap. 571 + return 0, false 572 + } 573 + 574 + relOverlap := func(j int) float64 { 575 + secSize := secs[j].End - secs[j].Start 576 + if secSize == 0 { 577 + return 0 578 + } 579 + // This cannot overflow because we make sure there is overlap before calling relOverlap 580 + overlap := min(secs[j].End, end) - max(secs[j].Start, start) 581 + return float64(overlap) / float64(secSize) 582 + } 583 + 584 + ol1 := relOverlap(j) 585 + if epsilonEqualsOne(ol1) || j == len(secs)-1 || secs[j+1].Start >= end { 586 + return uint32(j), ol1 > 0 587 + } 588 + 589 + // We know that [off,off+sz[ overlaps with at least 2 sections. We only have to check 590 + // if the second section overlaps more than the first one, because a third 591 + // section can only overlap if the overlap with the second section is complete. 
592 + ol2 := relOverlap(j + 1) 593 + if ol2 > ol1 { 594 + return uint32(j + 1), ol2 > 0 595 + } 596 + 597 + return uint32(j), ol1 > 0 598 + } 599 + 555 600 func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *Symbol, bool) { 556 601 if cm.fileName { 557 602 return DocumentSection{}, nil, false ··· 561 606 562 607 secIdx, ok := cm.symbolIdx, cm.symbol 563 608 if !ok { 564 - // Not from a symbol matchtree. Lets see if it intersects with a symbol. 565 - secIdx, ok = findSection(secs, cm.byteOffset, cm.byteMatchSz) 609 + // Not from a symbol matchTree. Let's see if it overlaps with a symbol. 610 + secIdx, ok = findMaxOverlappingSection(secs, cm.byteOffset, cm.byteMatchSz) 566 611 } 567 612 if !ok { 568 613 return DocumentSection{}, nil, false ··· 637 682 } else if startMatch || endMatch { 638 683 addScore("EdgeSymbol", (scoreSymbol+scorePartialSymbol)/2) 639 684 } else { 640 - addScore("InnerSymbol", scorePartialSymbol) 685 + addScore("OverlapSymbol", scorePartialSymbol) 641 686 } 642 687 643 688 // Score based on symbol data
+53
contentprovider_test.go
··· 407 407 t.Fatal("unexpected value for backwards call for first column on second line", got) 408 408 } 409 409 } 410 + 411 + func TestFindMaxOverlappingSection(t *testing.T) { 412 + secs := []DocumentSection{ 413 + {Start: 0, End: 5}, 414 + {Start: 8, End: 19}, 415 + {Start: 22, End: 26}, 416 + } 417 + // 012345678901234567890123456 418 + // [....[ 419 + // [..........[ 420 + // [...[ 421 + 422 + testcases := []struct { 423 + name string 424 + off uint32 425 + sz uint32 426 + wantSecIdx uint32 427 + wantOverlap bool 428 + }{ 429 + {off: 0, sz: 1, wantSecIdx: 0, wantOverlap: true}, 430 + {off: 0, sz: 5, wantSecIdx: 0, wantOverlap: true}, 431 + {off: 2, sz: 5, wantSecIdx: 0, wantOverlap: true}, 432 + {off: 2, sz: 50, wantSecIdx: 1, wantOverlap: true}, 433 + {off: 4, sz: 10, wantSecIdx: 1, wantOverlap: true}, 434 + {off: 5, sz: 15, wantSecIdx: 1, wantOverlap: true}, 435 + {off: 18, sz: 10, wantSecIdx: 2, wantOverlap: true}, 436 + 437 + // Prefer full overlap, break ties by preferring the earlier section 438 + {off: 10, sz: 20, wantSecIdx: 2, wantOverlap: true}, 439 + {off: 0, sz: 100, wantSecIdx: 0, wantOverlap: true}, 440 + {off: 8, sz: 100, wantSecIdx: 1, wantOverlap: true}, 441 + {off: 0, sz: 10, wantSecIdx: 0, wantOverlap: true}, 442 + {off: 16, sz: 10, wantSecIdx: 2, wantOverlap: true}, 443 + 444 + // No overlap 445 + {off: 5, sz: 2, wantOverlap: false}, 446 + {off: 20, sz: 1, wantOverlap: false}, 447 + {off: 99, sz: 1, wantOverlap: false}, 448 + {off: 0, sz: 0, wantOverlap: false}, 449 + } 450 + 451 + for _, tt := range testcases { 452 + t.Run(fmt.Sprintf("off=%d size=%d", tt.off, tt.sz), func(t *testing.T) { 453 + got, haveOverlap := findMaxOverlappingSection(secs, tt.off, tt.sz) 454 + if haveOverlap != tt.wantOverlap { 455 + t.Fatalf("expected overlap %v, got %v", tt.wantOverlap, haveOverlap) 456 + } 457 + if got != tt.wantSecIdx { 458 + t.Fatalf("expected section %d, got %d", tt.wantSecIdx, got) 459 + } 460 + }) 461 + } 462 + }
+1
internal/e2e/e2e_rank_test.go
··· 54 54 q("test server", "github.com/golang/go/src/net/http/httptest/server.go"), 55 55 q("bytes buffer", "github.com/golang/go/src/bytes/buffer.go"), 56 56 q("bufio buffer", "github.com/golang/go/src/bufio/scan.go"), 57 + q("time compare\\(", "github.com/golang/go/src/time/time.go"), 57 58 58 59 // sourcegraph/sourcegraph 59 60 q("graphql type User", "github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/schema.graphql"),
+1 -1
internal/e2e/testdata/coverage_data_writer.txt
··· 5 5 github.com/golang/go/src/internal/coverage/stringtab/stringtab.go 6 6 19:type Writer struct { 7 7 27:func (stw *Writer) InitWriter() { 8 - 9: "internal/coverage/slicereader" 8 + 70:func (stw *Writer) Write(w io.Writer) error { 9 9 hidden 16 more line matches 10 10 11 11 github.com/golang/go/src/cmd/cover/func.go
+4 -4
internal/e2e/testdata/rank_stats.txt
··· 1 - queries: 14 2 - recall@1: 9 (64%) 3 - recall@5: 11 (79%) 4 - mrr: 0.710733 1 + queries: 15 2 + recall@1: 10 (67%) 3 + recall@5: 12 (80%) 4 + mrr: 0.730017
+38
internal/e2e/testdata/time_compare.txt
··· 1 + queryString: time compare\( 2 + query: (and substr:"time" substr:"compare(") 3 + targetRank: 1 4 + 5 + **github.com/golang/go/src/time/time.go** 6 + 129:type Time struct { 7 + 79:package time 8 + 271:func (t Time) Compare(u Time) int { 9 + hidden 250 more line matches 10 + 11 + github.com/sourcegraph/sourcegraph/internal/api/api.go 12 + 127:func (r ExternalRepoSpec) Compare(s ExternalRepoSpec) int { 13 + 7: "time" 14 + 170: CreatedAt time.Time // the date when this settings value was created 15 + 16 + github.com/sourcegraph/sourcegraph/client/shared/src/codeintel/scip.ts 17 + 117: public compare(other: Range): number { 18 + 53: return this.compare(other) < 0 19 + 56: return this.compare(other) <= 0 20 + hidden 10 more line matches 21 + 22 + github.com/golang/go/src/strings/compare.go 23 + 13:func Compare(a, b string) int { 24 + 14: // NOTE(rsc): This function does NOT call the runtime cmpstring function, 25 + 26 + github.com/golang/go/src/go/constant/value.go 27 + 1337:func Compare(x_ Value, op token.Token, y_ Value) bool { 28 + 1102:// Division by zero leads to a run-time panic. 29 + 1381: re := Compare(x.re, token.EQL, y.re) 30 + hidden 1 more line matches 31 + 32 + github.com/golang/go/src/syscall/zsyscall_windows.go 33 + 878:func GetSystemTimeAsFileTime(time *Filetime) { 34 + 1088:func SetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetime) (err error) { 35 + 132: procGetSystemTimeAsFileTime = modkernel32.NewProc("GetSystemTimeAsFileTime") 36 + hidden 19 more line matches 37 + 38 + hidden 139 more file matches
+2 -2
internal/e2e/testdata/zoekt_searcher.txt
··· 11 11 github.com/sourcegraph/zoekt/rpc/internal/srv/srv.go 12 12 33:type Searcher struct { 13 13 34: Searcher zoekt.Searcher 14 - 7: "github.com/sourcegraph/zoekt" 14 + 37:func (s *Searcher) Search(ctx context.Context, args *SearchArgs, reply *SearchReply) error { 15 15 hidden 9 more line matches 16 16 17 17 github.com/sourcegraph/sourcegraph/doc/admin/observability/dashboards.md ··· 35 35 github.com/sourcegraph/zoekt/json/json.go 36 36 26: Searcher zoekt.Searcher 37 37 25:type jsonSearcher struct { 38 - 9: "github.com/sourcegraph/zoekt" 38 + 48:func (s *jsonSearcher) jsonSearch(w http.ResponseWriter, req *http.Request) { 39 39 hidden 16 more line matches 40 40 41 41 hidden 119 more file matches
+2 -2
read_test.go
··· 306 306 continue 307 307 } 308 308 309 - if d := cmp.Diff(res.Files, want.FileMatches[j]); d != "" { 310 - t.Errorf("matches for %s on %s\n%s", q, name, d) 309 + if d := cmp.Diff(want.FileMatches[j], res.Files); d != "" { 310 + t.Errorf("matches for %s on %s (-want +got)\n%s", q, name, d) 311 311 } 312 312 } 313 313 }
+2 -2
testdata/golden/TestReadSearch/ctagsrepo_v16.00000.golden
··· 16 16 "Before": null, 17 17 "After": null, 18 18 "FileName": false, 19 - "Score": 501, 19 + "Score": 6801, 20 20 "DebugScore": "", 21 21 "LineFragments": [ 22 22 { ··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 510 32 + "Score": 6810 33 33 } 34 34 ], 35 35 [
+2 -2
testdata/golden/TestReadSearch/ctagsrepo_v17.00000.golden
··· 16 16 "Before": null, 17 17 "After": null, 18 18 "FileName": false, 19 - "Score": 501, 19 + "Score": 6801, 20 20 "DebugScore": "", 21 21 "LineFragments": [ 22 22 { ··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 510 32 + "Score": 6810 33 33 } 34 34 ], 35 35 [
+2 -2
testdata/golden/TestReadSearch/repo2_v16.00000.golden
··· 16 16 "Before": null, 17 17 "After": null, 18 18 "FileName": false, 19 - "Score": 501, 19 + "Score": 6801, 20 20 "DebugScore": "", 21 21 "LineFragments": [ 22 22 { ··· 29 29 } 30 30 ], 31 31 "Checksum": "Ju1TnQKZ6mE=", 32 - "Score": 510 32 + "Score": 6810 33 33 } 34 34 ], 35 35 [