fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

score: boost exported go ident and downrank _test.go (#675)

Right now our symbol analyser doesn't tell us whether a symbol is exported. We
add a Go-specific tweak here to boost exported symbols. Ideally this would be
encoded in the symbol information itself.

Additionally, we already downrank _test.go files via the doc order. But in the
case of symbol matches, the boosting outweighs doc order significantly. I found
the extra downranking quite useful when experimenting.

Test Plan: lots of manual testing on the keyword branch

+42 -9
+2 -2
build/e2e_test.go
··· 1041 1041 &query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}}, 1042 1042 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}}}, 1043 1043 wantLanguage: "Go", 1044 - // 7000 (full base match) + 800 (Go func) + 500 (word) + 200 (atom) + 10 (file order) 1045 - wantScore: 8510, 1044 + // 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom) + 10 (file order) 1045 + wantScore: 8560, 1046 1046 }, 1047 1047 // 1048 1048 // C++
+40 -7
contentprovider.go
··· 22 22 "path" 23 23 "sort" 24 24 "strings" 25 + "unicode" 25 26 "unicode/utf8" 26 27 27 28 "golang.org/x/exp/slices" ··· 510 511 score.score += s 511 512 } 512 513 514 + data := p.data(m.FileName) 515 + filename := p.data(true) 516 + 513 517 for i, r := range m.Ranges { 514 518 // calculate the start and end offset relative to the start of the content 515 519 relStartOffset := int(r.Start.ByteOffset - m.ContentStart.ByteOffset) ··· 560 564 si = p.id.symbols.data(start + uint32(secIdx)) 561 565 } 562 566 if si != nil { 563 - addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 567 + sym := sectionSlice(data, sec) 568 + addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreSymbolKind(language, filename, sym, si.Kind)) 564 569 } 565 570 } 566 571 ··· 593 598 score.score += s 594 599 } 595 600 601 + data := p.data(m.FileName) 602 + filename := p.data(true) 603 + 596 604 for _, f := range m.LineFragments { 597 605 startBoundary := f.LineOffset < len(m.Line) && (f.LineOffset == 0 || byteClass(m.Line[f.LineOffset-1]) != byteClass(m.Line[f.LineOffset])) 598 606 ··· 639 647 } 640 648 if si != nil { 641 649 // the LineFragment may not be on a symbol, then si will be nil. 642 - addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreKind(language, si.Kind)) 650 + sym := sectionSlice(data, sec) 651 + addScore(fmt.Sprintf("kind:%s:%s", language, si.Kind), scoreSymbolKind(language, filename, sym, si.Kind)) 643 652 } 644 653 } 645 654 ··· 656 665 return maxScore.score, maxScore.what 657 666 } 658 667 659 - // scoreKind boosts a match based on the combination of language and kind. The 660 - // language string comes from go-enry, the kind string from ctags. 661 - func scoreKind(language string, kind string) float64 { 668 + // sectionSlice will return data[sec.Start:sec.End] but will clip Start and 669 + // End such that it won't be out of range. 
670 + func sectionSlice(data []byte, sec DocumentSection) []byte { 671 + l := uint32(len(data)) 672 + if sec.Start >= l { 673 + return nil 674 + } 675 + if sec.End > l { 676 + sec.End = l 677 + } 678 + return data[sec.Start:sec.End] 679 + } 680 + 681 + // scoreSymbolKind boosts a match based on the combination of language, symbol 682 + // and kind. The language string comes from go-enry, the symbol and kind from 683 + // ctags. 684 + func scoreSymbolKind(language string, filename []byte, sym []byte, kind string) float64 { 662 685 var factor float64 663 686 664 687 // Generic ranking which will be overriden by language specific ranking ··· 731 754 // scip-ctags regression workaround https://github.com/sourcegraph/sourcegraph/issues/57659 732 755 // for each case a description of the fields in ctags in the comment 733 756 case "type": // interface struct talias 734 - factor = 10 735 - case "method", "function": // methodSpec 757 + factor = 9 758 + case "method", "function": // methodSpec func 736 759 factor = 8 737 760 case "variable": // var member 738 761 factor = 7 ··· 756 779 case "var": // variables 757 780 factor = 5 758 781 } 782 + 783 + // Boost exported go symbols. Same implementation as token.IsExported 784 + if ch, _ := utf8.DecodeRune(sym); unicode.IsUpper(ch) { 785 + factor += 0.5 786 + } 787 + 788 + if bytes.HasSuffix(filename, []byte("_test.go")) { 789 + factor *= 0.8 790 + } 791 + 759 792 // Could also rank on: 760 793 // 761 794 // - anonMember struct anonymous members