fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

ranking: boost Java matches based on symbol kind (#421)

With this change we add more granular boosts for Java matches based on
their "kind".

I am also correcting an inconsequential mistake I made earlier. Since we
use "interactive" mode in go-ctags, the output format is JSON which sets
the "kind" to its long form and not the single letter form. For
example "class" instead of "c". Therefore we can remove the one-letter
cases from scoring.

Thanks to @olafurpg for ordering the Java "kinds" by importance.

+167 -17
+48 -11
build/e2e_test.go
··· 799 799 }, 800 800 } 801 801 802 + exampleJava, err := os.ReadFile("./test_data/example.java") 803 + if err != nil { 804 + t.Fatal(err) 805 + } 806 + 802 807 cases := []struct { 803 808 fileName string 804 809 content []byte ··· 807 812 wantScore float64 808 813 }{ 809 814 { 810 - fileName: "hw.java", 811 - content: []byte(` 812 - public class HelloWorld 813 - { 814 - public static void main (String[] args) 815 - { 816 - System.out.println("Hello World!"); 817 - } 818 - } 819 - `), 820 - query: &query.Substring{Content: true, Pattern: "lloWorld"}, 815 + fileName: "example.java", 816 + content: exampleJava, 817 + query: &query.Substring{Content: true, Pattern: "nnerClass"}, 821 818 wantLanguage: "Java", 822 819 // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 400 (atom) + 10 (file order) 823 820 wantScore: 6960, 821 + }, 822 + { 823 + fileName: "example.java", 824 + content: exampleJava, 825 + query: &query.Substring{Content: true, Pattern: "innerEnum"}, 826 + wantLanguage: "Java", 827 + // 7000 (symbol) + 900 (Java enum) + 500 (word) + 400 (atom) + 10 (file order) 828 + wantScore: 8810, 829 + }, 830 + { 831 + fileName: "example.java", 832 + content: exampleJava, 833 + query: &query.Substring{Content: true, Pattern: "innerInterface"}, 834 + wantLanguage: "Java", 835 + // 7000 (symbol) + 800 (Java interface) + 500 (word) + 400 (atom) + 10 (file order) 836 + wantScore: 8710, 837 + }, 838 + { 839 + fileName: "example.java", 840 + content: exampleJava, 841 + query: &query.Substring{Content: true, Pattern: "innerMethod"}, 842 + wantLanguage: "Java", 843 + // 7000 (symbol) + 700 (Java method) + 500 (word) + 400 (atom) + 10 (file order) 844 + wantScore: 8610, 845 + }, 846 + { 847 + fileName: "example.java", 848 + content: exampleJava, 849 + query: &query.Substring{Content: true, Pattern: "field"}, 850 + wantLanguage: "Java", 851 + // 7000 (symbol) + 600 (Java field) + 500 (word) + 400 (atom) + 10 (file order) 852 + wantScore: 8510, 853 + 
}, 854 + { 855 + fileName: "example.java", 856 + content: exampleJava, 857 + query: &query.Substring{Content: true, Pattern: "B"}, 858 + wantLanguage: "Java", 859 + // 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 400 (atom) + 10 (file order) 860 + wantScore: 8410, 824 861 }, 825 862 { 826 863 fileName: "a/b/c/config.go",
+99
build/test_data/example.java
··· 1 + package minimized; 2 + 3 + public class InnerClasses { 4 + 5 + private final int exampleField; 6 + 7 + private static final String STRING = "asdf"; 8 + 9 + private static final int top = 5; 10 + private static final int bottom = 10; 11 + 12 + public InnerClasses(int exampleField) { 13 + this.exampleField = exampleField; 14 + } 15 + 16 + public enum InnerEnum { 17 + A, 18 + B, 19 + C 20 + } 21 + 22 + public interface InnerInterface<A, B> { 23 + B apply(A a); 24 + } 25 + 26 + public @interface InnerAnnotation { 27 + int value(); 28 + } 29 + 30 + @SuppressWarnings(STRING + " ") 31 + @InnerAnnotation(top / bottom) 32 + public static class InnerStaticClass { 33 + 34 + public static void innerStaticMethod() {} 35 + } 36 + 37 + public class InnerClass implements InnerInterface<Integer, Integer> { 38 + private final int field; 39 + 40 + public InnerClass(int field) { 41 + this.field = field; 42 + } 43 + 44 + public void innerMethod() { 45 + System.out.println(field + exampleField); 46 + } 47 + 48 + @Override 49 + public Integer apply(Integer integer) { 50 + return field * integer; 51 + } 52 + } 53 + 54 + private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) { 55 + return fn.apply(a); 56 + } 57 + 58 + public static void testEnum(InnerEnum magicEnum) { 59 + if (System.nanoTime() > System.currentTimeMillis()) { 60 + magicEnum = InnerEnum.B; 61 + } 62 + switch (magicEnum) { 63 + case B: 64 + System.out.println("b"); 65 + break; 66 + case A: 67 + System.out.println("a"); 68 + break; 69 + default: 70 + break; 71 + } 72 + if (magicEnum == InnerEnum.A) System.out.println("a"); 73 + else if (magicEnum == InnerEnum.C) System.out.println("b"); 74 + else System.out.println("c"); 75 + } 76 + 77 + public static void testAnon() { 78 + InnerInterface<String, String> fn = 79 + new InnerInterface<String, String>() { 80 + @Override 81 + public String apply(String s) { 82 + return s + "b"; 83 + } 84 + }; 85 + System.out.println(fn.apply("a")); 86 + } 87 + 88 + public 
static String app() { 89 + int a = 42; 90 + InnerStaticClass.innerStaticMethod(); 91 + InnerClasses innerClasses = new InnerClasses(a); 92 + InnerClass innerClass = innerClasses.new InnerClass(a); 93 + innerClass.innerMethod(); 94 + System.out.println(runInnerInterface(innerClass, a)); 95 + testEnum(InnerEnum.A); 96 + testAnon(); 97 + return ""; 98 + } 99 + }
+20 -6
contentprovider.go
··· 460 460 scoreImportantThreshold = 2000.0 461 461 scoreSymbol = 7000.0 462 462 scorePartialSymbol = 4000.0 463 - scoreKindMatch = 1000.0 463 + scoreKindMatch = 100.0 464 464 scoreFactorAtomMatch = 400.0 465 465 scoreShardRankFactor = 20.0 466 466 scoreFileOrderFactor = 10.0 ··· 641 641 // scoreKind boosts a match based on the combination of language and kind. The 642 642 // language string comes from go-enry, the kind string from ctags. 643 643 func scoreKind(language string, kind string) float64 { 644 - // Refer to universal-ctags --list-kinds=<language> to learn about the mappings 645 - // for a language. 644 + // Refer to universal-ctags --list-kinds-full=<language> to learn about which 645 + // kinds are detected for which language. 646 + // 647 + // Note that go-ctags uses universal-ctags's interactive mode and thus returns 648 + // the full name for "kind" and not the one-letter abbreviation. 649 + var factor float64 646 650 switch language { 647 651 case "Java": 648 652 switch kind { 649 653 // 2022-03-30: go-ctags contains a regex rule for Java classes that sets "kind" 650 654 // to "classes" instead of "c". We have to cover both cases to support existing 651 655 // indexes. 652 - case "c", "classes": 653 - return scoreKindMatch 656 + case "class", "classes": 657 + factor = 10 658 + case "enum": 659 + factor = 9 660 + case "interface": 661 + factor = 8 662 + case "method": 663 + factor = 7 664 + case "field": 665 + factor = 6 666 + case "enumConstant": 667 + factor = 5 654 668 } 655 669 } 656 - return 0 670 + return factor * scoreKindMatch 657 671 } 658 672 659 673 type matchScoreSlice []LineMatch