fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package e2e
16
17import (
18 "context"
19 "math"
20 "os"
21 "testing"
22
23 "github.com/sourcegraph/zoekt"
24 "github.com/sourcegraph/zoekt/index"
25 "github.com/sourcegraph/zoekt/internal/ctags"
26 "github.com/sourcegraph/zoekt/query"
27 "github.com/sourcegraph/zoekt/search"
28)
29
30type scoreCase struct {
31 fileName string
32 content []byte
33 query query.Q
34 language string
35 wantScore float64
36 wantBestLineMatch uint32
37}
38
39func TestFileNameMatch(t *testing.T) {
40 cases := []scoreCase{
41 {
42 fileName: "a/b/c/config.go",
43 query: &query.Substring{FileName: true, Pattern: "config"},
44 language: "Go",
45 // 5500 (partial base at boundary) + 500 (word)
46 wantScore: 6000,
47 },
48 {
49 fileName: "a/b/c/config.go",
50 query: &query.Substring{FileName: true, Pattern: "config.go"},
51 language: "Go",
52 // 7000 (full base match) + 500 (word)
53 wantScore: 7500,
54 },
55 {
56 fileName: "a/config/c/d.go",
57 query: &query.Substring{FileName: true, Pattern: "config"},
58 language: "Go",
59 // 500 (word)
60 wantScore: 500,
61 },
62 }
63
64 for _, c := range cases {
65 checkScoring(t, c, false, ctags.UniversalCTags)
66 }
67}
68
69func TestBM25(t *testing.T) {
70 exampleJava, err := os.ReadFile("./examples/example.java")
71 if err != nil {
72 t.Fatal(err)
73 }
74
75 exampleBin, err := os.ReadFile("./examples/example.bin")
76 if err != nil {
77 t.Fatal(err)
78 }
79
80 cases := []scoreCase{
81 {
82 // Matches on both filename and content
83 fileName: "example.java",
84 query: &query.Substring{Pattern: "example"},
85 content: exampleJava,
86 language: "Java",
87 // sum-termFrequencyScore: 14.00, length-ratio: 1.00
88 wantScore: 2.02,
89 // line 5: private final int exampleField;
90 wantBestLineMatch: 5,
91 }, {
92 // Matches only on content
93 fileName: "example.java",
94 query: &query.And{Children: []query.Q{
95 &query.Substring{Pattern: "inner"},
96 &query.Substring{Pattern: "static"},
97 &query.Substring{Pattern: "interface"},
98 }},
99 content: exampleJava,
100 language: "Java",
101 // sum-termFrequencyScore: 116.00, length-ratio: 1.00
102 wantScore: 6.30,
103 // line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
104 wantBestLineMatch: 54,
105 }, {
106 // another content-only match
107 fileName: "example.java",
108 query: &query.And{Children: []query.Q{
109 &query.Substring{Pattern: "system"},
110 &query.Substring{Pattern: "time"},
111 }},
112 content: exampleJava,
113 language: "Java",
114 // sum-termFrequencies: 12, length-ratio: 1.00
115 wantScore: 3.33,
116 // line 59: if (System.nanoTime() > System.currentTimeMillis()) {
117 wantBestLineMatch: 59,
118 }, {
119 // phrase boosting
120 fileName: "example.java",
121 query: &query.Or{Children: []query.Q{
122 &query.Boost{Child: &query.Substring{Pattern: "public string apply"}, Boost: 20},
123 &query.And{Children: []query.Q{
124 &query.Substring{Pattern: "public"},
125 &query.Substring{Pattern: "string"},
126 &query.Substring{Pattern: "apply"},
127 }},
128 }},
129 content: exampleJava,
130 language: "Java",
131 // sum-termFrequencies: sum-termFrequencies: 40, length-ratio: 1.00
132 wantScore: 140.80,
133 // public String apply(String s) {
134 wantBestLineMatch: 81,
135 },
136 {
137 // Matches only on filename
138 fileName: "example.java",
139 query: &query.Substring{Pattern: "java"},
140 content: exampleJava,
141 language: "Java",
142 // sum-termFrequencyScore: 5.00, length-ratio: 1.00
143 wantScore: 1.77,
144 },
145 {
146 // Matches only on filename, and content is missing
147 fileName: "a/b/c/config.go",
148 query: &query.Substring{Pattern: "config.go"},
149 language: "Go",
150 // sum-termFrequencyScore: 5.00, length-ratio: 0.00
151 wantScore: 2.07,
152 },
153 {
154 fileName: "example.py",
155 query: &query.Substring{Pattern: "example"},
156 language: "Python",
157 // sum-termFrequencyScore: 5.00, length-ratio: 0.00
158 wantScore: 2.07,
159 },
160 {
161 // Match on test should be downweighted
162 fileName: "test_example.py",
163 query: &query.Substring{Pattern: "example"},
164 language: "Python",
165 // sum-termFrequencyScore: 1.00, length-ratio: 0.00
166 wantScore: 1.69,
167 },
168 {
169 // Match on binary should be downweighted
170 fileName: "example.bin",
171 query: &query.Substring{Pattern: "example"},
172 language: "",
173 content: exampleBin,
174 // sum-termFrequencyScore: 1.00, length-ratio: 1.00
175 wantScore: 1.00,
176 },
177 }
178
179 for _, c := range cases {
180 checkScoring(t, c, true, ctags.UniversalCTags)
181 }
182}
183
184func TestJava(t *testing.T) {
185 exampleJava, err := os.ReadFile("./examples/example.java")
186 if err != nil {
187 t.Fatal(err)
188 }
189
190 cases := []scoreCase{
191 {
192 fileName: "example.java",
193 content: exampleJava,
194 query: &query.Substring{Content: true, Pattern: "nerClass"},
195 language: "Java",
196 // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
197 wantScore: 6550,
198 // line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
199 wantBestLineMatch: 37,
200 },
201 {
202 fileName: "example.java",
203 content: exampleJava,
204 query: &query.Substring{Content: true, Pattern: "StaticClass"},
205 language: "Java",
206 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
207 wantScore: 7000,
208 // line 32: public static class InnerStaticClass {
209 wantBestLineMatch: 32,
210 },
211 {
212 fileName: "example.java",
213 content: exampleJava,
214 query: &query.Substring{Content: true, Pattern: "innerEnum"},
215 language: "Java",
216 // 7000 (symbol) + 900 (Java enum) + 500 (word)
217 wantScore: 8400,
218 // line 16: public enum InnerEnum {
219 wantBestLineMatch: 16,
220 },
221 {
222 fileName: "example.java",
223 content: exampleJava,
224 query: &query.Substring{Content: true, Pattern: "innerInterface"},
225 language: "Java",
226 // 7000 (symbol) + 800 (Java interface) + 500 (word)
227 wantScore: 8300,
228 // line 22: public interface InnerInterface<A, B> {
229 wantBestLineMatch: 22,
230 },
231 {
232 fileName: "example.java",
233 content: exampleJava,
234 query: &query.Substring{Content: true, Pattern: "innerMethod"},
235 language: "Java",
236 // 7000 (symbol) + 700 (Java method) + 500 (word)
237 wantScore: 8200,
238 // line 44: public void innerMethod() {
239 wantBestLineMatch: 44,
240 },
241 {
242 fileName: "example.java",
243 content: exampleJava,
244 query: &query.Substring{Content: true, Pattern: "field"},
245 language: "Java",
246 // 7000 (symbol) + 600 (Java field) + 500 (word)
247 wantScore: 8100,
248 // line 38: private final int field;
249 wantBestLineMatch: 38,
250 },
251 {
252 fileName: "example.java",
253 content: exampleJava,
254 query: &query.Substring{Content: true, Pattern: "B"},
255 language: "Java",
256 // 7000 (symbol) + 500 (Java enum constant) + 500 (word)
257 wantScore: 8000,
258 // line 18: B,
259 wantBestLineMatch: 18,
260 },
261 // 2 Atoms (1x content and 1x filename)
262 {
263 fileName: "example.java",
264 content: exampleJava,
265 query: &query.Substring{Pattern: "example"}, // matches filename and a Java field
266 language: "Java",
267 // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
268 wantScore: 6800,
269 // line 5: private final int exampleField;
270 wantBestLineMatch: 5,
271 },
272 // 3 Atoms (2x content, 1x filename)
273 {
274 fileName: "example.java",
275 content: exampleJava,
276 query: &query.Or{Children: []query.Q{
277 &query.Substring{Pattern: "example"}, // matches filename and Java field
278 &query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method
279 }},
280 language: "Java",
281 // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
282 wantScore: 8466,
283 // line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
284 wantBestLineMatch: 54,
285 },
286 // 4 Atoms (4x content)
287 {
288 fileName: "example.java",
289 content: exampleJava,
290 query: &query.Or{Children: []query.Q{
291 &query.Substring{Content: true, Pattern: "testAnon"},
292 &query.Substring{Content: true, Pattern: "Override"},
293 &query.Substring{Content: true, Pattern: "InnerEnum"},
294 &query.Substring{Content: true, Pattern: "app"},
295 }},
296 language: "Java",
297 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
298 wantScore: 8700,
299 // line 16: public enum InnerEnum {
300 wantBestLineMatch: 16,
301 },
302 {
303 fileName: "example.java",
304 content: exampleJava,
305 query: &query.Substring{Content: true, Pattern: "unInnerInterface("},
306 language: "Java",
307 // 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
308 wantScore: 4750,
309 // line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
310 wantBestLineMatch: 54,
311 },
312 {
313 fileName: "example.java",
314 content: exampleJava,
315 query: &query.Substring{Content: true, Pattern: "InnerEnum"},
316 language: "Java",
317 // 7000 (Symbol) + 900 (Java enum) + 500 (word)
318 wantScore: 8400,
319 // line 16: public enum InnerEnum {
320 wantBestLineMatch: 16,
321 },
322 {
323 fileName: "example.java",
324 content: exampleJava,
325 query: &query.Substring{Content: true, Pattern: "enum InnerEnum"},
326 language: "Java",
327 // 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
328 wantScore: 6900,
329 // line 16: public enum InnerEnum {
330 wantBestLineMatch: 16,
331 },
332 {
333 fileName: "example.java",
334 content: exampleJava,
335 query: &query.Substring{Content: true, Pattern: "public enum InnerEnum {"},
336 language: "Java",
337 // 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
338 wantScore: 5400,
339 // line 16: public enum InnerEnum {
340 wantBestLineMatch: 16,
341 },
342 }
343
344 for _, c := range cases {
345 checkScoring(t, c, false, ctags.UniversalCTags)
346 }
347}
348
349func TestKotlin(t *testing.T) {
350 exampleKotlin, err := os.ReadFile("./examples/example.kt")
351 if err != nil {
352 t.Fatal(err)
353 }
354
355 cases := []scoreCase{
356 {
357 fileName: "example.kt",
358 content: exampleKotlin,
359 query: &query.Substring{Content: true, Pattern: "oxyPreloader"},
360 language: "Kotlin",
361 // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word)
362 wantScore: 6550,
363 },
364 {
365 fileName: "example.kt",
366 content: exampleKotlin,
367 query: &query.Substring{Content: true, Pattern: "ViewMetadata"},
368 language: "Kotlin",
369 // 7000 (symbol) + 900 (Kotlin interface) + 500 (word)
370 wantScore: 8400,
371 },
372 {
373 fileName: "example.kt",
374 content: exampleKotlin,
375 query: &query.Substring{Content: true, Pattern: "onScrolled"},
376 language: "Kotlin",
377 // 7000 (symbol) + 800 (Kotlin method) + 500 (word)
378 wantScore: 8300,
379 },
380 {
381 fileName: "example.kt",
382 content: exampleKotlin,
383 query: &query.Substring{Content: true, Pattern: "PreloadErrorHandler"},
384 language: "Kotlin",
385 // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word)
386 wantScore: 8200,
387 },
388 {
389 fileName: "example.kt",
390 content: exampleKotlin,
391 query: &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"},
392 language: "Kotlin",
393 // 7000 (symbol) + 600 (Kotlin constant) + 500 (word)
394 wantScore: 8100,
395 },
396 {
397 fileName: "example.kt",
398 content: exampleKotlin,
399 query: &query.Substring{Content: true, Pattern: "scrollState"},
400 language: "Kotlin",
401 // 7000 (symbol) + 500 (Kotlin variable) + 500 (word)
402 wantScore: 8000,
403 },
404 }
405
406 parserType := ctags.UniversalCTags
407 for _, c := range cases {
408 t.Run(c.language, func(t *testing.T) {
409 checkScoring(t, c, false, parserType)
410 })
411 }
412}
413
414func TestCpp(t *testing.T) {
415 exampleCpp, err := os.ReadFile("./examples/example.cc")
416 if err != nil {
417 t.Fatal(err)
418 }
419
420 cases := []scoreCase{
421 {
422 fileName: "example.cc",
423 content: exampleCpp,
424 query: &query.Substring{Content: true, Pattern: "FooClass"},
425 language: "C++",
426 // 7000 (Symbol) + 1000 (C++ class) + 500 (full word)
427 wantScore: 8500,
428 },
429 {
430 fileName: "example.cc",
431 content: exampleCpp,
432 query: &query.Substring{Content: true, Pattern: "NestedEnum"},
433 language: "C++",
434 // 7000 (Symbol) + 900 (C++ enum) + 500 (full word)
435 wantScore: 8400,
436 },
437 {
438 fileName: "example.cc",
439 content: exampleCpp,
440 query: &query.Substring{Content: true, Pattern: "main"},
441 language: "C++",
442 // 7000 (Symbol) + 800 (C++ function) + 500 (full word)
443 wantScore: 8300,
444 },
445 {
446 fileName: "example.cc",
447 content: exampleCpp,
448 query: &query.Substring{Content: true, Pattern: "FooStruct"},
449 language: "C++",
450 // 7000 (Symbol) + 700 (C++ struct) + 500 (full word)
451 wantScore: 8200,
452 },
453 {
454 fileName: "example.cc",
455 content: exampleCpp,
456 query: &query.Substring{Content: true, Pattern: "TheUnion"},
457 language: "C++",
458 // 7000 (Symbol) + 600 (C++ union) + 500 (full word)
459 wantScore: 8100,
460 },
461 }
462
463 parserType := ctags.UniversalCTags
464 for _, c := range cases {
465 t.Run(c.language, func(t *testing.T) {
466 checkScoring(t, c, false, parserType)
467 })
468 }
469}
470
471func TestPython(t *testing.T) {
472 examplePython, err := os.ReadFile("./examples/example.py")
473 if err != nil {
474 t.Fatal(err)
475 }
476
477 cases := []scoreCase{
478 {
479 fileName: "example.py",
480 content: examplePython,
481 query: &query.Substring{Content: true, Pattern: "C1"},
482 language: "Python",
483 // 7000 (symbol) + 1000 (Python class) + 500 (word)
484 wantScore: 8500,
485 },
486 {
487 fileName: "example.py",
488 content: examplePython,
489 query: &query.Substring{Content: true, Pattern: "g"},
490 language: "Python",
491 // 7000 (symbol) + 800 (Python function) + 500 (word)
492 wantScore: 8300,
493 },
494 }
495
496 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
497 for _, c := range cases {
498 checkScoring(t, c, false, parserType)
499 }
500 }
501
502 // Only test SCIP, as universal-ctags doesn't correctly recognize this as a method
503 scipOnlyCase := scoreCase{
504 fileName: "example.py",
505 content: examplePython,
506 query: &query.Substring{Content: true, Pattern: "__init__"},
507 language: "Python",
508 // 7000 (symbol) + 800 (Python method) + 50 (partial word)
509 wantScore: 7850,
510 }
511
512 checkScoring(t, scipOnlyCase, false, ctags.ScipCTags)
513}
514
515func TestRuby(t *testing.T) {
516 exampleRuby, err := os.ReadFile("./examples/example.rb")
517 if err != nil {
518 t.Fatal(err)
519 }
520
521 cases := []scoreCase{
522 {
523 fileName: "example.rb",
524 content: exampleRuby,
525 query: &query.Substring{Content: true, Pattern: "Parental"},
526 language: "Ruby",
527 // 7000 (symbol) + 1000 (Ruby class) + 500 (word)
528 wantScore: 8500,
529 },
530 {
531 fileName: "example.rb",
532 content: exampleRuby,
533 query: &query.Substring{Content: true, Pattern: "parental_func"},
534 language: "Ruby",
535 // 7000 (symbol) + 900 (Ruby method) + 500 (word)
536 wantScore: 8400,
537 },
538 {
539 fileName: "example.rb",
540 content: exampleRuby,
541 query: &query.Substring{Content: true, Pattern: "MyModule"},
542 language: "Ruby",
543 // 7000 (symbol) + 500 (Ruby module) + 500 (word)
544 wantScore: 8200,
545 },
546 }
547
548 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
549 for _, c := range cases {
550 checkScoring(t, c, false, parserType)
551 }
552 }
553}
554
555func TestScala(t *testing.T) {
556 exampleScala, err := os.ReadFile("./examples/example.scala")
557 if err != nil {
558 t.Fatal(err)
559 }
560
561 cases := []scoreCase{
562 {
563 fileName: "example.scala",
564 content: exampleScala,
565 query: &query.Substring{Content: true, Pattern: "SymbolIndexBucket"},
566 language: "Scala",
567 // 7000 (symbol) + 1000 (Scala class) + 500 (word)
568 wantScore: 8500,
569 },
570 {
571 fileName: "example.scala",
572 content: exampleScala,
573 query: &query.Substring{Content: true, Pattern: "stdLibPatches"},
574 language: "Scala",
575 // 7000 (symbol) + 800 (Scala object) + 500 (word)
576 wantScore: 8300,
577 },
578 {
579 fileName: "example.scala",
580 content: exampleScala,
581 query: &query.Substring{Content: true, Pattern: "close"},
582 language: "Scala",
583 // 7000 (symbol) + 700 (Scala method) + 500 (word)
584 wantScore: 8200,
585 },
586 {
587 fileName: "example.scala",
588 content: exampleScala,
589 query: &query.Substring{Content: true, Pattern: "javaSymbol"},
590 language: "Scala",
591 // 7000 (symbol) + 500 (Scala method) + 500 (word)
592 wantScore: 8000,
593 },
594 }
595
596 parserType := ctags.UniversalCTags
597 for _, c := range cases {
598 checkScoring(t, c, false, parserType)
599 }
600}
601
602func TestGo(t *testing.T) {
603 cases := []scoreCase{
604 {
605 fileName: "src/net/http/client.go",
606 content: []byte(`
607package http
608type aInterface interface {}
609`),
610 query: &query.Substring{Content: true, Pattern: "aInterface"},
611 language: "Go",
612 // 7000 (full base match) + 1000 (Go interface) + 500 (word)
613 wantScore: 8500,
614 },
615 {
616 fileName: "src/net/http/client.go",
617 content: []byte(`
618package http
619type aStruct struct {}
620`),
621 query: &query.Substring{Content: true, Pattern: "aStruct"},
622 language: "Go",
623 // 7000 (full base match) + 900 (Go struct) + 500 (word)
624 wantScore: 8400,
625 },
626 {
627 fileName: "src/net/http/client.go",
628 content: []byte(`
629package http
630func aFunc() bool {}
631`),
632 query: &query.Substring{Content: true, Pattern: "aFunc"},
633 language: "Go",
634 // 7000 (full base match) + 800 (Go function) + 500 (word)
635 wantScore: 8300,
636 },
637 {
638 fileName: "src/net/http/client.go",
639 content: []byte(`
640package http
641func Get() {
642 panic("")
643}
644`),
645 query: &query.And{Children: []query.Q{
646 &query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}},
647 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}},
648 }},
649 language: "Go",
650 // 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom)
651 wantScore: 8550,
652 },
653 }
654
655 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
656 for _, c := range cases {
657 checkScoring(t, c, false, parserType)
658 }
659 }
660}
661
662func skipIfCTagsUnavailable(t *testing.T, parserType ctags.CTagsParserType) {
663 // Never skip universal-ctags tests in CI
664 if os.Getenv("CI") != "" && parserType == ctags.UniversalCTags {
665 return
666 }
667
668 switch parserType {
669 case ctags.UniversalCTags:
670 requireCTags(t)
671 case ctags.ScipCTags:
672 if checkScipCTags() == "" {
673 t.Skip("scip-ctags not available")
674 }
675 default:
676 t.Fatalf("unexpected parser type")
677 }
678}
679
680func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTagsParserType) {
681 skipIfCTagsUnavailable(t, parserType)
682
683 name := c.language
684 if parserType == ctags.ScipCTags {
685 name += "-scip"
686 }
687
688 t.Run(name, func(t *testing.T) {
689 dir := t.TempDir()
690
691 opts := index.Options{
692 IndexDir: dir,
693 RepositoryDescription: zoekt.Repository{
694 Name: "repo",
695 },
696 LanguageMap: ctags.LanguageMap{c.language: parserType},
697 }
698
699 epsilon := 0.01
700
701 b, err := index.NewBuilder(opts)
702 if err != nil {
703 t.Fatalf("NewBuilder: %v", err)
704 }
705 if err := b.AddFile(c.fileName, c.content); err != nil {
706 t.Fatal(err)
707 }
708 if err := b.Finish(); err != nil {
709 t.Fatalf("Finish: %v", err)
710 }
711
712 ss, err := search.NewDirectorySearcher(dir)
713 if err != nil {
714 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
715 }
716 defer ss.Close()
717
718 srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{
719 UseBM25Scoring: useBM25,
720 ChunkMatches: true,
721 DebugScore: true})
722 if err != nil {
723 t.Fatal(err)
724 }
725
726 if got, want := len(srs.Files), 1; got != want {
727 t.Fatalf("file matches: want %d, got %d", want, got)
728 }
729
730 if got := withoutTiebreaker(srs.Files[0].Score, useBM25); math.Abs(got-c.wantScore) > epsilon {
731 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
732 }
733
734 if c.wantBestLineMatch != 0 {
735 if len(srs.Files[0].ChunkMatches) == 0 {
736 t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
737 }
738 chunkMatch := srs.Files[0].ChunkMatches[0]
739 if chunkMatch.BestLineMatch != c.wantBestLineMatch {
740 t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
741 }
742 }
743
744 if got := srs.Files[0].Language; got != c.language {
745 t.Fatalf("want %s, got %s", c.language, got)
746 }
747 })
748}
749
750// helper to remove the tiebreaker from the score for easier comparison
751func withoutTiebreaker(fullScore float64, useBM25 bool) float64 {
752 if useBM25 {
753 // BM25 doesn't use a tiebreaker
754 return fullScore
755 }
756 return math.Trunc(fullScore / index.ScoreOffset)
757}
758
759func TestRepoRanks(t *testing.T) {
760 requireCTags(t)
761 dir := t.TempDir()
762
763 opts := index.Options{
764 IndexDir: dir,
765 RepositoryDescription: zoekt.Repository{
766 Name: "repo",
767 },
768 }
769
770 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
771 exampleJava, err := os.ReadFile("./examples/example.java")
772 if err != nil {
773 t.Fatal(err)
774 }
775
776 cases := []struct {
777 name string
778 repoRank uint16
779 wantScore float64
780 }{
781 {
782 name: "no shard rank",
783 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
784 wantScore: 7000_00000_10.00,
785 },
786 {
787 name: "medium shard rank",
788 repoRank: 30000,
789 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 30000 (repo rank) + 10 (file order)
790 wantScore: 7000_30000_10.00,
791 },
792 {
793 name: "high shard rank",
794 repoRank: 60000,
795 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 60000 (repo rank) + 10 (file order)
796 wantScore: 7000_60000_10.00,
797 },
798 }
799
800 for _, c := range cases {
801 t.Run(c.name, func(t *testing.T) {
802 opts.RepositoryDescription = zoekt.Repository{
803 Name: "repo",
804 Rank: c.repoRank,
805 }
806
807 b, err := index.NewBuilder(opts)
808 if err != nil {
809 t.Fatalf("NewBuilder: %v", err)
810 }
811
812 err = b.Add(index.Document{Name: "example.java", Content: exampleJava})
813 if err != nil {
814 t.Fatal(err)
815 }
816
817 if err := b.Finish(); err != nil {
818 t.Fatalf("Finish: %v", err)
819 }
820
821 ss, err := search.NewDirectorySearcher(dir)
822 if err != nil {
823 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
824 }
825 defer ss.Close()
826
827 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
828 DebugScore: true,
829 })
830 if err != nil {
831 t.Fatal(err)
832 }
833
834 if got, want := len(srs.Files), 1; got != want {
835 t.Fatalf("file matches: want %d, got %d", want, got)
836 }
837
838 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 {
839 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
840 }
841 })
842 }
843}