fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "context"
19 "math"
20 "os"
21 "testing"
22
23 "github.com/sourcegraph/zoekt"
24 "github.com/sourcegraph/zoekt/ctags"
25 "github.com/sourcegraph/zoekt/query"
26 "github.com/sourcegraph/zoekt/shards"
27)
28
29type scoreCase struct {
30 fileName string
31 content []byte
32 query query.Q
33 language string
34 wantScore float64
35}
36
37func TestFileNameMatch(t *testing.T) {
38 cases := []scoreCase{
39 {
40 fileName: "a/b/c/config.go",
41 query: &query.Substring{FileName: true, Pattern: "config"},
42 language: "Go",
43 // 5500 (partial base at boundary) + 500 (word) + 10 (file order)
44 wantScore: 6010,
45 },
46 {
47 fileName: "a/b/c/config.go",
48 query: &query.Substring{FileName: true, Pattern: "config.go"},
49 language: "Go",
50 // 7000 (full base match) + 500 (word) + 10 (file order)
51 wantScore: 7510,
52 },
53 {
54 fileName: "a/config/c/d.go",
55 query: &query.Substring{FileName: true, Pattern: "config"},
56 language: "Go",
57 // 500 (word) + 10 (file order)
58 wantScore: 510,
59 },
60 }
61
62 for _, c := range cases {
63 checkScoring(t, c, ctags.UniversalCTags)
64 }
65}
66
67func TestJava(t *testing.T) {
68 exampleJava, err := os.ReadFile("./testdata/example.java")
69 if err != nil {
70 t.Fatal(err)
71 }
72
73 cases := []scoreCase{
74 {
75 fileName: "example.java",
76 content: exampleJava,
77 query: &query.Substring{Content: true, Pattern: "nerClass"},
78 language: "Java",
79 // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 10 (file order)
80 wantScore: 6560,
81 },
82 {
83 fileName: "example.java",
84 content: exampleJava,
85 query: &query.Substring{Content: true, Pattern: "StaticClass"},
86 language: "Java",
87 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) + 10 (file order)
88 wantScore: 7010,
89 },
90 {
91 fileName: "example.java",
92 content: exampleJava,
93 query: &query.Substring{Content: true, Pattern: "innerEnum"},
94 language: "Java",
95 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 10 (file order)
96 wantScore: 8410,
97 },
98 {
99 fileName: "example.java",
100 content: exampleJava,
101 query: &query.Substring{Content: true, Pattern: "innerInterface"},
102 language: "Java",
103 // 7000 (symbol) + 800 (Java interface) + 500 (word) + 10 (file order)
104 wantScore: 8310,
105 },
106 {
107 fileName: "example.java",
108 content: exampleJava,
109 query: &query.Substring{Content: true, Pattern: "innerMethod"},
110 language: "Java",
111 // 7000 (symbol) + 700 (Java method) + 500 (word) + 10 (file order)
112 wantScore: 8210,
113 },
114 {
115 fileName: "example.java",
116 content: exampleJava,
117 query: &query.Substring{Content: true, Pattern: "field"},
118 language: "Java",
119 // 7000 (symbol) + 600 (Java field) + 500 (word) + 10 (file order)
120 wantScore: 8110,
121 },
122 {
123 fileName: "example.java",
124 content: exampleJava,
125 query: &query.Substring{Content: true, Pattern: "B"},
126 language: "Java",
127 // 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 10 (file order)
128 wantScore: 8010,
129 },
130 // 2 Atoms (1x content and 1x filename)
131 {
132 fileName: "example.java",
133 content: exampleJava,
134 query: &query.Substring{Pattern: "example"}, // matches filename and a Java field
135 language: "Java",
136 // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) + 10 (file order)
137 wantScore: 6810,
138 },
139 // 3 Atoms (2x content, 1x filename)
140 {
141 fileName: "example.java",
142 content: exampleJava,
143 query: &query.Or{Children: []query.Q{
144 &query.Substring{Pattern: "example"}, // matches filename and Java field
145 &query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method
146 }},
147 language: "Java",
148 // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) + 10 (file order)
149 wantScore: 8476.667,
150 },
151 // 4 Atoms (4x content)
152 {
153 fileName: "example.java",
154 content: exampleJava,
155 query: &query.Or{Children: []query.Q{
156 &query.Substring{Content: true, Pattern: "testAnon"},
157 &query.Substring{Content: true, Pattern: "Override"},
158 &query.Substring{Content: true, Pattern: "InnerEnum"},
159 &query.Substring{Content: true, Pattern: "app"},
160 }},
161 language: "Java",
162 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order)
163 wantScore: 8710,
164 },
165 }
166
167 for _, c := range cases {
168 checkScoring(t, c, ctags.UniversalCTags)
169 }
170}
171
172func TestKotlin(t *testing.T) {
173 exampleKotlin, err := os.ReadFile("./testdata/example.kt")
174 if err != nil {
175 t.Fatal(err)
176 }
177
178 cases := []scoreCase{
179 {
180 fileName: "example.kt",
181 content: exampleKotlin,
182 query: &query.Substring{Content: true, Pattern: "oxyPreloader"},
183 language: "Kotlin",
184 // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) + 10 (file order)
185 wantScore: 6560,
186 },
187 {
188 fileName: "example.kt",
189 content: exampleKotlin,
190 query: &query.Substring{Content: true, Pattern: "ViewMetadata"},
191 language: "Kotlin",
192 // 7000 (symbol) + 900 (Kotlin interface) + 500 (word) + 10 (file order)
193 wantScore: 8410,
194 },
195 {
196 fileName: "example.kt",
197 content: exampleKotlin,
198 query: &query.Substring{Content: true, Pattern: "onScrolled"},
199 language: "Kotlin",
200 // 7000 (symbol) + 800 (Kotlin method) + 500 (word) + 10 (file order)
201 wantScore: 8310,
202 },
203 {
204 fileName: "example.kt",
205 content: exampleKotlin,
206 query: &query.Substring{Content: true, Pattern: "PreloadErrorHandler"},
207 language: "Kotlin",
208 // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) + 10 (file order)
209 wantScore: 8210,
210 },
211 {
212 fileName: "example.kt",
213 content: exampleKotlin,
214 query: &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"},
215 language: "Kotlin",
216 // 7000 (symbol) + 600 (Kotlin constant) + 500 (word) + 10 (file order)
217 wantScore: 8110,
218 },
219 {
220 fileName: "example.kt",
221 content: exampleKotlin,
222 query: &query.Substring{Content: true, Pattern: "scrollState"},
223 language: "Kotlin",
224 // 7000 (symbol) + 500 (Kotlin variable) + 500 (word) + 10 (file order)
225 wantScore: 8010,
226 },
227 }
228
229 parserType := ctags.UniversalCTags
230 for _, c := range cases {
231 t.Run(c.language, func(t *testing.T) {
232 checkScoring(t, c, parserType)
233 })
234 }
235}
236
237func TestCpp(t *testing.T) {
238 exampleCpp, err := os.ReadFile("./testdata/example.cc")
239 if err != nil {
240 t.Fatal(err)
241 }
242
243 cases := []scoreCase{
244 {
245 fileName: "example.cc",
246 content: exampleCpp,
247 query: &query.Substring{Content: true, Pattern: "FooClass"},
248 language: "C++",
249 // 7000 (Symbol) + 1000 (C++ class) + 500 (full word) + 10 (file order)
250 wantScore: 8510,
251 },
252 {
253 fileName: "example.cc",
254 content: exampleCpp,
255 query: &query.Substring{Content: true, Pattern: "NestedEnum"},
256 language: "C++",
257 // 7000 (Symbol) + 900 (C++ enum) + 500 (full word) + 10 (file order)
258 wantScore: 8410,
259 },
260 {
261 fileName: "example.cc",
262 content: exampleCpp,
263 query: &query.Substring{Content: true, Pattern: "main"},
264 language: "C++",
265 // 7000 (Symbol) + 800 (C++ function) + 500 (full word) + 10 (file order)
266 wantScore: 8310,
267 },
268 {
269 fileName: "example.cc",
270 content: exampleCpp,
271 query: &query.Substring{Content: true, Pattern: "FooStruct"},
272 language: "C++",
273 // 7000 (Symbol) + 700 (C++ struct) + 500 (full word) + 10 (file order)
274 wantScore: 8210,
275 },
276 {
277 fileName: "example.cc",
278 content: exampleCpp,
279 query: &query.Substring{Content: true, Pattern: "TheUnion"},
280 language: "C++",
281 // 7000 (Symbol) + 600 (C++ union) + 500 (full word) + 10 (file order)
282 wantScore: 8110,
283 },
284 }
285
286 parserType := ctags.UniversalCTags
287 for _, c := range cases {
288 t.Run(c.language, func(t *testing.T) {
289 checkScoring(t, c, parserType)
290 })
291 }
292}
293
294func TestPython(t *testing.T) {
295 examplePython, err := os.ReadFile("./testdata/example.py")
296 if err != nil {
297 t.Fatal(err)
298 }
299
300 cases := []scoreCase{
301 {
302 fileName: "example.py",
303 content: examplePython,
304 query: &query.Substring{Content: true, Pattern: "C1"},
305 language: "Python",
306 // 7000 (symbol) + 1000 (Python class) + 500 (word) + 10 (file order)
307 wantScore: 8510,
308 },
309 {
310 fileName: "example.py",
311 content: examplePython,
312 query: &query.Substring{Content: true, Pattern: "g"},
313 language: "Python",
314 // 7000 (symbol) + 800 (Python function) + 500 (word) + 10 (file order)
315 wantScore: 8310,
316 },
317 }
318
319 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
320 for _, c := range cases {
321 checkScoring(t, c, parserType)
322 }
323 }
324
325 // Only test SCIP, as universal-ctags doesn't correctly recognize this as a method
326 scipOnlyCase := scoreCase{
327 fileName: "example.py",
328 content: examplePython,
329 query: &query.Substring{Content: true, Pattern: "__init__"},
330 language: "Python",
331 // 7000 (symbol) + 800 (Python method) + 50 (partial word) + 10 (file order)
332 wantScore: 7860,
333 }
334
335 checkScoring(t, scipOnlyCase, ctags.ScipCTags)
336}
337
338func TestRuby(t *testing.T) {
339 exampleRuby, err := os.ReadFile("./testdata/example.rb")
340 if err != nil {
341 t.Fatal(err)
342 }
343
344 cases := []scoreCase{
345 {
346 fileName: "example.rb",
347 content: exampleRuby,
348 query: &query.Substring{Content: true, Pattern: "Parental"},
349 language: "Ruby",
350 // 7000 (symbol) + 1000 (Ruby class) + 500 (word) + 10 (file order)
351 wantScore: 8510,
352 },
353 {
354 fileName: "example.rb",
355 content: exampleRuby,
356 query: &query.Substring{Content: true, Pattern: "parental_func"},
357 language: "Ruby",
358 // 7000 (symbol) + 900 (Ruby method) + 500 (word) + 10 (file order)
359 wantScore: 8410,
360 },
361 {
362 fileName: "example.rb",
363 content: exampleRuby,
364 query: &query.Substring{Content: true, Pattern: "MyModule"},
365 language: "Ruby",
366 // 7000 (symbol) + 500 (Ruby module) + 500 (word) + 10 (file order)
367 wantScore: 8210,
368 },
369 }
370
371 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
372 for _, c := range cases {
373 checkScoring(t, c, parserType)
374 }
375 }
376}
377
378func TestScala(t *testing.T) {
379 exampleScala, err := os.ReadFile("./testdata/example.scala")
380 if err != nil {
381 t.Fatal(err)
382 }
383
384 cases := []scoreCase{
385 {
386 fileName: "example.scala",
387 content: exampleScala,
388 query: &query.Substring{Content: true, Pattern: "SymbolIndexBucket"},
389 language: "Scala",
390 // 7000 (symbol) + 1000 (Scala class) + 500 (word) + 10 (file order)
391 wantScore: 8510,
392 },
393 {
394 fileName: "example.scala",
395 content: exampleScala,
396 query: &query.Substring{Content: true, Pattern: "stdLibPatches"},
397 language: "Scala",
398 // 7000 (symbol) + 800 (Scala object) + 500 (word) + 10 (file order)
399 wantScore: 8310,
400 },
401 {
402 fileName: "example.scala",
403 content: exampleScala,
404 query: &query.Substring{Content: true, Pattern: "close"},
405 language: "Scala",
406 // 7000 (symbol) + 700 (Scala method) + 500 (word) + 10 (file order)
407 wantScore: 8210,
408 },
409 {
410 fileName: "example.scala",
411 content: exampleScala,
412 query: &query.Substring{Content: true, Pattern: "javaSymbol"},
413 language: "Scala",
414 // 7000 (symbol) + 500 (Scala method) + 500 (word) + 10 (file order)
415 wantScore: 8010,
416 },
417 }
418
419 parserType := ctags.UniversalCTags
420 for _, c := range cases {
421 checkScoring(t, c, parserType)
422 }
423}
424
425func TestGo(t *testing.T) {
426 cases := []scoreCase{
427 {
428 fileName: "src/net/http/client.go",
429 content: []byte(`
430package http
431type aInterface interface {}
432`),
433 query: &query.Substring{Content: true, Pattern: "aInterface"},
434 language: "Go",
435 // 7000 (full base match) + 1000 (Go interface) + 500 (word) + 10 (file order)
436 wantScore: 8510,
437 },
438 {
439 fileName: "src/net/http/client.go",
440 content: []byte(`
441package http
442type aStruct struct {}
443`),
444 query: &query.Substring{Content: true, Pattern: "aStruct"},
445 language: "Go",
446 // 7000 (full base match) + 900 (Go struct) + 500 (word) + 10 (file order)
447 wantScore: 8410,
448 },
449 {
450 fileName: "src/net/http/client.go",
451 content: []byte(`
452package http
453func aFunc() bool {}
454`),
455 query: &query.Substring{Content: true, Pattern: "aFunc"},
456 language: "Go",
457 // 7000 (full base match) + 800 (Go function) + 500 (word) + 10 (file order)
458 wantScore: 8310,
459 },
460 {
461 fileName: "src/net/http/client.go",
462 content: []byte(`
463package http
464func Get() {
465 panic("")
466}
467`),
468 query: &query.And{Children: []query.Q{
469 &query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}},
470 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}}},
471 language: "Go",
472 // 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom) + 10 (file order)
473 wantScore: 8560,
474 },
475 }
476
477 for _, parserType := range []ctags.CTagsParserType{ctags.UniversalCTags, ctags.ScipCTags} {
478 for _, c := range cases {
479 checkScoring(t, c, parserType)
480 }
481 }
482}
483
484func skipIfCTagsUnavailable(t *testing.T, parserType ctags.CTagsParserType) {
485 // Never skip universal-ctags tests in CI
486 if os.Getenv("CI") != "" && parserType == ctags.UniversalCTags {
487 return
488 }
489
490 switch parserType {
491 case ctags.UniversalCTags:
492 requireCTags(t)
493 case ctags.ScipCTags:
494 if checkScipCTags() == "" {
495 t.Skip("scip-ctags not available")
496 }
497 default:
498 t.Fatalf("unexpected parser type")
499 }
500}
501
502func checkScoring(t *testing.T, c scoreCase, parserType ctags.CTagsParserType) {
503 skipIfCTagsUnavailable(t, parserType)
504
505 name := c.language
506 if parserType == ctags.ScipCTags {
507 name += "-scip"
508 }
509
510 t.Run(name, func(t *testing.T) {
511 dir := t.TempDir()
512
513 opts := Options{
514 IndexDir: dir,
515 RepositoryDescription: zoekt.Repository{
516 Name: "repo",
517 },
518 LanguageMap: ctags.LanguageMap{
519 normalizeLanguage(c.language): parserType},
520 }
521
522 epsilon := 0.01
523
524 b, err := NewBuilder(opts)
525 if err != nil {
526 t.Fatalf("NewBuilder: %v", err)
527 }
528 if err := b.AddFile(c.fileName, c.content); err != nil {
529 t.Fatal(err)
530 }
531 if err := b.Finish(); err != nil {
532 t.Fatalf("Finish: %v", err)
533 }
534
535 ss, err := shards.NewDirectorySearcher(dir)
536 if err != nil {
537 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
538 }
539 defer ss.Close()
540
541 srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{DebugScore: true})
542 if err != nil {
543 t.Fatal(err)
544 }
545
546 if got, want := len(srs.Files), 1; got != want {
547 t.Fatalf("file matches: want %d, got %d", want, got)
548 }
549
550 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) > epsilon {
551 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
552 }
553
554 if got := srs.Files[0].Language; got != c.language {
555 t.Fatalf("want %s, got %s", c.language, got)
556 }
557 })
558}
559
560func TestDocumentRanks(t *testing.T) {
561 requireCTags(t)
562 dir := t.TempDir()
563
564 opts := Options{
565 IndexDir: dir,
566 RepositoryDescription: zoekt.Repository{
567 Name: "repo",
568 },
569 DocumentRanksVersion: "ranking",
570 }
571
572 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
573 exampleJava, err := os.ReadFile("./testdata/example.java")
574 if err != nil {
575 t.Fatal(err)
576 }
577
578 cases := []struct {
579 name string
580 documentRank float64
581 documentRanksWeight float64
582 wantScore float64
583 }{
584 {
585 name: "score with no document ranks",
586 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
587 wantScore: 7010.00,
588 },
589 {
590 name: "score with document ranks",
591 documentRank: 0.8,
592 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) + 10 (file order)
593 wantScore: 7235.00,
594 },
595 {
596 name: "score with custom document ranks weight",
597 documentRank: 0.8,
598 documentRanksWeight: 1000.0,
599 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) + 10 (file order)
600 wantScore: 7035.00,
601 },
602 }
603
604 for _, c := range cases {
605 t.Run(c.name, func(t *testing.T) {
606 b, err := NewBuilder(opts)
607 if err != nil {
608 t.Fatalf("NewBuilder: %v", err)
609 }
610
611 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava, Ranks: []float64{c.documentRank}})
612 if err != nil {
613 t.Fatal(err)
614 }
615
616 if err := b.Finish(); err != nil {
617 t.Fatalf("Finish: %v", err)
618 }
619
620 ss, err := shards.NewDirectorySearcher(dir)
621 if err != nil {
622 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
623 }
624 defer ss.Close()
625
626 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
627 UseDocumentRanks: true,
628 DocumentRanksWeight: c.documentRanksWeight,
629 DebugScore: true,
630 })
631
632 if err != nil {
633 t.Fatal(err)
634 }
635
636 if got, want := len(srs.Files), 1; got != want {
637 t.Fatalf("file matches: want %d, got %d", want, got)
638 }
639
640 if got := srs.Files[0].Score; got != c.wantScore {
641 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
642 }
643 })
644 }
645}
646
647func TestRepoRanks(t *testing.T) {
648 requireCTags(t)
649 dir := t.TempDir()
650
651 opts := Options{
652 IndexDir: dir,
653 RepositoryDescription: zoekt.Repository{
654 Name: "repo",
655 },
656 DocumentRanksVersion: "ranking",
657 }
658
659 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
660 exampleJava, err := os.ReadFile("./testdata/example.java")
661 if err != nil {
662 t.Fatal(err)
663 }
664
665 cases := []struct {
666 name string
667 repoRank uint16
668 wantScore float64
669 }{
670 {
671 name: "no shard rank",
672 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
673 wantScore: 7010.00,
674 },
675 {
676 name: "medium shard rank",
677 repoRank: 30000,
678 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo rank)
679 wantScore: 7019.16,
680 },
681 {
682 name: "high shard rank",
683 repoRank: 60000,
684 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank)
685 wantScore: 7028.31,
686 },
687 }
688
689 for _, c := range cases {
690 t.Run(c.name, func(t *testing.T) {
691 opts.RepositoryDescription = zoekt.Repository{
692 Name: "repo",
693 Rank: c.repoRank,
694 }
695
696 b, err := NewBuilder(opts)
697 if err != nil {
698 t.Fatalf("NewBuilder: %v", err)
699 }
700
701 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava})
702 if err != nil {
703 t.Fatal(err)
704 }
705
706 if err := b.Finish(); err != nil {
707 t.Fatalf("Finish: %v", err)
708 }
709
710 ss, err := shards.NewDirectorySearcher(dir)
711 if err != nil {
712 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
713 }
714 defer ss.Close()
715
716 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
717 UseDocumentRanks: true,
718 DebugScore: true,
719 })
720
721 if err != nil {
722 t.Fatal(err)
723 }
724
725 if got, want := len(srs.Files), 1; got != want {
726 t.Fatalf("file matches: want %d, got %d", want, got)
727 }
728
729 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 {
730 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
731 }
732 })
733 }
734}