fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "fmt"
22 "log"
23 "math"
24 "os"
25 "path/filepath"
26 "reflect"
27 "runtime"
28 "sort"
29 "strings"
30 "testing"
31 "time"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35 "github.com/grafana/regexp"
36
37 "github.com/sourcegraph/zoekt"
38 "github.com/sourcegraph/zoekt/query"
39 "github.com/sourcegraph/zoekt/shards"
40)
41
42func TestBasic(t *testing.T) {
43 dir := t.TempDir()
44
45 opts := Options{
46 IndexDir: dir,
47 ShardMax: 1024,
48 RepositoryDescription: zoekt.Repository{
49 Name: "repo",
50 },
51 Parallelism: 2,
52 SizeMax: 1 << 20,
53 }
54
55 b, err := NewBuilder(opts)
56 if err != nil {
57 t.Fatalf("NewBuilder: %v", err)
58 }
59
60 for i := 0; i < 4; i++ {
61 s := fmt.Sprintf("%d", i)
62 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil {
63 t.Fatal(err)
64 }
65 }
66
67 if err := b.Finish(); err != nil {
68 t.Errorf("Finish: %v", err)
69 }
70
71 fs, _ := filepath.Glob(dir + "/*.zoekt")
72 if len(fs) <= 1 {
73 t.Fatalf("want multiple shards, got %v", fs)
74 }
75
76 _, md0, err := zoekt.ReadMetadataPath(fs[0])
77 if err != nil {
78 t.Fatal(err)
79 }
80 for _, f := range fs[1:] {
81 _, md, err := zoekt.ReadMetadataPath(f)
82 if err != nil {
83 t.Fatal(err)
84 }
85 if md.IndexTime != md0.IndexTime {
86 t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime)
87 }
88 if md.ID != md0.ID {
89 t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID)
90 }
91 }
92
93 ss, err := shards.NewDirectorySearcher(dir)
94 if err != nil {
95 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
96 }
97 defer ss.Close()
98
99 q, err := query.Parse("111")
100 if err != nil {
101 t.Fatalf("Parse(111): %v", err)
102 }
103
104 var sOpts zoekt.SearchOptions
105 ctx := context.Background()
106 result, err := ss.Search(ctx, q, &sOpts)
107 if err != nil {
108 t.Fatalf("Search(%v): %v", q, err)
109 }
110
111 if len(result.Files) != 1 {
112 t.Errorf("got %v, want 1 file.", result.Files)
113 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
114 t.Errorf("got file %q, want %q", gotFile, wantFile)
115 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo"; gotRepo != wantRepo {
116 t.Errorf("got repo %q, want %q", gotRepo, wantRepo)
117 }
118
119 t.Run("meta file", func(t *testing.T) {
120 // use retryTest to allow for the directory watcher to notice the meta
121 // file
122 retryTest(t, func(fatalf func(format string, args ...interface{})) {
123 // Add a .meta file for each shard with repo.Name set to
124 // "repo-mutated". We do this inside retry helper since we have noticed
125 // some flakiness on github CI.
126 for _, p := range fs {
127 repos, _, err := zoekt.ReadMetadataPath(p)
128 if err != nil {
129 t.Fatal(err)
130 }
131 repos[0].Name = "repo-mutated"
132 b, err := json.Marshal(repos[0])
133 if err != nil {
134 t.Fatal(err)
135 }
136
137 if err := os.WriteFile(p+".meta", b, 0600); err != nil {
138 t.Fatal(err)
139 }
140 }
141
142 result, err := ss.Search(ctx, q, &sOpts)
143 if err != nil {
144 fatalf("Search(%v): %v", q, err)
145 }
146
147 if len(result.Files) != 1 {
148 fatalf("got %v, want 1 file.", result.Files)
149 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
150 fatalf("got file %q, want %q", gotFile, wantFile)
151 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo-mutated"; gotRepo != wantRepo {
152 fatalf("got repo %q, want %q", gotRepo, wantRepo)
153 }
154 })
155 })
156}
157
// retryTest will retry f until min(t.Deadline(), time.Minute). It returns
// once f doesn't call fatalf.
//
// Each attempt runs f on its own goroutine with a fatalf that silently aborts
// the attempt (via runtime.Goexit) instead of failing the test. Between failed
// attempts it sleeps, starting at 15ms and growing by a factor of 1.5 each
// time. Once the next sleep would pass the deadline, f is run one last time on
// the calling goroutine with the real t.Fatalf, so the final failure is
// reported normally.
//
// Because the retried attempts do not run on the test goroutine, f must
// report failures through the fatalf it is given and not through
// t.Fatal/t.FailNow directly.
func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) {
	t.Helper()

	sleep := 10 * time.Millisecond
	deadline := time.Now().Add(time.Minute)
	if d, ok := t.Deadline(); ok && d.Before(deadline) {
		// give 1s for us to do a final test run
		deadline = d.Add(-time.Second)
	}

	for {
		done := make(chan bool)
		go func() {
			// If f aborts through Goexit the send below never happens; the
			// deferred close then makes the receive yield false.
			defer close(done)

			f(func(format string, args ...interface{}) {
				runtime.Goexit()
			})

			done <- true
		}()

		if success := <-done; success {
			return
		}

		// each time we increase sleep by 1.5. This has to assign to the outer
		// variable: declaring a new one with := would reset the back-off to
		// the same value on every iteration.
		sleep = sleep*2 - sleep/2
		if time.Now().Add(sleep).After(deadline) {
			break
		}
		time.Sleep(sleep)
	}

	// final run for the test, using the real t.Fatalf
	f(t.Fatalf)
}
198
199func TestLargeFileOption(t *testing.T) {
200 dir := t.TempDir()
201
202 sizeMax := 1000
203 opts := Options{
204 IndexDir: dir,
205 LargeFiles: []string{"F0", "F1", "F2", "!F1"},
206 RepositoryDescription: zoekt.Repository{
207 Name: "repo",
208 },
209 SizeMax: sizeMax,
210 }
211
212 b, err := NewBuilder(opts)
213 if err != nil {
214 t.Fatalf("NewBuilder: %v", err)
215 }
216
217 for i := 0; i < 4; i++ {
218 s := fmt.Sprintf("%d", i)
219 if err := b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1))); err != nil {
220 t.Fatal(err)
221 }
222 }
223
224 if err := b.Finish(); err != nil {
225 t.Errorf("Finish: %v", err)
226 }
227
228 ss, err := shards.NewDirectorySearcher(dir)
229 if err != nil {
230 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
231 }
232
233 q, err := query.Parse("aaa")
234 if err != nil {
235 t.Fatalf("Parse(aaa): %v", err)
236 }
237
238 var sOpts zoekt.SearchOptions
239 ctx := context.Background()
240 result, err := ss.Search(ctx, q, &sOpts)
241 if err != nil {
242 t.Fatalf("Search(%v): %v", q, err)
243 }
244
245 if len(result.Files) != 2 {
246 t.Errorf("got %v files, want 2 files.", len(result.Files))
247 }
248 defer ss.Close()
249}
250
251func TestUpdate(t *testing.T) {
252 dir := t.TempDir()
253
254 opts := Options{
255 IndexDir: dir,
256 ShardMax: 1024,
257 RepositoryDescription: zoekt.Repository{
258 Name: "repo",
259 FileURLTemplate: "url",
260 },
261 Parallelism: 2,
262 SizeMax: 1 << 20,
263 }
264
265 if b, err := NewBuilder(opts); err != nil {
266 t.Fatalf("NewBuilder: %v", err)
267 } else {
268 if err := b.AddFile("F", []byte("hoi")); err != nil {
269 t.Errorf("AddFile: %v", err)
270 }
271 if err := b.Finish(); err != nil {
272 t.Errorf("Finish: %v", err)
273 }
274 }
275 ss, err := shards.NewDirectorySearcher(dir)
276 if err != nil {
277 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
278 }
279
280 ctx := context.Background()
281 repos, err := ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil)
282 if err != nil {
283 t.Fatalf("List: %v", err)
284 }
285
286 if len(repos.Repos) != 1 {
287 t.Errorf("List(repo): got %v, want 1 repo", repos.Repos)
288 }
289
290 fs, err := filepath.Glob(filepath.Join(dir, "*"))
291 if err != nil {
292 t.Fatalf("glob: %v", err)
293 }
294
295 opts.RepositoryDescription = zoekt.Repository{
296 Name: "repo2",
297 FileURLTemplate: "url2",
298 }
299
300 if b, err := NewBuilder(opts); err != nil {
301 t.Fatalf("NewBuilder: %v", err)
302 } else {
303 if err := b.AddFile("F", []byte("hoi")); err != nil {
304 t.Errorf("AddFile: %v", err)
305 }
306 if err := b.Finish(); err != nil {
307 t.Errorf("Finish: %v", err)
308 }
309 }
310
311 // This is ugly, and potentially flaky, but there is no
312 // observable synchronization for the Sharded searcher, so
313 // this is the best we can do.
314 time.Sleep(100 * time.Millisecond)
315
316 ctx = context.Background()
317 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
318 t.Fatalf("List: %v", err)
319 } else if len(repos.Repos) != 2 {
320 t.Errorf("List(repo): got %v, want 2 repos", repos.Repos)
321 }
322
323 for _, fn := range fs {
324 log.Printf("removing %s", fn)
325 if err := os.Remove(fn); err != nil {
326 t.Fatalf("Remove(%s): %v", fn, err)
327 }
328 }
329
330 time.Sleep(100 * time.Millisecond)
331
332 ctx = context.Background()
333 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
334 t.Fatalf("List: %v", err)
335 } else if len(repos.Repos) != 1 {
336 var ss []string
337 for _, r := range repos.Repos {
338 ss = append(ss, r.Repository.Name)
339 }
340 t.Errorf("List(repo): got %v, want 1 repo", ss)
341 }
342}
343
344func TestDeleteOldShards(t *testing.T) {
345 dir := t.TempDir()
346
347 opts := Options{
348 IndexDir: dir,
349 ShardMax: 1024,
350 RepositoryDescription: zoekt.Repository{
351 Name: "repo",
352 FileURLTemplate: "url",
353 },
354 SizeMax: 1 << 20,
355 }
356 opts.SetDefaults()
357
358 b, err := NewBuilder(opts)
359 if err != nil {
360 t.Fatalf("NewBuilder: %v", err)
361 }
362 for i := 0; i < 4; i++ {
363 s := fmt.Sprintf("%d\n", i)
364 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
365 t.Errorf("AddFile: %v", err)
366 }
367 }
368 if err := b.Finish(); err != nil {
369 t.Errorf("Finish: %v", err)
370 }
371
372 glob := filepath.Join(dir, "*.zoekt")
373 fs, err := filepath.Glob(glob)
374 if err != nil {
375 t.Fatalf("Glob(%s): %v", glob, err)
376 } else if len(fs) != 4 {
377 t.Fatalf("Glob(%s): got %v, want 4 shards", glob, fs)
378 }
379
380 if fi, err := os.Lstat(fs[0]); err != nil {
381 t.Fatalf("Lstat: %v", err)
382 } else if fi.Mode()&0o666 == 0o600 {
383 // This fails spuriously if your umask is very restrictive.
384 t.Errorf("got mode %o, should respect umask.", fi.Mode())
385 }
386
387 // Do again, without sharding.
388 opts.ShardMax = 1 << 20
389 b, err = NewBuilder(opts)
390 if err != nil {
391 t.Fatalf("NewBuilder: %v", err)
392 }
393 for i := 0; i < 4; i++ {
394 s := fmt.Sprintf("%d\n", i)
395 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
396 t.Fatal(err)
397 }
398 }
399 if err := b.Finish(); err != nil {
400 t.Errorf("Finish: %v", err)
401 }
402
403 fs, err = filepath.Glob(glob)
404 if err != nil {
405 t.Fatalf("Glob(%s): %v", glob, err)
406 } else if len(fs) != 1 {
407 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
408 }
409
410 // Again, but don't index anything; should leave old shards intact.
411 b, err = NewBuilder(opts)
412 if err != nil {
413 t.Fatalf("NewBuilder: %v", err)
414 }
415 if err := b.Finish(); err != nil {
416 t.Errorf("Finish: %v", err)
417 }
418
419 fs, err = filepath.Glob(glob)
420 if err != nil {
421 t.Fatalf("Glob(%s): %v", glob, err)
422 } else if len(fs) != 1 {
423 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
424 }
425}
426
427func TestPartialSuccess(t *testing.T) {
428 dir := t.TempDir()
429
430 opts := Options{
431 IndexDir: dir,
432 ShardMax: 1024,
433 SizeMax: 1 << 20,
434 Parallelism: 1,
435 }
436 opts.RepositoryDescription.Name = "repo"
437 opts.SetDefaults()
438
439 b, err := NewBuilder(opts)
440 if err != nil {
441 t.Fatalf("NewBuilder: %v", err)
442 }
443
444 for i := 0; i < 4; i++ {
445 nm := fmt.Sprintf("F%d", i)
446 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
447 }
448 b.buildError = fmt.Errorf("any error")
449
450 // No error checking.
451 _ = b.Finish()
452
453 // Finish cleans up temporary files.
454 if fs, err := filepath.Glob(dir + "/*"); err != nil {
455 t.Errorf("glob(%s): %v", dir, err)
456 } else if len(fs) != 0 {
457 t.Errorf("got shards %v, want []", fs)
458 }
459}
460
// filerankCase is one table entry for the file-rank tests: a set of documents
// and the order they are expected to have after sorting.
type filerankCase struct {
	// name identifies the case.
	name string
	// docs are the input documents, in their initial order.
	docs []*zoekt.Document
	// want lists indexes into docs in the expected order after sorting.
	want []int
}
466
467func testFileRankAspect(t *testing.T, c filerankCase) {
468 var want []*zoekt.Document
469 for _, j := range c.want {
470 want = append(want, c.docs[j])
471 }
472
473 got := make([]*zoekt.Document, len(c.docs))
474 copy(got, c.docs)
475 sortDocuments(got)
476
477 print := func(ds []*zoekt.Document) string {
478 r := ""
479 for _, d := range ds {
480 r += fmt.Sprintf("%v, ", d)
481 }
482 return r
483 }
484 if !reflect.DeepEqual(got, want) {
485 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
486 }
487}
488
489func TestFileRank(t *testing.T) {
490 for _, c := range []filerankCase{{
491 name: "filename",
492 docs: []*zoekt.Document{
493 {
494 Name: "longlonglong",
495 Content: []byte("bla"),
496 },
497 {
498 Name: "short",
499 Content: []byte("bla"),
500 },
501 },
502 want: []int{1, 0},
503 }, {
504 name: "test",
505 docs: []*zoekt.Document{
506 {
507 Name: "test",
508 Content: []byte("bla"),
509 },
510 {
511 Name: "longlonglong",
512 Content: []byte("bla"),
513 },
514 },
515 want: []int{1, 0},
516 }, {
517 name: "content",
518 docs: []*zoekt.Document{
519 {
520 Content: []byte("bla"),
521 },
522 {
523 Content: []byte("blablablabla"),
524 },
525 {
526 Content: []byte("blabla"),
527 },
528 },
529 want: []int{0, 2, 1},
530 }} {
531 t.Run(c.name, func(t *testing.T) {
532 testFileRankAspect(t, c)
533 })
534 }
535}
536
537func TestEmptyContent(t *testing.T) {
538 dir := t.TempDir()
539
540 opts := Options{
541 IndexDir: dir,
542 RepositoryDescription: zoekt.Repository{
543 Name: "repo",
544 },
545 }
546 opts.SetDefaults()
547
548 b, err := NewBuilder(opts)
549 if err != nil {
550 t.Fatalf("NewBuilder: %v", err)
551 }
552 if err := b.Finish(); err != nil {
553 t.Errorf("Finish: %v", err)
554 }
555
556 fs, _ := filepath.Glob(dir + "/*.zoekt")
557 if len(fs) != 1 {
558 t.Fatalf("want a shard, got %v", fs)
559 }
560
561 ss, err := shards.NewDirectorySearcher(dir)
562 if err != nil {
563 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
564 }
565 defer ss.Close()
566
567 ctx := context.Background()
568 result, err := ss.List(ctx, &query.Const{Value: true}, nil)
569 if err != nil {
570 t.Fatalf("List: %v", err)
571 }
572
573 if len(result.Repos) != 1 || result.Repos[0].Repository.Name != "repo" {
574 t.Errorf("got %+v, want 1 repo.", result.Repos)
575 }
576}
577
578func TestDeltaShards(t *testing.T) {
579 // TODO: Need to write a test for compound shards as well.
580 type step struct {
581 name string
582 documents []zoekt.Document
583 optFn func(t *testing.T, o *Options)
584
585 query string
586 expectedDocuments []zoekt.Document
587 }
588
589 var (
590 fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")}
591 fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")}
592
593 fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")}
594
595 barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")}
596 barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")}
597 )
598
599 for _, test := range []struct {
600 name string
601 steps []step
602 }{
603 {
604 name: "tombstone older documents",
605 steps: []step{
606 {
607 name: "setup",
608 documents: []zoekt.Document{barAtMain, fooAtMain},
609 query: "common",
610 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
611 },
612 {
613 name: "add new version of foo, tombstone older ones",
614 documents: []zoekt.Document{fooAtMainV2},
615 optFn: func(t *testing.T, o *Options) {
616 o.IsDelta = true
617 o.changedOrRemovedFiles = []string{"foo.go"}
618 },
619 query: "common",
620 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2},
621 },
622 {
623 name: "add new version of bar, tombstone older ones",
624 documents: []zoekt.Document{barAtMainV2},
625 optFn: func(t *testing.T, o *Options) {
626 o.IsDelta = true
627 o.changedOrRemovedFiles = []string{"bar.go"}
628 },
629 query: "common",
630 expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2},
631 },
632 },
633 },
634 {
635 name: "tombstone older documents even if the latest shard has no documents",
636 steps: []step{
637 {
638 name: "setup",
639 documents: []zoekt.Document{barAtMain, fooAtMain},
640 query: "common",
641 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
642 },
643 {
644 // a build with no documents could represent a deletion
645 name: "tombstone older documents",
646 documents: nil,
647 optFn: func(t *testing.T, o *Options) {
648 o.IsDelta = true
649 o.changedOrRemovedFiles = []string{"foo.go"}
650 },
651 query: "common",
652 expectedDocuments: []zoekt.Document{barAtMain},
653 },
654 },
655 },
656 {
657 name: "tombstones affect document across branches",
658 steps: []step{
659 {
660 name: "setup",
661 documents: []zoekt.Document{barAtMain, fooAtMainAndRelease},
662 query: "common",
663 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease},
664 },
665 {
666
667 name: "tombstone foo",
668 documents: nil,
669 optFn: func(t *testing.T, o *Options) {
670 o.IsDelta = true
671 o.changedOrRemovedFiles = []string{"foo.go"}
672 },
673 query: "common",
674 expectedDocuments: []zoekt.Document{barAtMain},
675 },
676 },
677 },
678 } {
679 t.Run(test.name, func(t *testing.T) {
680 indexDir := t.TempDir()
681
682 branchSet := make(map[string]struct{})
683
684 for _, s := range test.steps {
685 for _, d := range s.documents {
686 for _, b := range d.Branches {
687 branchSet[b] = struct{}{}
688 }
689 }
690 }
691
692 for _, step := range test.steps {
693 repository := zoekt.Repository{ID: 1, Name: "repository"}
694
695 for b := range branchSet {
696 repository.Branches = append(repository.Branches, zoekt.RepositoryBranch{Name: b})
697 }
698
699 sort.Slice(repository.Branches, func(i, j int) bool {
700 a, b := repository.Branches[i], repository.Branches[j]
701
702 return a.Name < b.Name
703 })
704
705 buildOpts := Options{
706 IndexDir: indexDir,
707 RepositoryDescription: repository,
708 }
709 buildOpts.SetDefaults()
710
711 if step.optFn != nil {
712 step.optFn(t, &buildOpts)
713 }
714
715 b, err := NewBuilder(buildOpts)
716 if err != nil {
717 t.Fatalf("step %q: NewBuilder: %s", step.name, err)
718 }
719
720 for _, d := range step.documents {
721 err := b.Add(d)
722 if err != nil {
723 t.Fatalf("step %q: adding document %q to builder: %s", step.name, d.Name, err)
724 }
725 }
726
727 // Call b.Finish() multiple times to ensure that it is idempotent
728 for i := 0; i < 3; i++ {
729
730 err = b.Finish()
731 if err != nil {
732 t.Fatalf("step %q: finishing builder (call #%d): %s", step.name, i, err)
733 }
734 }
735
736 err = b.Finish()
737 if err != nil {
738 t.Fatalf("step %q: finishing builder: %s", step.name, err)
739 }
740
741 state, _ := buildOpts.IndexState()
742 if diff := cmp.Diff(IndexStateEqual, state); diff != "" {
743 t.Errorf("unexpected diff in index state (-want +got):\n%s", diff)
744 }
745
746 ss, err := shards.NewDirectorySearcher(indexDir)
747 if err != nil {
748 t.Fatalf("step %q: NewDirectorySearcher(%s): %s", step.name, indexDir, err)
749 }
750 defer ss.Close()
751
752 searchOpts := &zoekt.SearchOptions{Whole: true}
753 q := &query.Substring{Pattern: step.query}
754
755 result, err := ss.Search(context.Background(), q, searchOpts)
756 if err != nil {
757 t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err)
758 }
759
760 var receivedDocuments []zoekt.Document
761 for _, f := range result.Files {
762 receivedDocuments = append(receivedDocuments, zoekt.Document{
763 Name: f.FileName,
764 Content: f.Content,
765 })
766 }
767
768 cmpOpts := []cmp.Option{
769 cmpopts.IgnoreFields(zoekt.Document{}, "Branches"),
770 cmpopts.SortSlices(func(a, b zoekt.Document) bool {
771 if a.Name < b.Name {
772 return true
773 }
774
775 return bytes.Compare(a.Content, b.Content) < 0
776 }),
777 }
778
779 if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, cmpOpts...); diff != "" {
780 t.Errorf("step %q: diff in received documents (-want +got):%s\n:", step.name, diff)
781 }
782 }
783 })
784 }
785}
786
// With this test we want to capture regressions in the names returned by our
// language detection and the scores assigned to file matches. We rely on the
// detected language and its spelling, for example, in scoring (see scoreKind).
//
// Each case indexes a single file, runs one query against it and checks the
// score and language of the single resulting file match. The comment above
// every wantScore breaks the expected score down into its components.
func TestScoring(t *testing.T) {
	// Skip when CI is not set and checkCTags reports nothing; with CI set the
	// test always runs.
	if os.Getenv("CI") == "" && checkCTags() == "" {
		t.Skip("ctags not available")
	}
	dir := t.TempDir()

	opts := Options{
		IndexDir: dir,
		RepositoryDescription: zoekt.Repository{
			Name: "repo",
		},
	}

	exampleJava, err := os.ReadFile("./testdata/example.java")
	if err != nil {
		t.Fatal(err)
	}

	exampleKotlin, err := os.ReadFile("./testdata/example.kt")
	if err != nil {
		t.Fatal(err)
	}

	exampleCpp, err := os.ReadFile("./testdata/example.cc")
	if err != nil {
		t.Fatal(err)
	}

	examplePython, err := os.ReadFile("./testdata/example.py")
	if err != nil {
		t.Fatal(err)
	}

	exampleRuby, err := os.ReadFile("./testdata/example.rb")
	if err != nil {
		t.Fatal(err)
	}

	exampleScala, err := os.ReadFile("./testdata/example.scala")
	if err != nil {
		t.Fatal(err)
	}

	cases := []struct {
		fileName     string
		content      []byte
		query        query.Q
		wantLanguage string
		wantScore    float64
	}{
		//
		// Kotlin
		//
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "oxyPreloader"},
			wantLanguage: "Kotlin",
			// 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) + 10 (file order)
			wantScore: 6560,
		},
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "ViewMetadata"},
			wantLanguage: "Kotlin",
			// 7000 (symbol) + 900 (Kotlin interface) + 500 (word) + 10 (file order)
			wantScore: 8410,
		},
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "onScrolled"},
			wantLanguage: "Kotlin",
			// 7000 (symbol) + 800 (Kotlin method) + 500 (word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "PreloadErrorHandler"},
			wantLanguage: "Kotlin",
			// 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) + 10 (file order)
			wantScore: 8210,
		},
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"},
			wantLanguage: "Kotlin",
			// 7000 (symbol) + 600 (Kotlin constant) + 500 (word) + 10 (file order)
			wantScore: 8110,
		},
		{
			fileName:     "example.kt",
			content:      exampleKotlin,
			query:        &query.Substring{Content: true, Pattern: "scrollState"},
			wantLanguage: "Kotlin",
			// 7000 (symbol) + 500 (Kotlin variable) + 500 (word) + 10 (file order)
			wantScore: 8010,
		},
		//
		// Java
		//
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "nerClass"},
			wantLanguage: "Java",
			// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 10 (file order)
			wantScore: 6560,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "StaticClass"},
			wantLanguage: "Java",
			// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) + 10 (file order)
			wantScore: 7010,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "innerEnum"},
			wantLanguage: "Java",
			// 7000 (symbol) + 900 (Java enum) + 500 (word) + 10 (file order)
			wantScore: 8410,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "innerInterface"},
			wantLanguage: "Java",
			// 7000 (symbol) + 800 (Java interface) + 500 (word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "innerMethod"},
			wantLanguage: "Java",
			// 7000 (symbol) + 700 (Java method) + 500 (word) + 10 (file order)
			wantScore: 8210,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "field"},
			wantLanguage: "Java",
			// 7000 (symbol) + 600 (Java field) + 500 (word) + 10 (file order)
			wantScore: 8110,
		},
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Content: true, Pattern: "B"},
			wantLanguage: "Java",
			// 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 10 (file order)
			wantScore: 8010,
		},
		// 2 Atoms (1x content and 1x filename)
		{
			fileName:     "example.java",
			content:      exampleJava,
			query:        &query.Substring{Pattern: "example"}, // matches filename and a Java field
			wantLanguage: "Java",
			// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) + 10 (file order)
			wantScore: 6810,
		},
		// 3 Atoms (2x content, 1x filename)
		{
			fileName: "example.java",
			content:  exampleJava,
			query: &query.Or{Children: []query.Q{
				&query.Substring{Pattern: "example"},                          // matches filename and Java field
				&query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method
			}},
			wantLanguage: "Java",
			// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) + 10 (file order)
			wantScore: 8476.667,
		},
		// 4 Atoms (4x content)
		{
			fileName: "example.java",
			content:  exampleJava,
			query: &query.Or{Children: []query.Q{
				&query.Substring{Content: true, Pattern: "testAnon"},
				&query.Substring{Content: true, Pattern: "Override"},
				&query.Substring{Content: true, Pattern: "InnerEnum"},
				&query.Substring{Content: true, Pattern: "app"},
			}},
			wantLanguage: "Java",
			// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order)
			wantScore: 8710,
		},
		//
		// Go
		//
		{
			fileName:     "a/b/c/config.go",
			query:        &query.Substring{FileName: true, Pattern: "config"},
			wantLanguage: "Go",
			// 5500 (partial base at boundary) + 500 (word) + 10 (file order)
			wantScore: 6010,
		},
		{
			fileName:     "a/b/c/config.go",
			query:        &query.Substring{FileName: true, Pattern: "config.go"},
			wantLanguage: "Go",
			// 7000 (full base match) + 500 (word) + 10 (file order)
			wantScore: 7510,
		},
		{
			fileName:     "a/config/c/d.go",
			query:        &query.Substring{FileName: true, Pattern: "config"},
			wantLanguage: "Go",
			// 500 (word) + 10 (file order)
			wantScore: 510,
		},
		{
			fileName: "src/net/http/client.go",
			content: []byte(`
package http
type aInterface interface {}
`),
			query:        &query.Substring{Content: true, Pattern: "aInterface"},
			wantLanguage: "Go",
			// 7000 (full base match) + 1000 (Go interface) + 500 (word) + 10 (file order)
			wantScore: 8510,
		},
		{
			fileName: "src/net/http/client.go",
			content: []byte(`
package http
type aStruct struct {}
`),
			query:        &query.Substring{Content: true, Pattern: "aStruct"},
			wantLanguage: "Go",
			// 7000 (full base match) + 900 (Go struct) + 500 (word) + 10 (file order)
			wantScore: 8410,
		},
		{
			fileName: "src/net/http/client.go",
			content: []byte(`
package http
func aFunc() bool {}
`),
			query:        &query.Substring{Content: true, Pattern: "aFunc"},
			wantLanguage: "Go",
			// 7000 (full base match) + 800 (Go function) + 500 (word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName: "src/net/http/client.go",
			content: []byte(`
package http
func Get() {
	panic("")
}
`),
			query: &query.And{Children: []query.Q{
				&query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}},
				&query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}}},
			wantLanguage: "Go",
			// 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom) + 10 (file order)
			wantScore: 8560,
		},
		//
		// C++
		//
		{
			fileName:     "example.cc",
			content:      exampleCpp,
			query:        &query.Substring{Content: true, Pattern: "FooClass"},
			wantLanguage: "C++",
			// 7000 (Symbol) + 1000 (C++ class) + 500 (full word) + 10 (file order)
			wantScore: 8510,
		},
		{
			fileName:     "example.cc",
			content:      exampleCpp,
			query:        &query.Substring{Content: true, Pattern: "NestedEnum"},
			wantLanguage: "C++",
			// 7000 (Symbol) + 900 (C++ enum) + 500 (full word) + 10 (file order)
			wantScore: 8410,
		},
		{
			fileName:     "example.cc",
			content:      exampleCpp,
			query:        &query.Substring{Content: true, Pattern: "main"},
			wantLanguage: "C++",
			// 7000 (Symbol) + 800 (C++ function) + 500 (full word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName:     "example.cc",
			content:      exampleCpp,
			query:        &query.Substring{Content: true, Pattern: "FooStruct"},
			wantLanguage: "C++",
			// 7000 (Symbol) + 700 (C++ struct) + 500 (full word) + 10 (file order)
			wantScore: 8210,
		},
		{
			fileName:     "example.cc",
			content:      exampleCpp,
			query:        &query.Substring{Content: true, Pattern: "TheUnion"},
			wantLanguage: "C++",
			// 7000 (Symbol) + 600 (C++ union) + 500 (full word) + 10 (file order)
			wantScore: 8110,
		},
		//
		// Python
		//
		{
			fileName:     "example.py",
			content:      examplePython,
			query:        &query.Substring{Content: true, Pattern: "C1"},
			wantLanguage: "Python",
			// 7000 (symbol) + 1000 (Python class) + 500 (word) + 10 (file order)
			wantScore: 8510,
		},
		{
			fileName:     "example.py",
			content:      examplePython,
			query:        &query.Substring{Content: true, Pattern: "g"},
			wantLanguage: "Python",
			// 7000 (symbol) + 800 (Python function) + 500 (word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName:     "example.py",
			content:      examplePython,
			query:        &query.Substring{Content: true, Pattern: "__init__"},
			wantLanguage: "Python",
			// 7000 (symbol) + 400 (Python member) + 50 (partial word) + 10 (file order)
			wantScore: 7460,
		},
		//
		// Ruby
		//
		{
			fileName:     "example.rb",
			content:      exampleRuby,
			query:        &query.Substring{Content: true, Pattern: "Parental"},
			wantLanguage: "Ruby",
			// 7000 (symbol) + 1000 (Ruby class) + 500 (word) + 10 (file order)
			wantScore: 8510,
		},
		{
			fileName:     "example.rb",
			content:      exampleRuby,
			query:        &query.Substring{Content: true, Pattern: "parental_func"},
			wantLanguage: "Ruby",
			// 7000 (symbol) + 900 (Ruby method) + 500 (word) + 10 (file order)
			wantScore: 8410,
		},
		{
			fileName:     "example.rb",
			content:      exampleRuby,
			query:        &query.Substring{Content: true, Pattern: "MyModule"},
			wantLanguage: "Ruby",
			// 7000 (symbol) + 700 (Ruby module) + 500 (word) + 10 (file order)
			wantScore: 8210,
		},
		//
		// Scala
		//
		{
			fileName:     "example.scala",
			content:      exampleScala,
			query:        &query.Substring{Content: true, Pattern: "SymbolIndexBucket"},
			wantLanguage: "Scala",
			// 7000 (symbol) + 1000 (Scala class) + 500 (word) + 10 (file order)
			wantScore: 8510,
		},
		{
			fileName:     "example.scala",
			content:      exampleScala,
			query:        &query.Substring{Content: true, Pattern: "stdLibPatches"},
			wantLanguage: "Scala",
			// 7000 (symbol) + 800 (Scala object) + 500 (word) + 10 (file order)
			wantScore: 8310,
		},
		{
			fileName:     "example.scala",
			content:      exampleScala,
			query:        &query.Substring{Content: true, Pattern: "close"},
			wantLanguage: "Scala",
			// 7000 (symbol) + 700 (Scala method) + 500 (word) + 10 (file order)
			wantScore: 8210,
		},
		{
			fileName:     "example.scala",
			content:      exampleScala,
			query:        &query.Substring{Content: true, Pattern: "javaSymbol"},
			wantLanguage: "Scala",
			// 7000 (symbol) + 500 (Scala method) + 500 (word) + 10 (file order)
			// NOTE(review): the case above uses 700 for a Scala method, so the
			// 500 here presumably belongs to a different symbol kind — confirm
			// against scoreKind and fix the label.
			wantScore: 8010,
		},
	}

	// Tolerance for the score comparison; needed for expectations that are
	// not whole numbers, such as 8476.667 above.
	epsilon := 0.01
	for _, c := range cases {
		// Subtests are named after the expected language, so several of them
		// share a name. Every case builds with the shared opts, i.e. into the
		// same index directory under the same repository name.
		t.Run(c.wantLanguage, func(t *testing.T) {
			b, err := NewBuilder(opts)
			if err != nil {
				t.Fatalf("NewBuilder: %v", err)
			}
			if err := b.AddFile(c.fileName, c.content); err != nil {
				t.Fatal(err)
			}
			if err := b.Finish(); err != nil {
				t.Fatalf("Finish: %v", err)
			}

			ss, err := shards.NewDirectorySearcher(dir)
			if err != nil {
				t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
			}
			defer ss.Close()

			// DebugScore is requested so the failure message below can
			// include the Debug and DebugScore fields of the result.
			srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{DebugScore: true})
			if err != nil {
				t.Fatal(err)
			}

			if got, want := len(srs.Files), 1; got != want {
				t.Fatalf("file matches: want %d, got %d", want, got)
			}

			if got := srs.Files[0].Score; math.Abs(got-c.wantScore) > epsilon {
				t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
			}

			if got := srs.Files[0].Language; got != c.wantLanguage {
				t.Fatalf("want %s, got %s", c.wantLanguage, got)
			}
		})
	}
}
1230
1231func TestScoringWithDocumentRanks(t *testing.T) {
1232 if os.Getenv("CI") == "" && checkCTags() == "" {
1233 t.Skip("ctags not available")
1234 }
1235 dir := t.TempDir()
1236
1237 opts := Options{
1238 IndexDir: dir,
1239 RepositoryDescription: zoekt.Repository{
1240 Name: "repo",
1241 },
1242 DocumentRanksVersion: "ranking",
1243 }
1244
1245 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
1246 exampleJava, err := os.ReadFile("./testdata/example.java")
1247 if err != nil {
1248 t.Fatal(err)
1249 }
1250
1251 cases := []struct {
1252 name string
1253 documentRank float64
1254 documentRanksWeight float64
1255 wantScore float64
1256 }{
1257 {
1258 name: "score with no document ranks",
1259 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
1260 wantScore: 7010.00,
1261 },
1262 {
1263 name: "score with document ranks",
1264 documentRank: 0.8,
1265 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) + 10 (file order)
1266 wantScore: 7235.00,
1267 },
1268 {
1269 name: "score with custom document ranks weight",
1270 documentRank: 0.8,
1271 documentRanksWeight: 1000.0,
1272 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) + 10 (file order)
1273 wantScore: 7035.00,
1274 },
1275 }
1276
1277 for _, c := range cases {
1278 t.Run(c.name, func(t *testing.T) {
1279 b, err := NewBuilder(opts)
1280 if err != nil {
1281 t.Fatalf("NewBuilder: %v", err)
1282 }
1283
1284 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava, Ranks: []float64{c.documentRank}})
1285 if err != nil {
1286 t.Fatal(err)
1287 }
1288
1289 if err := b.Finish(); err != nil {
1290 t.Fatalf("Finish: %v", err)
1291 }
1292
1293 ss, err := shards.NewDirectorySearcher(dir)
1294 if err != nil {
1295 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
1296 }
1297 defer ss.Close()
1298
1299 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
1300 UseDocumentRanks: true,
1301 DocumentRanksWeight: c.documentRanksWeight,
1302 DebugScore: true,
1303 })
1304
1305 if err != nil {
1306 t.Fatal(err)
1307 }
1308
1309 if got, want := len(srs.Files), 1; got != want {
1310 t.Fatalf("file matches: want %d, got %d", want, got)
1311 }
1312
1313 if got := srs.Files[0].Score; got != c.wantScore {
1314 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
1315 }
1316 })
1317 }
1318}
1319
1320func TestRepoRanks(t *testing.T) {
1321 if os.Getenv("CI") == "" && checkCTags() == "" {
1322 t.Skip("ctags not available")
1323 }
1324 dir := t.TempDir()
1325
1326 opts := Options{
1327 IndexDir: dir,
1328 RepositoryDescription: zoekt.Repository{
1329 Name: "repo",
1330 },
1331 DocumentRanksVersion: "ranking",
1332 }
1333
1334 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
1335 exampleJava, err := os.ReadFile("./testdata/example.java")
1336 if err != nil {
1337 t.Fatal(err)
1338 }
1339
1340 cases := []struct {
1341 name string
1342 repoRank uint16
1343 wantScore float64
1344 }{
1345 {
1346 name: "no shard rank",
1347 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
1348 wantScore: 7010.00,
1349 },
1350 {
1351 name: "medium shard rank",
1352 repoRank: 30000,
1353 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo rank)
1354 wantScore: 7019.16,
1355 },
1356 {
1357 name: "high shard rank",
1358 repoRank: 60000,
1359 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank)
1360 wantScore: 7028.31,
1361 },
1362 }
1363
1364 for _, c := range cases {
1365 t.Run(c.name, func(t *testing.T) {
1366 opts.RepositoryDescription = zoekt.Repository{
1367 Name: "repo",
1368 Rank: c.repoRank,
1369 }
1370
1371 b, err := NewBuilder(opts)
1372 if err != nil {
1373 t.Fatalf("NewBuilder: %v", err)
1374 }
1375
1376 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava})
1377 if err != nil {
1378 t.Fatal(err)
1379 }
1380
1381 if err := b.Finish(); err != nil {
1382 t.Fatalf("Finish: %v", err)
1383 }
1384
1385 ss, err := shards.NewDirectorySearcher(dir)
1386 if err != nil {
1387 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
1388 }
1389 defer ss.Close()
1390
1391 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
1392 UseDocumentRanks: true,
1393 DebugScore: true,
1394 })
1395
1396 if err != nil {
1397 t.Fatal(err)
1398 }
1399
1400 if got, want := len(srs.Files), 1; got != want {
1401 t.Fatalf("file matches: want %d, got %d", want, got)
1402 }
1403
1404 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 {
1405 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
1406 }
1407 })
1408 }
1409}