fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "fmt"
22 "log"
23 "math"
24 "os"
25 "path/filepath"
26 "reflect"
27 "runtime"
28 "sort"
29 "strings"
30 "testing"
31 "time"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35 "github.com/grafana/regexp"
36
37 "github.com/sourcegraph/zoekt"
38 "github.com/sourcegraph/zoekt/query"
39 "github.com/sourcegraph/zoekt/shards"
40)
41
42func TestBasic(t *testing.T) {
43 dir := t.TempDir()
44
45 opts := Options{
46 IndexDir: dir,
47 ShardMax: 1024,
48 RepositoryDescription: zoekt.Repository{
49 Name: "repo",
50 },
51 Parallelism: 2,
52 SizeMax: 1 << 20,
53 }
54
55 b, err := NewBuilder(opts)
56 if err != nil {
57 t.Fatalf("NewBuilder: %v", err)
58 }
59
60 for i := 0; i < 4; i++ {
61 s := fmt.Sprintf("%d", i)
62 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil {
63 t.Fatal(err)
64 }
65 }
66
67 if err := b.Finish(); err != nil {
68 t.Errorf("Finish: %v", err)
69 }
70
71 fs, _ := filepath.Glob(dir + "/*.zoekt")
72 if len(fs) <= 1 {
73 t.Fatalf("want multiple shards, got %v", fs)
74 }
75
76 _, md0, err := zoekt.ReadMetadataPath(fs[0])
77 if err != nil {
78 t.Fatal(err)
79 }
80 for _, f := range fs[1:] {
81 _, md, err := zoekt.ReadMetadataPath(f)
82 if err != nil {
83 t.Fatal(err)
84 }
85 if md.IndexTime != md0.IndexTime {
86 t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime)
87 }
88 if md.ID != md0.ID {
89 t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID)
90 }
91 }
92
93 ss, err := shards.NewDirectorySearcher(dir)
94 if err != nil {
95 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
96 }
97 defer ss.Close()
98
99 q, err := query.Parse("111")
100 if err != nil {
101 t.Fatalf("Parse(111): %v", err)
102 }
103
104 var sOpts zoekt.SearchOptions
105 ctx := context.Background()
106 result, err := ss.Search(ctx, q, &sOpts)
107 if err != nil {
108 t.Fatalf("Search(%v): %v", q, err)
109 }
110
111 if len(result.Files) != 1 {
112 t.Errorf("got %v, want 1 file.", result.Files)
113 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
114 t.Errorf("got file %q, want %q", gotFile, wantFile)
115 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo"; gotRepo != wantRepo {
116 t.Errorf("got repo %q, want %q", gotRepo, wantRepo)
117 }
118
119 t.Run("meta file", func(t *testing.T) {
120 // use retryTest to allow for the directory watcher to notice the meta
121 // file
122 retryTest(t, func(fatalf func(format string, args ...interface{})) {
123 // Add a .meta file for each shard with repo.Name set to
124 // "repo-mutated". We do this inside retry helper since we have noticed
125 // some flakiness on github CI.
126 for _, p := range fs {
127 repos, _, err := zoekt.ReadMetadataPath(p)
128 if err != nil {
129 t.Fatal(err)
130 }
131 repos[0].Name = "repo-mutated"
132 b, err := json.Marshal(repos[0])
133 if err != nil {
134 t.Fatal(err)
135 }
136
137 if err := os.WriteFile(p+".meta", b, 0600); err != nil {
138 t.Fatal(err)
139 }
140 }
141
142 result, err := ss.Search(ctx, q, &sOpts)
143 if err != nil {
144 fatalf("Search(%v): %v", q, err)
145 }
146
147 if len(result.Files) != 1 {
148 fatalf("got %v, want 1 file.", result.Files)
149 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile {
150 fatalf("got file %q, want %q", gotFile, wantFile)
151 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo-mutated"; gotRepo != wantRepo {
152 fatalf("got repo %q, want %q", gotRepo, wantRepo)
153 }
154 })
155 })
156}
157
// retryTest will retry f until min(t.Deadline(), time.Minute). It returns
// once f doesn't call fatalf.
//
// Each attempt runs f on its own goroutine with a fatalf that aborts the
// attempt without marking t as failed. Between attempts retryTest sleeps,
// starting at 15ms and growing the pause by a factor of 1.5 each time. When
// the next pause would pass the deadline, f is run one last time on the
// calling goroutine with t.Fatalf, so a persistent failure is reported on t.
func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) {
	t.Helper()

	sleep := 10 * time.Millisecond
	deadline := time.Now().Add(time.Minute)
	if d, ok := t.Deadline(); ok && d.Before(deadline) {
		// give 1s for us to do a final test run
		deadline = d.Add(-time.Second)
	}

	for {
		done := make(chan bool)
		go func() {
			// When the attempt is aborted through runtime.Goexit this close
			// still runs, so the receive below yields false.
			defer close(done)

			f(func(format string, args ...interface{}) {
				runtime.Goexit()
			})

			done <- true
		}()

		if success := <-done; success {
			return
		}

		// each time we increase sleep by 1.5. This has to assign to the outer
		// variable: redeclaring it with := would restart from the initial
		// value on every iteration and the pause would never grow.
		sleep = sleep*2 - sleep/2
		if time.Now().Add(sleep).After(deadline) {
			break
		}
		time.Sleep(sleep)
	}

	// final run for the test, using the real t.Fatalf
	f(t.Fatalf)
}
198
199func TestLargeFileOption(t *testing.T) {
200 dir := t.TempDir()
201
202 sizeMax := 1000
203 opts := Options{
204 IndexDir: dir,
205 LargeFiles: []string{"F0", "F1", "F2", "!F1"},
206 RepositoryDescription: zoekt.Repository{
207 Name: "repo",
208 },
209 SizeMax: sizeMax,
210 }
211
212 b, err := NewBuilder(opts)
213 if err != nil {
214 t.Fatalf("NewBuilder: %v", err)
215 }
216
217 for i := 0; i < 4; i++ {
218 s := fmt.Sprintf("%d", i)
219 if err := b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1))); err != nil {
220 t.Fatal(err)
221 }
222 }
223
224 if err := b.Finish(); err != nil {
225 t.Errorf("Finish: %v", err)
226 }
227
228 ss, err := shards.NewDirectorySearcher(dir)
229 if err != nil {
230 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
231 }
232
233 q, err := query.Parse("aaa")
234 if err != nil {
235 t.Fatalf("Parse(aaa): %v", err)
236 }
237
238 var sOpts zoekt.SearchOptions
239 ctx := context.Background()
240 result, err := ss.Search(ctx, q, &sOpts)
241 if err != nil {
242 t.Fatalf("Search(%v): %v", q, err)
243 }
244
245 if len(result.Files) != 2 {
246 t.Errorf("got %v files, want 2 files.", len(result.Files))
247 }
248 defer ss.Close()
249}
250
251func TestUpdate(t *testing.T) {
252 dir := t.TempDir()
253
254 opts := Options{
255 IndexDir: dir,
256 ShardMax: 1024,
257 RepositoryDescription: zoekt.Repository{
258 Name: "repo",
259 FileURLTemplate: "url",
260 },
261 Parallelism: 2,
262 SizeMax: 1 << 20,
263 }
264
265 if b, err := NewBuilder(opts); err != nil {
266 t.Fatalf("NewBuilder: %v", err)
267 } else {
268 if err := b.AddFile("F", []byte("hoi")); err != nil {
269 t.Errorf("AddFile: %v", err)
270 }
271 if err := b.Finish(); err != nil {
272 t.Errorf("Finish: %v", err)
273 }
274 }
275 ss, err := shards.NewDirectorySearcher(dir)
276 if err != nil {
277 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
278 }
279
280 ctx := context.Background()
281 repos, err := ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil)
282 if err != nil {
283 t.Fatalf("List: %v", err)
284 }
285
286 if len(repos.Repos) != 1 {
287 t.Errorf("List(repo): got %v, want 1 repo", repos.Repos)
288 }
289
290 fs, err := filepath.Glob(filepath.Join(dir, "*"))
291 if err != nil {
292 t.Fatalf("glob: %v", err)
293 }
294
295 opts.RepositoryDescription = zoekt.Repository{
296 Name: "repo2",
297 FileURLTemplate: "url2",
298 }
299
300 if b, err := NewBuilder(opts); err != nil {
301 t.Fatalf("NewBuilder: %v", err)
302 } else {
303 if err := b.AddFile("F", []byte("hoi")); err != nil {
304 t.Errorf("AddFile: %v", err)
305 }
306 if err := b.Finish(); err != nil {
307 t.Errorf("Finish: %v", err)
308 }
309 }
310
311 // This is ugly, and potentially flaky, but there is no
312 // observable synchronization for the Sharded searcher, so
313 // this is the best we can do.
314 time.Sleep(100 * time.Millisecond)
315
316 ctx = context.Background()
317 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
318 t.Fatalf("List: %v", err)
319 } else if len(repos.Repos) != 2 {
320 t.Errorf("List(repo): got %v, want 2 repos", repos.Repos)
321 }
322
323 for _, fn := range fs {
324 log.Printf("removing %s", fn)
325 if err := os.Remove(fn); err != nil {
326 t.Fatalf("Remove(%s): %v", fn, err)
327 }
328 }
329
330 time.Sleep(100 * time.Millisecond)
331
332 ctx = context.Background()
333 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil {
334 t.Fatalf("List: %v", err)
335 } else if len(repos.Repos) != 1 {
336 var ss []string
337 for _, r := range repos.Repos {
338 ss = append(ss, r.Repository.Name)
339 }
340 t.Errorf("List(repo): got %v, want 1 repo", ss)
341 }
342}
343
344func TestDeleteOldShards(t *testing.T) {
345 dir := t.TempDir()
346
347 opts := Options{
348 IndexDir: dir,
349 ShardMax: 1024,
350 RepositoryDescription: zoekt.Repository{
351 Name: "repo",
352 FileURLTemplate: "url",
353 },
354 SizeMax: 1 << 20,
355 }
356 opts.SetDefaults()
357
358 b, err := NewBuilder(opts)
359 if err != nil {
360 t.Fatalf("NewBuilder: %v", err)
361 }
362 for i := 0; i < 4; i++ {
363 s := fmt.Sprintf("%d\n", i)
364 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
365 t.Errorf("AddFile: %v", err)
366 }
367 }
368 if err := b.Finish(); err != nil {
369 t.Errorf("Finish: %v", err)
370 }
371
372 glob := filepath.Join(dir, "*.zoekt")
373 fs, err := filepath.Glob(glob)
374 if err != nil {
375 t.Fatalf("Glob(%s): %v", glob, err)
376 } else if len(fs) != 4 {
377 t.Fatalf("Glob(%s): got %v, want 4 shards", glob, fs)
378 }
379
380 if fi, err := os.Lstat(fs[0]); err != nil {
381 t.Fatalf("Lstat: %v", err)
382 } else if fi.Mode()&0o666 == 0o600 {
383 // This fails spuriously if your umask is very restrictive.
384 t.Errorf("got mode %o, should respect umask.", fi.Mode())
385 }
386
387 // Do again, without sharding.
388 opts.ShardMax = 1 << 20
389 b, err = NewBuilder(opts)
390 if err != nil {
391 t.Fatalf("NewBuilder: %v", err)
392 }
393 for i := 0; i < 4; i++ {
394 s := fmt.Sprintf("%d\n", i)
395 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil {
396 t.Fatal(err)
397 }
398 }
399 if err := b.Finish(); err != nil {
400 t.Errorf("Finish: %v", err)
401 }
402
403 fs, err = filepath.Glob(glob)
404 if err != nil {
405 t.Fatalf("Glob(%s): %v", glob, err)
406 } else if len(fs) != 1 {
407 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
408 }
409
410 // Again, but don't index anything; should leave old shards intact.
411 b, err = NewBuilder(opts)
412 if err != nil {
413 t.Fatalf("NewBuilder: %v", err)
414 }
415 if err := b.Finish(); err != nil {
416 t.Errorf("Finish: %v", err)
417 }
418
419 fs, err = filepath.Glob(glob)
420 if err != nil {
421 t.Fatalf("Glob(%s): %v", glob, err)
422 } else if len(fs) != 1 {
423 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs)
424 }
425}
426
427func TestPartialSuccess(t *testing.T) {
428 dir := t.TempDir()
429
430 opts := Options{
431 IndexDir: dir,
432 ShardMax: 1024,
433 SizeMax: 1 << 20,
434 Parallelism: 1,
435 }
436 opts.RepositoryDescription.Name = "repo"
437 opts.SetDefaults()
438
439 b, err := NewBuilder(opts)
440 if err != nil {
441 t.Fatalf("NewBuilder: %v", err)
442 }
443
444 for i := 0; i < 4; i++ {
445 nm := fmt.Sprintf("F%d", i)
446 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128)))
447 }
448 b.buildError = fmt.Errorf("any error")
449
450 // No error checking.
451 _ = b.Finish()
452
453 // Finish cleans up temporary files.
454 if fs, err := filepath.Glob(dir + "/*"); err != nil {
455 t.Errorf("glob(%s): %v", dir, err)
456 } else if len(fs) != 0 {
457 t.Errorf("got shards %v, want []", fs)
458 }
459}
460
461type filerankCase struct {
462 name string
463 docs []*zoekt.Document
464 want []int
465}
466
467func testFileRankAspect(t *testing.T, c filerankCase) {
468 var want []*zoekt.Document
469 for _, j := range c.want {
470 want = append(want, c.docs[j])
471 }
472
473 got := make([]*zoekt.Document, len(c.docs))
474 copy(got, c.docs)
475 sortDocuments(got)
476
477 print := func(ds []*zoekt.Document) string {
478 r := ""
479 for _, d := range ds {
480 r += fmt.Sprintf("%v, ", d)
481 }
482 return r
483 }
484 if !reflect.DeepEqual(got, want) {
485 t.Errorf("got docs [%v], want [%v]", print(got), print(want))
486 }
487}
488
489func TestFileRank(t *testing.T) {
490 for _, c := range []filerankCase{{
491 name: "filename",
492 docs: []*zoekt.Document{
493 {
494 Name: "longlonglong",
495 Content: []byte("bla"),
496 },
497 {
498 Name: "short",
499 Content: []byte("bla"),
500 },
501 },
502 want: []int{1, 0},
503 }, {
504 name: "test",
505 docs: []*zoekt.Document{
506 {
507 Name: "test",
508 Content: []byte("bla"),
509 },
510 {
511 Name: "longlonglong",
512 Content: []byte("bla"),
513 },
514 },
515 want: []int{1, 0},
516 }, {
517 name: "content",
518 docs: []*zoekt.Document{
519 {
520 Content: []byte("bla"),
521 },
522 {
523 Content: []byte("blablablabla"),
524 },
525 {
526 Content: []byte("blabla"),
527 },
528 },
529 want: []int{0, 2, 1},
530 }} {
531 t.Run(c.name, func(t *testing.T) {
532 testFileRankAspect(t, c)
533 })
534 }
535}
536
537func TestEmptyContent(t *testing.T) {
538 dir := t.TempDir()
539
540 opts := Options{
541 IndexDir: dir,
542 RepositoryDescription: zoekt.Repository{
543 Name: "repo",
544 },
545 }
546 opts.SetDefaults()
547
548 b, err := NewBuilder(opts)
549 if err != nil {
550 t.Fatalf("NewBuilder: %v", err)
551 }
552 if err := b.Finish(); err != nil {
553 t.Errorf("Finish: %v", err)
554 }
555
556 fs, _ := filepath.Glob(dir + "/*.zoekt")
557 if len(fs) != 1 {
558 t.Fatalf("want a shard, got %v", fs)
559 }
560
561 ss, err := shards.NewDirectorySearcher(dir)
562 if err != nil {
563 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
564 }
565 defer ss.Close()
566
567 ctx := context.Background()
568 result, err := ss.List(ctx, &query.Const{Value: true}, nil)
569 if err != nil {
570 t.Fatalf("List: %v", err)
571 }
572
573 if len(result.Repos) != 1 || result.Repos[0].Repository.Name != "repo" {
574 t.Errorf("got %+v, want 1 repo.", result.Repos)
575 }
576}
577
578func TestDeltaShards(t *testing.T) {
579 // TODO: Need to write a test for compound shards as well.
580 type step struct {
581 name string
582 documents []zoekt.Document
583 optFn func(t *testing.T, o *Options)
584
585 query string
586 expectedDocuments []zoekt.Document
587 }
588
589 var (
590 fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")}
591 fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")}
592
593 fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")}
594
595 barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")}
596 barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")}
597 )
598
599 for _, test := range []struct {
600 name string
601 steps []step
602 }{
603 {
604 name: "tombstone older documents",
605 steps: []step{
606 {
607 name: "setup",
608 documents: []zoekt.Document{barAtMain, fooAtMain},
609 query: "common",
610 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
611 },
612 {
613 name: "add new version of foo, tombstone older ones",
614 documents: []zoekt.Document{fooAtMainV2},
615 optFn: func(t *testing.T, o *Options) {
616 o.IsDelta = true
617 o.changedOrRemovedFiles = []string{"foo.go"}
618 },
619 query: "common",
620 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2},
621 },
622 {
623 name: "add new version of bar, tombstone older ones",
624 documents: []zoekt.Document{barAtMainV2},
625 optFn: func(t *testing.T, o *Options) {
626 o.IsDelta = true
627 o.changedOrRemovedFiles = []string{"bar.go"}
628 },
629 query: "common",
630 expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2},
631 },
632 },
633 },
634 {
635 name: "tombstone older documents even if the latest shard has no documents",
636 steps: []step{
637 {
638 name: "setup",
639 documents: []zoekt.Document{barAtMain, fooAtMain},
640 query: "common",
641 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain},
642 },
643 {
644 // a build with no documents could represent a deletion
645 name: "tombstone older documents",
646 documents: nil,
647 optFn: func(t *testing.T, o *Options) {
648 o.IsDelta = true
649 o.changedOrRemovedFiles = []string{"foo.go"}
650 },
651 query: "common",
652 expectedDocuments: []zoekt.Document{barAtMain},
653 },
654 },
655 },
656 {
657 name: "tombstones affect document across branches",
658 steps: []step{
659 {
660 name: "setup",
661 documents: []zoekt.Document{barAtMain, fooAtMainAndRelease},
662 query: "common",
663 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease},
664 },
665 {
666
667 name: "tombstone foo",
668 documents: nil,
669 optFn: func(t *testing.T, o *Options) {
670 o.IsDelta = true
671 o.changedOrRemovedFiles = []string{"foo.go"}
672 },
673 query: "common",
674 expectedDocuments: []zoekt.Document{barAtMain},
675 },
676 },
677 },
678 } {
679 t.Run(test.name, func(t *testing.T) {
680 indexDir := t.TempDir()
681
682 branchSet := make(map[string]struct{})
683
684 for _, s := range test.steps {
685 for _, d := range s.documents {
686 for _, b := range d.Branches {
687 branchSet[b] = struct{}{}
688 }
689 }
690 }
691
692 for _, step := range test.steps {
693 repository := zoekt.Repository{ID: 1, Name: "repository"}
694
695 for b := range branchSet {
696 repository.Branches = append(repository.Branches, zoekt.RepositoryBranch{Name: b})
697 }
698
699 sort.Slice(repository.Branches, func(i, j int) bool {
700 a, b := repository.Branches[i], repository.Branches[j]
701
702 return a.Name < b.Name
703 })
704
705 buildOpts := Options{
706 IndexDir: indexDir,
707 RepositoryDescription: repository,
708 }
709 buildOpts.SetDefaults()
710
711 if step.optFn != nil {
712 step.optFn(t, &buildOpts)
713 }
714
715 b, err := NewBuilder(buildOpts)
716 if err != nil {
717 t.Fatalf("step %q: NewBuilder: %s", step.name, err)
718 }
719
720 for _, d := range step.documents {
721 err := b.Add(d)
722 if err != nil {
723 t.Fatalf("step %q: adding document %q to builder: %s", step.name, d.Name, err)
724 }
725 }
726
727 // Call b.Finish() multiple times to ensure that it is idempotent
728 for i := 0; i < 3; i++ {
729
730 err = b.Finish()
731 if err != nil {
732 t.Fatalf("step %q: finishing builder (call #%d): %s", step.name, i, err)
733 }
734 }
735
736 err = b.Finish()
737 if err != nil {
738 t.Fatalf("step %q: finishing builder: %s", step.name, err)
739 }
740
741 state, _ := buildOpts.IndexState()
742 if diff := cmp.Diff(IndexStateEqual, state); diff != "" {
743 t.Errorf("unexpected diff in index state (-want +got):\n%s", diff)
744 }
745
746 ss, err := shards.NewDirectorySearcher(indexDir)
747 if err != nil {
748 t.Fatalf("step %q: NewDirectorySearcher(%s): %s", step.name, indexDir, err)
749 }
750 defer ss.Close()
751
752 searchOpts := &zoekt.SearchOptions{Whole: true}
753 q := &query.Substring{Pattern: step.query}
754
755 result, err := ss.Search(context.Background(), q, searchOpts)
756 if err != nil {
757 t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err)
758 }
759
760 var receivedDocuments []zoekt.Document
761 for _, f := range result.Files {
762 receivedDocuments = append(receivedDocuments, zoekt.Document{
763 Name: f.FileName,
764 Content: f.Content,
765 })
766 }
767
768 cmpOpts := []cmp.Option{
769 cmpopts.IgnoreFields(zoekt.Document{}, "Branches"),
770 cmpopts.SortSlices(func(a, b zoekt.Document) bool {
771 if a.Name < b.Name {
772 return true
773 }
774
775 return bytes.Compare(a.Content, b.Content) < 0
776 }),
777 }
778
779 if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, cmpOpts...); diff != "" {
780 t.Errorf("step %q: diff in received documents (-want +got):%s\n:", step.name, diff)
781 }
782 }
783 })
784 }
785}
786
787// With this test we want to capture regressions in the names returned by our
788// language detection and the scores assigned to file matches. We rely on the
789// detected language and its spelling, for example, in scoring (see scoreKind).
790func TestScoring(t *testing.T) {
791 if os.Getenv("CI") == "" && checkCTags() == "" {
792 t.Skip("ctags not available")
793 }
794 dir := t.TempDir()
795
796 opts := Options{
797 IndexDir: dir,
798 RepositoryDescription: zoekt.Repository{
799 Name: "repo",
800 },
801 }
802
803 exampleJava, err := os.ReadFile("./testdata/example.java")
804 if err != nil {
805 t.Fatal(err)
806 }
807
808 exampleKotlin, err := os.ReadFile("./testdata/example.kt")
809 if err != nil {
810 t.Fatal(err)
811 }
812
813 exampleCpp, err := os.ReadFile("./testdata/example.cc")
814 if err != nil {
815 t.Fatal(err)
816 }
817
818 exampleScala, err := os.ReadFile("./testdata/example.scala")
819 if err != nil {
820 t.Fatal(err)
821 }
822
823 cases := []struct {
824 fileName string
825 content []byte
826 query query.Q
827 wantLanguage string
828 wantScore float64
829 }{
830 //
831 // Kotlin
832 //
833 {
834 fileName: "example.kt",
835 content: exampleKotlin,
836 query: &query.Substring{Content: true, Pattern: "oxyPreloader"},
837 wantLanguage: "Kotlin",
838 // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) + 10 (file order)
839 wantScore: 6560,
840 },
841 {
842 fileName: "example.kt",
843 content: exampleKotlin,
844 query: &query.Substring{Content: true, Pattern: "ViewMetadata"},
845 wantLanguage: "Kotlin",
846 // 7000 (symbol) + 900 (Kotlin interface) + 500 (word) + 10 (file order)
847 wantScore: 8410,
848 },
849 {
850 fileName: "example.kt",
851 content: exampleKotlin,
852 query: &query.Substring{Content: true, Pattern: "onScrolled"},
853 wantLanguage: "Kotlin",
854 // 7000 (symbol) + 800 (Kotlin method) + 500 (word) + 10 (file order)
855 wantScore: 8310,
856 },
857 {
858 fileName: "example.kt",
859 content: exampleKotlin,
860 query: &query.Substring{Content: true, Pattern: "PreloadErrorHandler"},
861 wantLanguage: "Kotlin",
862 // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) + 10 (file order)
863 wantScore: 8210,
864 },
865 {
866 fileName: "example.kt",
867 content: exampleKotlin,
868 query: &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"},
869 wantLanguage: "Kotlin",
870 // 7000 (symbol) + 600 (Kotlin constant) + 500 (word) + 10 (file order)
871 wantScore: 8110,
872 },
873 {
874 fileName: "example.kt",
875 content: exampleKotlin,
876 query: &query.Substring{Content: true, Pattern: "scrollState"},
877 wantLanguage: "Kotlin",
878 // 7000 (symbol) + 500 (Kotlin variable) + 500 (word) + 10 (file order)
879 wantScore: 8010,
880 },
881 //
882 // Java
883 //
884 {
885 fileName: "example.java",
886 content: exampleJava,
887 query: &query.Substring{Content: true, Pattern: "nerClass"},
888 wantLanguage: "Java",
889 // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 10 (file order)
890 wantScore: 6560,
891 },
892 {
893 fileName: "example.java",
894 content: exampleJava,
895 query: &query.Substring{Content: true, Pattern: "StaticClass"},
896 wantLanguage: "Java",
897 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) + 10 (file order)
898 wantScore: 7010,
899 },
900 {
901 fileName: "example.java",
902 content: exampleJava,
903 query: &query.Substring{Content: true, Pattern: "innerEnum"},
904 wantLanguage: "Java",
905 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 10 (file order)
906 wantScore: 8410,
907 },
908 {
909 fileName: "example.java",
910 content: exampleJava,
911 query: &query.Substring{Content: true, Pattern: "innerInterface"},
912 wantLanguage: "Java",
913 // 7000 (symbol) + 800 (Java interface) + 500 (word) + 10 (file order)
914 wantScore: 8310,
915 },
916 {
917 fileName: "example.java",
918 content: exampleJava,
919 query: &query.Substring{Content: true, Pattern: "innerMethod"},
920 wantLanguage: "Java",
921 // 7000 (symbol) + 700 (Java method) + 500 (word) + 10 (file order)
922 wantScore: 8210,
923 },
924 {
925 fileName: "example.java",
926 content: exampleJava,
927 query: &query.Substring{Content: true, Pattern: "field"},
928 wantLanguage: "Java",
929 // 7000 (symbol) + 600 (Java field) + 500 (word) + 10 (file order)
930 wantScore: 8110,
931 },
932 {
933 fileName: "example.java",
934 content: exampleJava,
935 query: &query.Substring{Content: true, Pattern: "B"},
936 wantLanguage: "Java",
937 // 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 10 (file order)
938 wantScore: 8010,
939 },
940 // 2 Atoms (1x content and 1x filename)
941 {
942 fileName: "example.java",
943 content: exampleJava,
944 query: &query.Substring{Pattern: "example"}, // matches filename and a Java field
945 wantLanguage: "Java",
946 // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) + 10 (file order)
947 wantScore: 6810,
948 },
949 // 3 Atoms (2x content, 1x filename)
950 {
951 fileName: "example.java",
952 content: exampleJava,
953 query: &query.Or{Children: []query.Q{
954 &query.Substring{Pattern: "example"}, // matches filename and Java field
955 &query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method
956 }},
957 wantLanguage: "Java",
958 // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) + 10 (file order)
959 wantScore: 8476.667,
960 },
961 // 4 Atoms (4x content)
962 {
963 fileName: "example.java",
964 content: exampleJava,
965 query: &query.Or{Children: []query.Q{
966 &query.Substring{Content: true, Pattern: "testAnon"},
967 &query.Substring{Content: true, Pattern: "Override"},
968 &query.Substring{Content: true, Pattern: "InnerEnum"},
969 &query.Substring{Content: true, Pattern: "app"},
970 }},
971 wantLanguage: "Java",
972 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order)
973 wantScore: 8710,
974 },
975 //
976 // Go
977 //
978 {
979 fileName: "a/b/c/config.go",
980 query: &query.Substring{FileName: true, Pattern: "config"},
981 wantLanguage: "Go",
982 // 5500 (partial base at boundary) + 500 (word) + 10 (file order)
983 wantScore: 6010,
984 },
985 {
986 fileName: "a/b/c/config.go",
987 query: &query.Substring{FileName: true, Pattern: "config.go"},
988 wantLanguage: "Go",
989 // 7000 (full base match) + 500 (word) + 10 (file order)
990 wantScore: 7510,
991 },
992 {
993 fileName: "a/config/c/d.go",
994 query: &query.Substring{FileName: true, Pattern: "config"},
995 wantLanguage: "Go",
996 // 500 (word) + 10 (file order)
997 wantScore: 510,
998 },
999 {
1000 fileName: "src/net/http/client.go",
1001 content: []byte(`
1002package http
1003type aInterface interface {}
1004`),
1005 query: &query.Substring{Content: true, Pattern: "aInterface"},
1006 wantLanguage: "Go",
1007 // 7000 (full base match) + 1000 (Go interface) + 500 (word) + 10 (file order)
1008 wantScore: 8510,
1009 },
1010 {
1011 fileName: "src/net/http/client.go",
1012 content: []byte(`
1013package http
1014type aStruct struct {}
1015`),
1016 query: &query.Substring{Content: true, Pattern: "aStruct"},
1017 wantLanguage: "Go",
1018 // 7000 (full base match) + 900 (Go interface) + 500 (word) + 10 (file order)
1019 wantScore: 8410,
1020 },
1021 {
1022 fileName: "src/net/http/client.go",
1023 content: []byte(`
1024package http
1025func Get() {
1026 panic("")
1027}
1028`),
1029 query: &query.And{Children: []query.Q{
1030 &query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}},
1031 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}}},
1032 wantLanguage: "Go",
1033 // 7000 (full base match) + 800 (Go func) + 500 (word) + 200 (atom) + 10 (file order)
1034 wantScore: 8510,
1035 },
1036 //
1037 // C++
1038 //
1039 {
1040 fileName: "example.cc",
1041 content: exampleCpp,
1042 query: &query.Substring{Content: true, Pattern: "FooClass"},
1043 wantLanguage: "C++",
1044 // 7000 (Symbol) + 1000 (C++ class) + 500 (full word) + 10 (file order)
1045 wantScore: 8510,
1046 },
1047 {
1048 fileName: "example.cc",
1049 content: exampleCpp,
1050 query: &query.Substring{Content: true, Pattern: "NestedEnum"},
1051 wantLanguage: "C++",
1052 // 7000 (Symbol) + 900 (C++ enum) + 500 (full word) + 10 (file order)
1053 wantScore: 8410,
1054 },
1055 {
1056 fileName: "example.cc",
1057 content: exampleCpp,
1058 query: &query.Substring{Content: true, Pattern: "main"},
1059 wantLanguage: "C++",
1060 // 7000 (Symbol) + 800 (C++ function) + 500 (full word) + 10 (file order)
1061 wantScore: 8310,
1062 },
1063 {
1064 fileName: "example.cc",
1065 content: exampleCpp,
1066 query: &query.Substring{Content: true, Pattern: "FooStruct"},
1067 wantLanguage: "C++",
1068 // 7000 (Symbol) + 700 (C++ struct) + 500 (full word) + 10 (file order)
1069 wantScore: 8210,
1070 },
1071 {
1072 fileName: "example.cc",
1073 content: exampleCpp,
1074 query: &query.Substring{Content: true, Pattern: "TheUnion"},
1075 wantLanguage: "C++",
1076 // 7000 (Symbol) + 600 (C++ union) + 500 (full word) + 10 (file order)
1077 wantScore: 8110,
1078 },
1079 //
1080 // Scala
1081 //
1082 {
1083 fileName: "example.scala",
1084 content: exampleScala,
1085 query: &query.Substring{Content: true, Pattern: "SymbolIndexBucket"},
1086 wantLanguage: "Scala",
1087 // 7000 (symbol) + 1000 (Scala class) + 500 (word) + 10 (file order)
1088 wantScore: 8510,
1089 },
1090 {
1091 fileName: "example.scala",
1092 content: exampleScala,
1093 query: &query.Substring{Content: true, Pattern: "stdLibPatches"},
1094 wantLanguage: "Scala",
1095 // 7000 (symbol) + 800 (Scala object) + 500 (word) + 10 (file order)
1096 wantScore: 8310,
1097 },
1098 {
1099 fileName: "example.scala",
1100 content: exampleScala,
1101 query: &query.Substring{Content: true, Pattern: "close"},
1102 wantLanguage: "Scala",
1103 // 7000 (symbol) + 700 (Scala method) + 500 (word) + 10 (file order)
1104 wantScore: 8210,
1105 },
1106 {
1107 fileName: "example.scala",
1108 content: exampleScala,
1109 query: &query.Substring{Content: true, Pattern: "javaSymbol"},
1110 wantLanguage: "Scala",
1111 // 7000 (symbol) + 500 (Scala method) + 500 (word) + 10 (file order)
1112 wantScore: 8010,
1113 },
1114 }
1115
1116 epsilon := 0.01
1117 for _, c := range cases {
1118 t.Run(c.wantLanguage, func(t *testing.T) {
1119 b, err := NewBuilder(opts)
1120 if err != nil {
1121 t.Fatalf("NewBuilder: %v", err)
1122 }
1123 if err := b.AddFile(c.fileName, c.content); err != nil {
1124 t.Fatal(err)
1125 }
1126 if err := b.Finish(); err != nil {
1127 t.Fatalf("Finish: %v", err)
1128 }
1129
1130 ss, err := shards.NewDirectorySearcher(dir)
1131 if err != nil {
1132 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
1133 }
1134 defer ss.Close()
1135
1136 srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{DebugScore: true})
1137 if err != nil {
1138 t.Fatal(err)
1139 }
1140
1141 if got, want := len(srs.Files), 1; got != want {
1142 t.Fatalf("file matches: want %d, got %d", want, got)
1143 }
1144
1145 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) > epsilon {
1146 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
1147 }
1148
1149 if got := srs.Files[0].Language; got != c.wantLanguage {
1150 t.Fatalf("want %s, got %s", c.wantLanguage, got)
1151 }
1152 })
1153 }
1154}
1155
1156func TestScoringWithDocumentRanks(t *testing.T) {
1157 if os.Getenv("CI") == "" && checkCTags() == "" {
1158 t.Skip("ctags not available")
1159 }
1160 dir := t.TempDir()
1161
1162 opts := Options{
1163 IndexDir: dir,
1164 RepositoryDescription: zoekt.Repository{
1165 Name: "repo",
1166 },
1167 DocumentRanksVersion: "ranking",
1168 }
1169
1170 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
1171 exampleJava, err := os.ReadFile("./testdata/example.java")
1172 if err != nil {
1173 t.Fatal(err)
1174 }
1175
1176 cases := []struct {
1177 name string
1178 documentRank float64
1179 documentRanksWeight float64
1180 wantScore float64
1181 }{
1182 {
1183 name: "score with no document ranks",
1184 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
1185 wantScore: 7012.00,
1186 },
1187 {
1188 name: "score with document ranks",
1189 documentRank: 0.8,
1190 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) + 10 (file order)
1191 wantScore: 7237.00,
1192 },
1193 {
1194 name: "score with custom document ranks weight",
1195 documentRank: 0.8,
1196 documentRanksWeight: 1000.0,
1197 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) + 10 (file order)
1198 wantScore: 7037.00,
1199 },
1200 }
1201
1202 for _, c := range cases {
1203 t.Run(c.name, func(t *testing.T) {
1204 b, err := NewBuilder(opts)
1205 if err != nil {
1206 t.Fatalf("NewBuilder: %v", err)
1207 }
1208
1209 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava, Ranks: []float64{c.documentRank}})
1210 if err != nil {
1211 t.Fatal(err)
1212 }
1213
1214 if err := b.Finish(); err != nil {
1215 t.Fatalf("Finish: %v", err)
1216 }
1217
1218 ss, err := shards.NewDirectorySearcher(dir)
1219 if err != nil {
1220 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
1221 }
1222 defer ss.Close()
1223
1224 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
1225 UseDocumentRanks: true,
1226 DocumentRanksWeight: c.documentRanksWeight,
1227 DebugScore: true,
1228 })
1229
1230 if err != nil {
1231 t.Fatal(err)
1232 }
1233
1234 if got, want := len(srs.Files), 1; got != want {
1235 t.Fatalf("file matches: want %d, got %d", want, got)
1236 }
1237
1238 if got := srs.Files[0].Score; got != c.wantScore {
1239 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
1240 }
1241 })
1242 }
1243}
1244
1245func TestRepoRanks(t *testing.T) {
1246 if os.Getenv("CI") == "" && checkCTags() == "" {
1247 t.Skip("ctags not available")
1248 }
1249 dir := t.TempDir()
1250
1251 opts := Options{
1252 IndexDir: dir,
1253 RepositoryDescription: zoekt.Repository{
1254 Name: "repo",
1255 },
1256 DocumentRanksVersion: "ranking",
1257 }
1258
1259 searchQuery := &query.Substring{Content: true, Pattern: "Inner"}
1260 exampleJava, err := os.ReadFile("./testdata/example.java")
1261 if err != nil {
1262 t.Fatal(err)
1263 }
1264
1265 cases := []struct {
1266 name string
1267 repoRank uint16
1268 wantScore float64
1269 }{
1270 {
1271 name: "no shard rank",
1272 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order)
1273 wantScore: 7012.00,
1274 },
1275 {
1276 name: "medium shard rank",
1277 repoRank: 30000,
1278 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo rank)
1279 wantScore: 7021.16,
1280 },
1281 {
1282 name: "high shard rank",
1283 repoRank: 60000,
1284 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank)
1285 wantScore: 7030.31,
1286 },
1287 }
1288
1289 for _, c := range cases {
1290 t.Run(c.name, func(t *testing.T) {
1291 opts.RepositoryDescription = zoekt.Repository{
1292 Name: "repo",
1293 Rank: c.repoRank,
1294 }
1295
1296 b, err := NewBuilder(opts)
1297 if err != nil {
1298 t.Fatalf("NewBuilder: %v", err)
1299 }
1300
1301 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava})
1302 if err != nil {
1303 t.Fatal(err)
1304 }
1305
1306 if err := b.Finish(); err != nil {
1307 t.Fatalf("Finish: %v", err)
1308 }
1309
1310 ss, err := shards.NewDirectorySearcher(dir)
1311 if err != nil {
1312 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
1313 }
1314 defer ss.Close()
1315
1316 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{
1317 UseDocumentRanks: true,
1318 DebugScore: true,
1319 })
1320
1321 if err != nil {
1322 t.Fatal(err)
1323 }
1324
1325 if got, want := len(srs.Files), 1; got != want {
1326 t.Fatalf("file matches: want %d, got %d", want, got)
1327 }
1328
1329 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 {
1330 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore)
1331 }
1332 })
1333 }
1334}