fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package build 16 17import ( 18 "bytes" 19 "context" 20 "encoding/json" 21 "fmt" 22 "log" 23 "math" 24 "os" 25 "path/filepath" 26 "reflect" 27 "runtime" 28 "sort" 29 "strings" 30 "testing" 31 "time" 32 33 "github.com/google/go-cmp/cmp" 34 "github.com/google/go-cmp/cmp/cmpopts" 35 "github.com/grafana/regexp" 36 37 "github.com/sourcegraph/zoekt" 38 "github.com/sourcegraph/zoekt/query" 39 "github.com/sourcegraph/zoekt/shards" 40) 41 42func TestBasic(t *testing.T) { 43 dir := t.TempDir() 44 45 opts := Options{ 46 IndexDir: dir, 47 ShardMax: 1024, 48 RepositoryDescription: zoekt.Repository{ 49 Name: "repo", 50 }, 51 Parallelism: 2, 52 SizeMax: 1 << 20, 53 } 54 55 b, err := NewBuilder(opts) 56 if err != nil { 57 t.Fatalf("NewBuilder: %v", err) 58 } 59 60 for i := 0; i < 4; i++ { 61 s := fmt.Sprintf("%d", i) 62 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil { 63 t.Fatal(err) 64 } 65 } 66 67 if err := b.Finish(); err != nil { 68 t.Errorf("Finish: %v", err) 69 } 70 71 fs, _ := filepath.Glob(dir + "/*.zoekt") 72 if len(fs) <= 1 { 73 t.Fatalf("want multiple shards, got %v", fs) 74 } 75 76 _, md0, err := zoekt.ReadMetadataPath(fs[0]) 77 if err != nil { 78 t.Fatal(err) 79 } 80 for _, f := range fs[1:] { 81 _, md, err := zoekt.ReadMetadataPath(f) 82 if err != nil { 83 t.Fatal(err) 84 } 85 if md.IndexTime != md0.IndexTime { 86 t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime) 87 } 88 if md.ID != md0.ID { 89 t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID) 90 } 91 } 92 93 ss, err := shards.NewDirectorySearcher(dir) 94 if err != nil { 95 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 96 } 97 defer ss.Close() 98 99 q, err := query.Parse("111") 100 if err != nil { 101 t.Fatalf("Parse(111): %v", err) 102 } 103 104 var sOpts zoekt.SearchOptions 105 ctx := context.Background() 106 result, err := ss.Search(ctx, q, &sOpts) 107 if err != nil { 108 t.Fatalf("Search(%v): %v", q, err) 109 } 110 111 if len(result.Files) != 1 { 112 t.Errorf("got %v, want 1 file.", result.Files) 113 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile { 114 t.Errorf("got file %q, want %q", gotFile, wantFile) 115 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo"; gotRepo != wantRepo { 116 t.Errorf("got repo %q, want %q", gotRepo, wantRepo) 117 } 118 119 t.Run("meta file", func(t *testing.T) { 120 // use retryTest to allow for the directory watcher to notice the meta 121 // file 122 retryTest(t, func(fatalf func(format string, args ...interface{})) { 123 // Add a .meta file for each shard with repo.Name set to 124 // "repo-mutated". We do this inside retry helper since we have noticed 125 // some flakiness on github CI. 126 for _, p := range fs { 127 repos, _, err := zoekt.ReadMetadataPath(p) 128 if err != nil { 129 t.Fatal(err) 130 } 131 repos[0].Name = "repo-mutated" 132 b, err := json.Marshal(repos[0]) 133 if err != nil { 134 t.Fatal(err) 135 } 136 137 if err := os.WriteFile(p+".meta", b, 0600); err != nil { 138 t.Fatal(err) 139 } 140 } 141 142 result, err := ss.Search(ctx, q, &sOpts) 143 if err != nil { 144 fatalf("Search(%v): %v", q, err) 145 } 146 147 if len(result.Files) != 1 { 148 fatalf("got %v, want 1 file.", result.Files) 149 } else if gotFile, wantFile := result.Files[0].FileName, "F1"; gotFile != wantFile { 150 fatalf("got file %q, want %q", gotFile, wantFile) 151 } else if gotRepo, wantRepo := result.Files[0].Repository, "repo-mutated"; gotRepo != wantRepo { 152 fatalf("got repo %q, want %q", gotRepo, wantRepo) 153 } 154 }) 155 }) 156} 157 158// retryTest will retry f until min(t.Deadline(), time.Minute). It returns 159// once f doesn't call fatalf. 160func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) { 161 t.Helper() 162 163 sleep := 10 * time.Millisecond 164 deadline := time.Now().Add(time.Minute) 165 if d, ok := t.Deadline(); ok && d.Before(deadline) { 166 // give 1s for us to do a final test run 167 deadline = d.Add(-time.Second) 168 } 169 170 for { 171 done := make(chan bool) 172 go func() { 173 defer close(done) 174 175 f(func(format string, args ...interface{}) { 176 runtime.Goexit() 177 }) 178 179 done <- true 180 }() 181 182 success := <-done 183 if success { 184 return 185 } 186 187 // each time we increase sleep by 1.5 188 sleep := sleep*2 - sleep/2 189 if time.Now().Add(sleep).After(deadline) { 190 break 191 } 192 time.Sleep(sleep) 193 } 194 195 // final run for the test, using the real t.Fatalf 196 f(t.Fatalf) 197} 198 199func TestLargeFileOption(t *testing.T) { 200 dir := t.TempDir() 201 202 sizeMax := 1000 203 opts := Options{ 204 IndexDir: dir, 205 LargeFiles: []string{"F0", "F1", "F2", "!F1"}, 206 RepositoryDescription: zoekt.Repository{ 207 Name: "repo", 208 }, 209 SizeMax: sizeMax, 210 } 211 212 b, err := NewBuilder(opts) 213 if err != nil { 214 t.Fatalf("NewBuilder: %v", err) 215 } 216 217 for i := 0; i < 4; i++ { 218 s := fmt.Sprintf("%d", i) 219 if err := b.AddFile("F"+s, []byte(strings.Repeat("a", sizeMax+1))); err != nil { 220 t.Fatal(err) 221 } 222 } 223 224 if err := b.Finish(); err != nil { 225 t.Errorf("Finish: %v", err) 226 } 227 228 ss, err := shards.NewDirectorySearcher(dir) 229 if err != nil { 230 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 231 } 232 233 q, err := query.Parse("aaa") 234 if err != nil { 235 t.Fatalf("Parse(aaa): %v", err) 236 } 237 238 var sOpts zoekt.SearchOptions 239 ctx := context.Background() 240 result, err := ss.Search(ctx, q, &sOpts) 241 if err != nil { 242 t.Fatalf("Search(%v): %v", q, err) 243 } 244 245 if len(result.Files) != 2 { 246 t.Errorf("got %v files, want 2 files.", len(result.Files)) 247 } 248 defer ss.Close() 249} 250 251func TestUpdate(t *testing.T) { 252 dir := t.TempDir() 253 254 opts := Options{ 255 IndexDir: dir, 256 ShardMax: 1024, 257 RepositoryDescription: zoekt.Repository{ 258 Name: "repo", 259 FileURLTemplate: "url", 260 }, 261 Parallelism: 2, 262 SizeMax: 1 << 20, 263 } 264 265 if b, err := NewBuilder(opts); err != nil { 266 t.Fatalf("NewBuilder: %v", err) 267 } else { 268 if err := b.AddFile("F", []byte("hoi")); err != nil { 269 t.Errorf("AddFile: %v", err) 270 } 271 if err := b.Finish(); err != nil { 272 t.Errorf("Finish: %v", err) 273 } 274 } 275 ss, err := shards.NewDirectorySearcher(dir) 276 if err != nil { 277 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 278 } 279 280 ctx := context.Background() 281 repos, err := ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil) 282 if err != nil { 283 t.Fatalf("List: %v", err) 284 } 285 286 if len(repos.Repos) != 1 { 287 t.Errorf("List(repo): got %v, want 1 repo", repos.Repos) 288 } 289 290 fs, err := filepath.Glob(filepath.Join(dir, "*")) 291 if err != nil { 292 t.Fatalf("glob: %v", err) 293 } 294 295 opts.RepositoryDescription = zoekt.Repository{ 296 Name: "repo2", 297 FileURLTemplate: "url2", 298 } 299 300 if b, err := NewBuilder(opts); err != nil { 301 t.Fatalf("NewBuilder: %v", err) 302 } else { 303 if err := b.AddFile("F", []byte("hoi")); err != nil { 304 t.Errorf("AddFile: %v", err) 305 } 306 if err := b.Finish(); err != nil { 307 t.Errorf("Finish: %v", err) 308 } 309 } 310 311 // This is ugly, and potentially flaky, but there is no 312 // observable synchronization for the Sharded searcher, so 313 // this is the best we can do. 314 time.Sleep(100 * time.Millisecond) 315 316 ctx = context.Background() 317 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil { 318 t.Fatalf("List: %v", err) 319 } else if len(repos.Repos) != 2 { 320 t.Errorf("List(repo): got %v, want 2 repos", repos.Repos) 321 } 322 323 for _, fn := range fs { 324 log.Printf("removing %s", fn) 325 if err := os.Remove(fn); err != nil { 326 t.Fatalf("Remove(%s): %v", fn, err) 327 } 328 } 329 330 time.Sleep(100 * time.Millisecond) 331 332 ctx = context.Background() 333 if repos, err = ss.List(ctx, &query.Repo{Regexp: regexp.MustCompile("repo")}, nil); err != nil { 334 t.Fatalf("List: %v", err) 335 } else if len(repos.Repos) != 1 { 336 var ss []string 337 for _, r := range repos.Repos { 338 ss = append(ss, r.Repository.Name) 339 } 340 t.Errorf("List(repo): got %v, want 1 repo", ss) 341 } 342} 343 344func TestDeleteOldShards(t *testing.T) { 345 dir := t.TempDir() 346 347 opts := Options{ 348 IndexDir: dir, 349 ShardMax: 1024, 350 RepositoryDescription: zoekt.Repository{ 351 Name: "repo", 352 FileURLTemplate: "url", 353 }, 354 SizeMax: 1 << 20, 355 } 356 opts.SetDefaults() 357 358 b, err := NewBuilder(opts) 359 if err != nil { 360 t.Fatalf("NewBuilder: %v", err) 361 } 362 for i := 0; i < 4; i++ { 363 s := fmt.Sprintf("%d\n", i) 364 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil { 365 t.Errorf("AddFile: %v", err) 366 } 367 } 368 if err := b.Finish(); err != nil { 369 t.Errorf("Finish: %v", err) 370 } 371 372 glob := filepath.Join(dir, "*.zoekt") 373 fs, err := filepath.Glob(glob) 374 if err != nil { 375 t.Fatalf("Glob(%s): %v", glob, err) 376 } else if len(fs) != 4 { 377 t.Fatalf("Glob(%s): got %v, want 4 shards", glob, fs) 378 } 379 380 if fi, err := os.Lstat(fs[0]); err != nil { 381 t.Fatalf("Lstat: %v", err) 382 } else if fi.Mode()&0o666 == 0o600 { 383 // This fails spuriously if your umask is very restrictive. 384 t.Errorf("got mode %o, should respect umask.", fi.Mode()) 385 } 386 387 // Do again, without sharding. 388 opts.ShardMax = 1 << 20 389 b, err = NewBuilder(opts) 390 if err != nil { 391 t.Fatalf("NewBuilder: %v", err) 392 } 393 for i := 0; i < 4; i++ { 394 s := fmt.Sprintf("%d\n", i) 395 if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1024/2))); err != nil { 396 t.Fatal(err) 397 } 398 } 399 if err := b.Finish(); err != nil { 400 t.Errorf("Finish: %v", err) 401 } 402 403 fs, err = filepath.Glob(glob) 404 if err != nil { 405 t.Fatalf("Glob(%s): %v", glob, err) 406 } else if len(fs) != 1 { 407 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs) 408 } 409 410 // Again, but don't index anything; should leave old shards intact. 411 b, err = NewBuilder(opts) 412 if err != nil { 413 t.Fatalf("NewBuilder: %v", err) 414 } 415 if err := b.Finish(); err != nil { 416 t.Errorf("Finish: %v", err) 417 } 418 419 fs, err = filepath.Glob(glob) 420 if err != nil { 421 t.Fatalf("Glob(%s): %v", glob, err) 422 } else if len(fs) != 1 { 423 t.Fatalf("Glob(%s): got %v, want 1 shard", glob, fs) 424 } 425} 426 427func TestPartialSuccess(t *testing.T) { 428 dir := t.TempDir() 429 430 opts := Options{ 431 IndexDir: dir, 432 ShardMax: 1024, 433 SizeMax: 1 << 20, 434 Parallelism: 1, 435 } 436 opts.RepositoryDescription.Name = "repo" 437 opts.SetDefaults() 438 439 b, err := NewBuilder(opts) 440 if err != nil { 441 t.Fatalf("NewBuilder: %v", err) 442 } 443 444 for i := 0; i < 4; i++ { 445 nm := fmt.Sprintf("F%d", i) 446 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 447 } 448 b.buildError = fmt.Errorf("any error") 449 450 // No error checking. 451 _ = b.Finish() 452 453 // Finish cleans up temporary files. 454 if fs, err := filepath.Glob(dir + "/*"); err != nil { 455 t.Errorf("glob(%s): %v", dir, err) 456 } else if len(fs) != 0 { 457 t.Errorf("got shards %v, want []", fs) 458 } 459} 460 461type filerankCase struct { 462 name string 463 docs []*zoekt.Document 464 want []int 465} 466 467func testFileRankAspect(t *testing.T, c filerankCase) { 468 var want []*zoekt.Document 469 for _, j := range c.want { 470 want = append(want, c.docs[j]) 471 } 472 473 got := make([]*zoekt.Document, len(c.docs)) 474 copy(got, c.docs) 475 sortDocuments(got) 476 477 print := func(ds []*zoekt.Document) string { 478 r := "" 479 for _, d := range ds { 480 r += fmt.Sprintf("%v, ", d) 481 } 482 return r 483 } 484 if !reflect.DeepEqual(got, want) { 485 t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 486 } 487} 488 489func TestFileRank(t *testing.T) { 490 for _, c := range []filerankCase{{ 491 name: "filename", 492 docs: []*zoekt.Document{ 493 { 494 Name: "longlonglong", 495 Content: []byte("bla"), 496 }, 497 { 498 Name: "short", 499 Content: []byte("bla"), 500 }, 501 }, 502 want: []int{1, 0}, 503 }, { 504 name: "test", 505 docs: []*zoekt.Document{ 506 { 507 Name: "test", 508 Content: []byte("bla"), 509 }, 510 { 511 Name: "longlonglong", 512 Content: []byte("bla"), 513 }, 514 }, 515 want: []int{1, 0}, 516 }, { 517 name: "content", 518 docs: []*zoekt.Document{ 519 { 520 Content: []byte("bla"), 521 }, 522 { 523 Content: []byte("blablablabla"), 524 }, 525 { 526 Content: []byte("blabla"), 527 }, 528 }, 529 want: []int{0, 2, 1}, 530 }} { 531 t.Run(c.name, func(t *testing.T) { 532 testFileRankAspect(t, c) 533 }) 534 } 535} 536 537func TestEmptyContent(t *testing.T) { 538 dir := t.TempDir() 539 540 opts := Options{ 541 IndexDir: dir, 542 RepositoryDescription: zoekt.Repository{ 543 Name: "repo", 544 }, 545 } 546 opts.SetDefaults() 547 548 b, err := NewBuilder(opts) 549 if err != nil { 550 t.Fatalf("NewBuilder: %v", err) 551 } 552 if err := b.Finish(); err != nil { 553 t.Errorf("Finish: %v", err) 554 } 555 556 fs, _ := filepath.Glob(dir + "/*.zoekt") 557 if len(fs) != 1 { 558 t.Fatalf("want a shard, got %v", fs) 559 } 560 561 ss, err := shards.NewDirectorySearcher(dir) 562 if err != nil { 563 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 564 } 565 defer ss.Close() 566 567 ctx := context.Background() 568 result, err := ss.List(ctx, &query.Const{Value: true}, nil) 569 if err != nil { 570 t.Fatalf("List: %v", err) 571 } 572 573 if len(result.Repos) != 1 || result.Repos[0].Repository.Name != "repo" { 574 t.Errorf("got %+v, want 1 repo.", result.Repos) 575 } 576} 577 578func TestDeltaShards(t *testing.T) { 579 // TODO: Need to write a test for compound shards as well. 580 type step struct { 581 name string 582 documents []zoekt.Document 583 optFn func(t *testing.T, o *Options) 584 585 query string 586 expectedDocuments []zoekt.Document 587 } 588 589 var ( 590 fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} 591 fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} 592 593 fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} 594 595 barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} 596 barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} 597 ) 598 599 for _, test := range []struct { 600 name string 601 steps []step 602 }{ 603 { 604 name: "tombstone older documents", 605 steps: []step{ 606 { 607 name: "setup", 608 documents: []zoekt.Document{barAtMain, fooAtMain}, 609 query: "common", 610 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, 611 }, 612 { 613 name: "add new version of foo, tombstone older ones", 614 documents: []zoekt.Document{fooAtMainV2}, 615 optFn: func(t *testing.T, o *Options) { 616 o.IsDelta = true 617 o.changedOrRemovedFiles = []string{"foo.go"} 618 }, 619 query: "common", 620 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2}, 621 }, 622 { 623 name: "add new version of bar, tombstone older ones", 624 documents: []zoekt.Document{barAtMainV2}, 625 optFn: func(t *testing.T, o *Options) { 626 o.IsDelta = true 627 o.changedOrRemovedFiles = []string{"bar.go"} 628 }, 629 query: "common", 630 expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2}, 631 }, 632 }, 633 }, 634 { 635 name: "tombstone older documents even if the latest shard has no documents", 636 steps: []step{ 637 { 638 name: "setup", 639 documents: []zoekt.Document{barAtMain, fooAtMain}, 640 query: "common", 641 expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, 642 }, 643 { 644 // a build with no documents could represent a deletion 645 name: "tombstone older documents", 646 documents: nil, 647 optFn: func(t *testing.T, o *Options) { 648 o.IsDelta = true 649 o.changedOrRemovedFiles = []string{"foo.go"} 650 }, 651 query: "common", 652 expectedDocuments: []zoekt.Document{barAtMain}, 653 }, 654 }, 655 }, 656 { 657 name: "tombstones affect document across branches", 658 steps: []step{ 659 { 660 name: "setup", 661 documents: []zoekt.Document{barAtMain, fooAtMainAndRelease}, 662 query: "common", 663 expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease}, 664 }, 665 { 666 667 name: "tombstone foo", 668 documents: nil, 669 optFn: func(t *testing.T, o *Options) { 670 o.IsDelta = true 671 o.changedOrRemovedFiles = []string{"foo.go"} 672 }, 673 query: "common", 674 expectedDocuments: []zoekt.Document{barAtMain}, 675 }, 676 }, 677 }, 678 } { 679 t.Run(test.name, func(t *testing.T) { 680 indexDir := t.TempDir() 681 682 branchSet := make(map[string]struct{}) 683 684 for _, s := range test.steps { 685 for _, d := range s.documents { 686 for _, b := range d.Branches { 687 branchSet[b] = struct{}{} 688 } 689 } 690 } 691 692 for _, step := range test.steps { 693 repository := zoekt.Repository{ID: 1, Name: "repository"} 694 695 for b := range branchSet { 696 repository.Branches = append(repository.Branches, zoekt.RepositoryBranch{Name: b}) 697 } 698 699 sort.Slice(repository.Branches, func(i, j int) bool { 700 a, b := repository.Branches[i], repository.Branches[j] 701 702 return a.Name < b.Name 703 }) 704 705 buildOpts := Options{ 706 IndexDir: indexDir, 707 RepositoryDescription: repository, 708 } 709 buildOpts.SetDefaults() 710 711 if step.optFn != nil { 712 step.optFn(t, &buildOpts) 713 } 714 715 b, err := NewBuilder(buildOpts) 716 if err != nil { 717 t.Fatalf("step %q: NewBuilder: %s", step.name, err) 718 } 719 720 for _, d := range step.documents { 721 err := b.Add(d) 722 if err != nil { 723 t.Fatalf("step %q: adding document %q to builder: %s", step.name, d.Name, err) 724 } 725 } 726 727 // Call b.Finish() multiple times to ensure that it is idempotent 728 for i := 0; i < 3; i++ { 729 730 err = b.Finish() 731 if err != nil { 732 t.Fatalf("step %q: finishing builder (call #%d): %s", step.name, i, err) 733 } 734 } 735 736 err = b.Finish() 737 if err != nil { 738 t.Fatalf("step %q: finishing builder: %s", step.name, err) 739 } 740 741 state, _ := buildOpts.IndexState() 742 if diff := cmp.Diff(IndexStateEqual, state); diff != "" { 743 t.Errorf("unexpected diff in index state (-want +got):\n%s", diff) 744 } 745 746 ss, err := shards.NewDirectorySearcher(indexDir) 747 if err != nil { 748 t.Fatalf("step %q: NewDirectorySearcher(%s): %s", step.name, indexDir, err) 749 } 750 defer ss.Close() 751 752 searchOpts := &zoekt.SearchOptions{Whole: true} 753 q := &query.Substring{Pattern: step.query} 754 755 result, err := ss.Search(context.Background(), q, searchOpts) 756 if err != nil { 757 t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err) 758 } 759 760 var receivedDocuments []zoekt.Document 761 for _, f := range result.Files { 762 receivedDocuments = append(receivedDocuments, zoekt.Document{ 763 Name: f.FileName, 764 Content: f.Content, 765 }) 766 } 767 768 cmpOpts := []cmp.Option{ 769 cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), 770 cmpopts.SortSlices(func(a, b zoekt.Document) bool { 771 if a.Name < b.Name { 772 return true 773 } 774 775 return bytes.Compare(a.Content, b.Content) < 0 776 }), 777 } 778 779 if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, cmpOpts...); diff != "" { 780 t.Errorf("step %q: diff in received documents (-want +got):%s\n:", step.name, diff) 781 } 782 } 783 }) 784 } 785} 786 787// With this test we want to capture regressions in the names returned by our 788// language detection and the scores assigned to file matches. We rely on the 789// detected language and its spelling, for example, in scoring (see scoreKind). 790func TestScoring(t *testing.T) { 791 if os.Getenv("CI") == "" && checkCTags() == "" { 792 t.Skip("ctags not available") 793 } 794 dir := t.TempDir() 795 796 opts := Options{ 797 IndexDir: dir, 798 RepositoryDescription: zoekt.Repository{ 799 Name: "repo", 800 }, 801 } 802 803 exampleJava, err := os.ReadFile("./testdata/example.java") 804 if err != nil { 805 t.Fatal(err) 806 } 807 808 exampleKotlin, err := os.ReadFile("./testdata/example.kt") 809 if err != nil { 810 t.Fatal(err) 811 } 812 813 exampleCpp, err := os.ReadFile("./testdata/example.cc") 814 if err != nil { 815 t.Fatal(err) 816 } 817 818 exampleScala, err := os.ReadFile("./testdata/example.scala") 819 if err != nil { 820 t.Fatal(err) 821 } 822 823 cases := []struct { 824 fileName string 825 content []byte 826 query query.Q 827 wantLanguage string 828 wantScore float64 829 }{ 830 // 831 // Kotlin 832 // 833 { 834 fileName: "example.kt", 835 content: exampleKotlin, 836 query: &query.Substring{Content: true, Pattern: "oxyPreloader"}, 837 wantLanguage: "Kotlin", 838 // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) + 10 (file order) 839 wantScore: 6560, 840 }, 841 { 842 fileName: "example.kt", 843 content: exampleKotlin, 844 query: &query.Substring{Content: true, Pattern: "ViewMetadata"}, 845 wantLanguage: "Kotlin", 846 // 7000 (symbol) + 900 (Kotlin interface) + 500 (word) + 10 (file order) 847 wantScore: 8410, 848 }, 849 { 850 fileName: "example.kt", 851 content: exampleKotlin, 852 query: &query.Substring{Content: true, Pattern: "onScrolled"}, 853 wantLanguage: "Kotlin", 854 // 7000 (symbol) + 800 (Kotlin method) + 500 (word) + 10 (file order) 855 wantScore: 8310, 856 }, 857 { 858 fileName: "example.kt", 859 content: exampleKotlin, 860 query: &query.Substring{Content: true, Pattern: "PreloadErrorHandler"}, 861 wantLanguage: "Kotlin", 862 // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) + 10 (file order) 863 wantScore: 8210, 864 }, 865 { 866 fileName: "example.kt", 867 content: exampleKotlin, 868 query: &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"}, 869 wantLanguage: "Kotlin", 870 // 7000 (symbol) + 600 (Kotlin constant) + 500 (word) + 10 (file order) 871 wantScore: 8110, 872 }, 873 { 874 fileName: "example.kt", 875 content: exampleKotlin, 876 query: &query.Substring{Content: true, Pattern: "scrollState"}, 877 wantLanguage: "Kotlin", 878 // 7000 (symbol) + 500 (Kotlin variable) + 500 (word) + 10 (file order) 879 wantScore: 8010, 880 }, 881 // 882 // Java 883 // 884 { 885 fileName: "example.java", 886 content: exampleJava, 887 query: &query.Substring{Content: true, Pattern: "nerClass"}, 888 wantLanguage: "Java", 889 // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 10 (file order) 890 wantScore: 6560, 891 }, 892 { 893 fileName: "example.java", 894 content: exampleJava, 895 query: &query.Substring{Content: true, Pattern: "StaticClass"}, 896 wantLanguage: "Java", 897 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) + 10 (file order) 898 wantScore: 7010, 899 }, 900 { 901 fileName: "example.java", 902 content: exampleJava, 903 query: &query.Substring{Content: true, Pattern: "innerEnum"}, 904 wantLanguage: "Java", 905 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 906 wantScore: 8410, 907 }, 908 { 909 fileName: "example.java", 910 content: exampleJava, 911 query: &query.Substring{Content: true, Pattern: "innerInterface"}, 912 wantLanguage: "Java", 913 // 7000 (symbol) + 800 (Java interface) + 500 (word) + 10 (file order) 914 wantScore: 8310, 915 }, 916 { 917 fileName: "example.java", 918 content: exampleJava, 919 query: &query.Substring{Content: true, Pattern: "innerMethod"}, 920 wantLanguage: "Java", 921 // 7000 (symbol) + 700 (Java method) + 500 (word) + 10 (file order) 922 wantScore: 8210, 923 }, 924 { 925 fileName: "example.java", 926 content: exampleJava, 927 query: &query.Substring{Content: true, Pattern: "field"}, 928 wantLanguage: "Java", 929 // 7000 (symbol) + 600 (Java field) + 500 (word) + 10 (file order) 930 wantScore: 8110, 931 }, 932 { 933 fileName: "example.java", 934 content: exampleJava, 935 query: &query.Substring{Content: true, Pattern: "B"}, 936 wantLanguage: "Java", 937 // 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 10 (file order) 938 wantScore: 8010, 939 }, 940 // 2 Atoms (1x content and 1x filename) 941 { 942 fileName: "example.java", 943 content: exampleJava, 944 query: &query.Substring{Pattern: "example"}, // matches filename and a Java field 945 wantLanguage: "Java", 946 // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) + 10 (file order) 947 wantScore: 6810, 948 }, 949 // 3 Atoms (2x content, 1x filename) 950 { 951 fileName: "example.java", 952 content: exampleJava, 953 query: &query.Or{Children: []query.Q{ 954 &query.Substring{Pattern: "example"}, // matches filename and Java field 955 &query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method 956 }}, 957 wantLanguage: "Java", 958 // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) + 10 (file order) 959 wantScore: 8476.667, 960 }, 961 // 4 Atoms (4x content) 962 { 963 fileName: "example.java", 964 content: exampleJava, 965 query: &query.Or{Children: []query.Q{ 966 &query.Substring{Content: true, Pattern: "testAnon"}, 967 &query.Substring{Content: true, Pattern: "Override"}, 968 &query.Substring{Content: true, Pattern: "InnerEnum"}, 969 &query.Substring{Content: true, Pattern: "app"}, 970 }}, 971 wantLanguage: "Java", 972 // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order) 973 wantScore: 8710, 974 }, 975 // 976 // Go 977 // 978 { 979 fileName: "a/b/c/config.go", 980 query: &query.Substring{FileName: true, Pattern: "config"}, 981 wantLanguage: "Go", 982 // 5500 (partial base at boundary) + 500 (word) + 10 (file order) 983 wantScore: 6010, 984 }, 985 { 986 fileName: "a/b/c/config.go", 987 query: &query.Substring{FileName: true, Pattern: "config.go"}, 988 wantLanguage: "Go", 989 // 7000 (full base match) + 500 (word) + 10 (file order) 990 wantScore: 7510, 991 }, 992 { 993 fileName: "a/config/c/d.go", 994 query: &query.Substring{FileName: true, Pattern: "config"}, 995 wantLanguage: "Go", 996 // 500 (word) + 10 (file order) 997 wantScore: 510, 998 }, 999 { 1000 fileName: "src/net/http/client.go", 1001 content: []byte(` 1002package http 1003type aInterface interface {} 1004`), 1005 query: &query.Substring{Content: true, Pattern: "aInterface"}, 1006 wantLanguage: "Go", 1007 // 7000 (full base match) + 1000 (Go interface) + 500 (word) + 10 (file order) 1008 wantScore: 8510, 1009 }, 1010 { 1011 fileName: "src/net/http/client.go", 1012 content: []byte(` 1013package http 1014type aStruct struct {} 1015`), 1016 query: &query.Substring{Content: true, Pattern: "aStruct"}, 1017 wantLanguage: "Go", 1018 // 7000 (full base match) + 900 (Go interface) + 500 (word) + 10 (file order) 1019 wantScore: 8410, 1020 }, 1021 { 1022 fileName: "src/net/http/client.go", 1023 content: []byte(` 1024package http 1025func Get() { 1026 panic("") 1027} 1028`), 1029 query: &query.And{Children: []query.Q{ 1030 &query.Symbol{Expr: &query.Substring{Pattern: "http", Content: true}}, 1031 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}}}, 1032 wantLanguage: "Go", 1033 // 7000 (full base match) + 800 (Go func) + 500 (word) + 200 (atom) + 10 (file order) 1034 wantScore: 8510, 1035 }, 1036 // 1037 // C++ 1038 // 1039 { 1040 fileName: "example.cc", 1041 content: exampleCpp, 1042 query: &query.Substring{Content: true, Pattern: "FooClass"}, 1043 wantLanguage: "C++", 1044 // 7000 (Symbol) + 1000 (C++ class) + 500 (full word) + 10 (file order) 1045 wantScore: 8510, 1046 }, 1047 { 1048 fileName: "example.cc", 1049 content: exampleCpp, 1050 query: &query.Substring{Content: true, Pattern: "NestedEnum"}, 1051 wantLanguage: "C++", 1052 // 7000 (Symbol) + 900 (C++ enum) + 500 (full word) + 10 (file order) 1053 wantScore: 8410, 1054 }, 1055 { 1056 fileName: "example.cc", 1057 content: exampleCpp, 1058 query: &query.Substring{Content: true, Pattern: "main"}, 1059 wantLanguage: "C++", 1060 // 7000 (Symbol) + 800 (C++ function) + 500 (full word) + 10 (file order) 1061 wantScore: 8310, 1062 }, 1063 { 1064 fileName: "example.cc", 1065 content: exampleCpp, 1066 query: &query.Substring{Content: true, Pattern: "FooStruct"}, 1067 wantLanguage: "C++", 1068 // 7000 (Symbol) + 700 (C++ struct) + 500 (full word) + 10 (file order) 1069 wantScore: 8210, 1070 }, 1071 { 1072 fileName: "example.cc", 1073 content: exampleCpp, 1074 query: &query.Substring{Content: true, Pattern: "TheUnion"}, 1075 wantLanguage: "C++", 1076 // 7000 (Symbol) + 600 (C++ union) + 500 (full word) + 10 (file order) 1077 wantScore: 8110, 1078 }, 1079 // 1080 // Scala 1081 // 1082 { 1083 fileName: "example.scala", 1084 content: exampleScala, 1085 query: &query.Substring{Content: true, Pattern: "SymbolIndexBucket"}, 1086 wantLanguage: "Scala", 1087 // 7000 (symbol) + 1000 (Scala class) + 500 (word) + 10 (file order) 1088 wantScore: 8510, 1089 }, 1090 { 1091 fileName: "example.scala", 1092 content: exampleScala, 1093 query: &query.Substring{Content: true, Pattern: "stdLibPatches"}, 1094 wantLanguage: "Scala", 1095 // 7000 (symbol) + 800 (Scala object) + 500 (word) + 10 (file order) 1096 wantScore: 8310, 1097 }, 1098 { 1099 fileName: "example.scala", 1100 content: exampleScala, 1101 query: &query.Substring{Content: true, Pattern: "close"}, 1102 wantLanguage: "Scala", 1103 // 7000 (symbol) + 700 (Scala method) + 500 (word) + 10 (file order) 1104 wantScore: 8210, 1105 }, 1106 { 1107 fileName: "example.scala", 1108 content: exampleScala, 1109 query: &query.Substring{Content: true, Pattern: "javaSymbol"}, 1110 wantLanguage: "Scala", 1111 // 7000 (symbol) + 500 (Scala method) + 500 (word) + 10 (file order) 1112 wantScore: 8010, 1113 }, 1114 } 1115 1116 epsilon := 0.01 1117 for _, c := range cases { 1118 t.Run(c.wantLanguage, func(t *testing.T) { 1119 b, err := NewBuilder(opts) 1120 if err != nil { 1121 t.Fatalf("NewBuilder: %v", err) 1122 } 1123 if err := b.AddFile(c.fileName, c.content); err != nil { 1124 t.Fatal(err) 1125 } 1126 if err := b.Finish(); err != nil { 1127 t.Fatalf("Finish: %v", err) 1128 } 1129 1130 ss, err := shards.NewDirectorySearcher(dir) 1131 if err != nil { 1132 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 1133 } 1134 defer ss.Close() 1135 1136 srs, err := ss.Search(context.Background(), c.query, &zoekt.SearchOptions{DebugScore: true}) 1137 if err != nil { 1138 t.Fatal(err) 1139 } 1140 1141 if got, want := len(srs.Files), 1; got != want { 1142 t.Fatalf("file matches: want %d, got %d", want, got) 1143 } 1144 1145 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) > epsilon { 1146 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore) 1147 } 1148 1149 if got := srs.Files[0].Language; got != c.wantLanguage { 1150 t.Fatalf("want %s, got %s", c.wantLanguage, got) 1151 } 1152 }) 1153 } 1154} 1155 1156func TestScoringWithDocumentRanks(t *testing.T) { 1157 if os.Getenv("CI") == "" && checkCTags() == "" { 1158 t.Skip("ctags not available") 1159 } 1160 dir := t.TempDir() 1161 1162 opts := Options{ 1163 IndexDir: dir, 1164 RepositoryDescription: zoekt.Repository{ 1165 Name: "repo", 1166 }, 1167 DocumentRanksVersion: "ranking", 1168 } 1169 1170 searchQuery := &query.Substring{Content: true, Pattern: "Inner"} 1171 exampleJava, err := os.ReadFile("./testdata/example.java") 1172 if err != nil { 1173 t.Fatal(err) 1174 } 1175 1176 cases := []struct { 1177 name string 1178 documentRank float64 1179 documentRanksWeight float64 1180 wantScore float64 1181 }{ 1182 { 1183 name: "score with no document ranks", 1184 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) 1185 wantScore: 7012.00, 1186 }, 1187 { 1188 name: "score with document ranks", 1189 documentRank: 0.8, 1190 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) + 10 (file order) 1191 wantScore: 7237.00, 1192 }, 1193 { 1194 name: "score with custom document ranks weight", 1195 documentRank: 0.8, 1196 documentRanksWeight: 1000.0, 1197 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) + 10 (file order) 1198 wantScore: 7037.00, 1199 }, 1200 } 1201 1202 for _, c := range cases { 1203 t.Run(c.name, func(t *testing.T) { 1204 b, err := NewBuilder(opts) 1205 if err != nil { 1206 t.Fatalf("NewBuilder: %v", err) 1207 } 1208 1209 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava, Ranks: []float64{c.documentRank}}) 1210 if err != nil { 1211 t.Fatal(err) 1212 } 1213 1214 if err := b.Finish(); err != nil { 1215 t.Fatalf("Finish: %v", err) 1216 } 1217 1218 ss, err := shards.NewDirectorySearcher(dir) 1219 if err != nil { 1220 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 1221 } 1222 defer ss.Close() 1223 1224 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{ 1225 UseDocumentRanks: true, 1226 DocumentRanksWeight: c.documentRanksWeight, 1227 DebugScore: true, 1228 }) 1229 1230 if err != nil { 1231 t.Fatal(err) 1232 } 1233 1234 if got, want := len(srs.Files), 1; got != want { 1235 t.Fatalf("file matches: want %d, got %d", want, got) 1236 } 1237 1238 if got := srs.Files[0].Score; got != c.wantScore { 1239 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore) 1240 } 1241 }) 1242 } 1243} 1244 1245func TestRepoRanks(t *testing.T) { 1246 if os.Getenv("CI") == "" && checkCTags() == "" { 1247 t.Skip("ctags not available") 1248 } 1249 dir := t.TempDir() 1250 1251 opts := Options{ 1252 IndexDir: dir, 1253 RepositoryDescription: zoekt.Repository{ 1254 Name: "repo", 1255 }, 1256 DocumentRanksVersion: "ranking", 1257 } 1258 1259 searchQuery := &query.Substring{Content: true, Pattern: "Inner"} 1260 exampleJava, err := os.ReadFile("./testdata/example.java") 1261 if err != nil { 1262 t.Fatal(err) 1263 } 1264 1265 cases := []struct { 1266 name string 1267 repoRank uint16 1268 wantScore float64 1269 }{ 1270 { 1271 name: "no shard rank", 1272 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) 1273 wantScore: 7012.00, 1274 }, 1275 { 1276 name: "medium shard rank", 1277 repoRank: 30000, 1278 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo rank) 1279 wantScore: 7021.16, 1280 }, 1281 { 1282 name: "high shard rank", 1283 repoRank: 60000, 1284 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank) 1285 wantScore: 7030.31, 1286 }, 1287 } 1288 1289 for _, c := range cases { 1290 t.Run(c.name, func(t *testing.T) { 1291 opts.RepositoryDescription = zoekt.Repository{ 1292 Name: "repo", 1293 Rank: c.repoRank, 1294 } 1295 1296 b, err := NewBuilder(opts) 1297 if err != nil { 1298 t.Fatalf("NewBuilder: %v", err) 1299 } 1300 1301 err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava}) 1302 if err != nil { 1303 t.Fatal(err) 1304 } 1305 1306 if err := b.Finish(); err != nil { 1307 t.Fatalf("Finish: %v", err) 1308 } 1309 1310 ss, err := shards.NewDirectorySearcher(dir) 1311 if err != nil { 1312 t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 1313 } 1314 defer ss.Close() 1315 1316 srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{ 1317 UseDocumentRanks: true, 1318 DebugScore: true, 1319 }) 1320 1321 if err != nil { 1322 t.Fatal(err) 1323 } 1324 1325 if got, want := len(srs.Files), 1; got != want { 1326 t.Fatalf("file matches: want %d, got %d", want, got) 1327 } 1328 1329 if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 { 1330 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore) 1331 } 1332 }) 1333 } 1334}