fork of https://github.com/sourcegraph/zoekt
1// Copyright 2021 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package gitindex
16
17import (
18 "bytes"
19 "context"
20 "errors"
21 "net/url"
22 "os"
23 "os/exec"
24 "path/filepath"
25 "runtime"
26 "sort"
27 "strings"
28 "testing"
29
30 "github.com/go-git/go-git/v5"
31 "github.com/go-git/go-git/v5/plumbing"
32 "github.com/google/go-cmp/cmp"
33 "github.com/google/go-cmp/cmp/cmpopts"
34
35 "github.com/sourcegraph/zoekt"
36 "github.com/sourcegraph/zoekt/ignore"
37 "github.com/sourcegraph/zoekt/index"
38 "github.com/sourcegraph/zoekt/query"
39 "github.com/sourcegraph/zoekt/search"
40)
41
42func TestIndexEmptyRepo(t *testing.T) {
43 t.Parallel()
44
45 dir := t.TempDir()
46
47 cmd := exec.Command("git", "init", "-b", "master", "repo")
48 cmd.Dir = dir
49
50 if err := cmd.Run(); err != nil {
51 t.Fatalf("cmd.Run: %v", err)
52 }
53
54 desc := zoekt.Repository{
55 Name: "repo",
56 }
57 opts := Options{
58 RepoDir: filepath.Join(dir, "repo", ".git"),
59 BuildOptions: index.Options{
60 RepositoryDescription: desc,
61 IndexDir: dir,
62 },
63 }
64
65 if _, err := IndexGitRepo(opts); err != nil {
66 t.Fatalf("IndexGitRepo: %v", err)
67 }
68}
69
70func TestIndexNonexistentRepo(t *testing.T) {
71 t.Parallel()
72
73 dir := t.TempDir()
74 desc := zoekt.Repository{
75 Name: "nonexistent",
76 }
77 opts := Options{
78 RepoDir: "does/not/exist",
79 Branches: []string{"main"},
80 BuildOptions: index.Options{
81 RepositoryDescription: desc,
82 IndexDir: dir,
83 },
84 }
85
86 if _, err := IndexGitRepo(opts); err == nil {
87 t.Fatal("expected error, got none")
88 } else if !errors.Is(err, git.ErrRepositoryNotExists) {
89 t.Fatalf("expected git.ErrRepositoryNotExists, got %v", err)
90 }
91}
92
93func TestIndexTinyRepo(t *testing.T) {
94 t.Parallel()
95
96 // Create a repo with one file in it.
97 dir := t.TempDir()
98 runGit(t, dir, "init", "-b", "main", "repo")
99
100 repoDir := filepath.Join(dir, "repo")
101 if err := os.WriteFile(filepath.Join(repoDir, "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil {
102 t.Fatalf("WriteFile: %v", err)
103 }
104 runGit(t, repoDir, "add", ".")
105 runGit(t, repoDir, "commit", "-m", "initial commit")
106
107 // Test that indexing accepts both the repo directory, and the .git subdirectory.
108 for _, testDir := range []string{"repo", "repo/.git"} {
109 opts := Options{
110 RepoDir: filepath.Join(dir, testDir),
111 Branches: []string{"main"},
112 BuildOptions: index.Options{
113 RepositoryDescription: zoekt.Repository{Name: "repo"},
114 IndexDir: dir,
115 },
116 }
117
118 if _, err := IndexGitRepo(opts); err != nil {
119 t.Fatalf("unexpected error %v", err)
120 }
121
122 searcher, err := search.NewDirectorySearcher(dir)
123 if err != nil {
124 t.Fatal("NewDirectorySearcher", err)
125 }
126
127 results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{})
128 searcher.Close()
129
130 if err != nil {
131 t.Fatal("search failed", err)
132 }
133
134 if len(results.Files) != 1 {
135 t.Fatalf("got search result %v, want 1 file", results.Files)
136 }
137 }
138}
139
140func TestIndexGitRepo_Worktree(t *testing.T) {
141 t.Parallel()
142
143 _, worktreeDir := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n")
144 indexDir := t.TempDir()
145
146 opts := Options{
147 RepoDir: worktreeDir,
148 Branches: []string{"HEAD"},
149 BuildOptions: index.Options{
150 RepositoryDescription: zoekt.Repository{Name: "repo"},
151 IndexDir: indexDir,
152 },
153 }
154
155 if _, err := IndexGitRepo(opts); err != nil {
156 t.Fatalf("IndexGitRepo(worktree): %v", err)
157 }
158
159 searcher, err := search.NewDirectorySearcher(indexDir)
160 if err != nil {
161 t.Fatal("NewDirectorySearcher", err)
162 }
163 defer searcher.Close()
164
165 results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{})
166 if err != nil {
167 t.Fatal("search failed", err)
168 }
169
170 if len(results.Files) != 1 {
171 t.Fatalf("got search result %v, want 1 file", results.Files)
172 }
173}
174
175func TestOpenRepoVariants(t *testing.T) {
176 t.Parallel()
177
178 repoDir, worktreeDir := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n")
179 bareDir := cloneBareRepo(t, repoDir)
180
181 paths := []struct {
182 name string
183 path string
184 }{
185 {name: "repo root", path: repoDir},
186 {name: "dot git dir", path: filepath.Join(repoDir, ".git")},
187 {name: "worktree root", path: worktreeDir},
188 {name: "bare repo root", path: bareDir},
189 }
190
191 openers := []struct {
192 name string
193 open func(t *testing.T, repoDir string) *git.Repository
194 }{
195 {
196 name: "plain",
197 open: func(t *testing.T, repoDir string) *git.Repository {
198 t.Helper()
199
200 repo, err := plainOpenRepo(repoDir)
201 if err != nil {
202 t.Fatalf("plainOpenRepo(%q): %v", repoDir, err)
203 }
204
205 return repo
206 },
207 },
208 {
209 name: "optimized",
210 open: func(t *testing.T, repoDir string) *git.Repository {
211 t.Helper()
212
213 repo, closer, err := openRepo(repoDir)
214 if err != nil {
215 t.Fatalf("openRepo(%q): %v", repoDir, err)
216 }
217 t.Cleanup(func() {
218 _ = closer.Close()
219 })
220
221 return repo
222 },
223 },
224 }
225
226 for _, opener := range openers {
227 for _, tc := range paths {
228 t.Run(opener.name+"/"+tc.name, func(t *testing.T) {
229 t.Parallel()
230
231 repo := opener.open(t, tc.path)
232
233 head, err := repo.Head()
234 if err != nil {
235 t.Fatalf("repo.Head(): %v", err)
236 }
237
238 if _, err := repo.CommitObject(head.Hash()); err != nil {
239 t.Fatalf("repo.CommitObject(%s): %v", head.Hash(), err)
240 }
241 })
242 }
243 }
244}
245
246func TestIndexGitRepo_BareRepo_LegacyRepoOpen(t *testing.T) {
247 repoDir, _ := initGitWorktree(t, "file1.go", "package main\n\nfunc main() {}\n")
248 bareDir := cloneBareRepo(t, repoDir)
249 indexDir := t.TempDir()
250
251 t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "true")
252
253 opts := Options{
254 RepoDir: bareDir,
255 Branches: []string{"main"},
256 BuildOptions: index.Options{
257 RepositoryDescription: zoekt.Repository{Name: "repo"},
258 IndexDir: indexDir,
259 },
260 }
261
262 if _, err := IndexGitRepo(opts); err != nil {
263 t.Fatalf("IndexGitRepo(bare, legacy open): %v", err)
264 }
265
266 searcher, err := search.NewDirectorySearcher(indexDir)
267 if err != nil {
268 t.Fatal("NewDirectorySearcher", err)
269 }
270 defer searcher.Close()
271
272 results, err := searcher.Search(context.Background(), &query.Const{Value: true}, &zoekt.SearchOptions{})
273 if err != nil {
274 t.Fatal("search failed", err)
275 }
276
277 if len(results.Files) != 1 || results.Files[0].FileName != "file1.go" {
278 t.Fatalf("got search result %v, want file1.go", results.Files)
279 }
280}
281
282func TestCatfileFilterSpec(t *testing.T) {
283 t.Parallel()
284
285 for _, tc := range []struct {
286 name string
287 opts Options
288 want string
289 }{
290 {
291 name: "size max",
292 opts: Options{BuildOptions: index.Options{SizeMax: 1 << 20}},
293 want: "blob:limit=1048577",
294 },
295 {
296 name: "large file exception disables filter",
297 opts: Options{BuildOptions: index.Options{SizeMax: 1 << 20, LargeFiles: []string{"*.bin"}}},
298 want: "",
299 },
300 {
301 name: "zero size max disables filter",
302 opts: Options{BuildOptions: index.Options{SizeMax: 0}},
303 want: "",
304 },
305 } {
306 t.Run(tc.name, func(t *testing.T) {
307 t.Parallel()
308
309 if got := catfileFilterSpec(tc.opts); got != tc.want {
310 t.Fatalf("catfileFilterSpec() = %q, want %q", got, tc.want)
311 }
312 })
313 }
314}
315
316func initGitWorktree(t *testing.T, fileName, content string) (string, string) {
317 t.Helper()
318
319 dir := t.TempDir()
320 runGit(t, dir, "init", "-b", "main", "repo")
321
322 repoDir := filepath.Join(dir, "repo")
323 if err := os.WriteFile(filepath.Join(repoDir, fileName), []byte(content), 0o644); err != nil {
324 t.Fatalf("WriteFile: %v", err)
325 }
326 runGit(t, repoDir, "config", "remote.origin.url", "git@github.com:sourcegraph/zoekt.git")
327 runGit(t, repoDir, "add", ".")
328 runGit(t, repoDir, "commit", "-m", "initial commit")
329
330 worktreeDir := filepath.Join(dir, "wt")
331 runGit(t, repoDir, "worktree", "add", "-b", "worktree-branch", worktreeDir)
332
333 return repoDir, worktreeDir
334}
335
336func cloneBareRepo(t *testing.T, repoDir string) string {
337 t.Helper()
338
339 bareDir := filepath.Join(t.TempDir(), "repo.git")
340 runGit(t, filepath.Dir(repoDir), "clone", "--bare", repoDir, bareDir)
341
342 return bareDir
343}
344
345func TestIndexDeltaBasic(t *testing.T) {
346 t.Parallel()
347
348 type branchToDocumentMap map[string][]index.Document
349
350 type step struct {
351 name string
352 addedDocuments branchToDocumentMap
353 deletedDocuments branchToDocumentMap
354 optFn func(t *testing.T, options *Options)
355
356 expectedFallbackToNormalBuild bool
357 expectedDocuments []index.Document
358 }
359
360 helloWorld := index.Document{Name: "hello_world.txt", Content: []byte("hello")}
361
362 fruitV1 := index.Document{Name: "best_fruit.txt", Content: []byte("strawberry")}
363 fruitV1InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content}
364 fruitV1WithNewName := index.Document{Name: "new_fruit.txt", Content: fruitV1.Content}
365
366 fruitV2 := index.Document{Name: "best_fruit.txt", Content: []byte("grapes")}
367 fruitV2InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content}
368
369 fruitV3 := index.Document{Name: "best_fruit.txt", Content: []byte("oranges")}
370 fruitV4 := index.Document{Name: "best_fruit.txt", Content: []byte("apples")}
371
372 foo := index.Document{Name: "foo.txt", Content: []byte("bar")}
373
374 emptySourcegraphIgnore := index.Document{Name: ignore.IgnoreFile}
375 sourcegraphIgnoreWithContent := index.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")}
376
377 for _, test := range []struct {
378 name string
379 branches []string
380 steps []step
381 }{
382 {
383 name: "modification",
384 branches: []string{"main"},
385 steps: []step{
386 {
387 name: "setup",
388 addedDocuments: branchToDocumentMap{
389 "main": []index.Document{helloWorld, fruitV1},
390 },
391
392 expectedDocuments: []index.Document{helloWorld, fruitV1},
393 },
394 {
395 name: "add newer version of fruits",
396 addedDocuments: branchToDocumentMap{
397 "main": []index.Document{fruitV2},
398 },
399 optFn: func(t *testing.T, o *Options) {
400 o.BuildOptions.IsDelta = true
401 },
402
403 expectedDocuments: []index.Document{helloWorld, fruitV2},
404 },
405 },
406 },
407 {
408 name: "modification only inside nested folder",
409 branches: []string{"main"},
410 steps: []step{
411 {
412 name: "setup",
413 addedDocuments: branchToDocumentMap{
414 "main": []index.Document{foo, fruitV1InFolder},
415 },
416
417 expectedDocuments: []index.Document{foo, fruitV1InFolder},
418 },
419 {
420 name: "add newer version of fruits inside folder",
421 addedDocuments: branchToDocumentMap{
422 "main": []index.Document{fruitV2InFolder},
423 },
424 optFn: func(t *testing.T, o *Options) {
425 o.BuildOptions.IsDelta = true
426 },
427
428 expectedDocuments: []index.Document{foo, fruitV2InFolder},
429 },
430 },
431 },
432 {
433 name: "addition",
434 branches: []string{"main"},
435 steps: []step{
436 {
437 name: "setup",
438 addedDocuments: branchToDocumentMap{
439 "main": []index.Document{helloWorld, fruitV1},
440 },
441
442 expectedDocuments: []index.Document{helloWorld, fruitV1},
443 },
444 {
445 name: "add new file - foo",
446 addedDocuments: branchToDocumentMap{
447 "main": []index.Document{foo},
448 },
449 optFn: func(t *testing.T, o *Options) {
450 o.BuildOptions.IsDelta = true
451 },
452
453 expectedDocuments: []index.Document{helloWorld, fruitV1, foo},
454 },
455 },
456 },
457 {
458 name: "deletion",
459 branches: []string{"main"},
460 steps: []step{
461 {
462 name: "setup",
463 addedDocuments: branchToDocumentMap{
464 "main": []index.Document{helloWorld, fruitV1, foo},
465 },
466
467 expectedDocuments: []index.Document{helloWorld, fruitV1, foo},
468 },
469 {
470 name: "delete foo file",
471 addedDocuments: nil,
472 deletedDocuments: branchToDocumentMap{
473 "main": []index.Document{foo},
474 },
475
476 optFn: func(t *testing.T, o *Options) {
477 o.BuildOptions.IsDelta = true
478 },
479
480 expectedDocuments: []index.Document{helloWorld, fruitV1},
481 },
482 },
483 },
484 {
485 name: "addition and deletion on only one branch",
486 branches: []string{"main", "release", "dev"},
487 steps: []step{
488 {
489 name: "setup",
490 addedDocuments: branchToDocumentMap{
491 "main": []index.Document{fruitV1},
492 "release": []index.Document{fruitV2},
493 "dev": []index.Document{fruitV3},
494 },
495
496 expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3},
497 },
498 {
499 name: "replace fruits v3 with v4 on 'dev', delete fruits on 'main'",
500 addedDocuments: branchToDocumentMap{
501 "dev": []index.Document{fruitV4},
502 },
503 deletedDocuments: branchToDocumentMap{
504 "main": []index.Document{fruitV1},
505 },
506
507 optFn: func(t *testing.T, o *Options) {
508 o.BuildOptions.IsDelta = true
509 },
510
511 expectedDocuments: []index.Document{fruitV2, fruitV4},
512 },
513 },
514 },
515 {
516 name: "rename",
517 branches: []string{"main", "release"},
518 steps: []step{
519 {
520 name: "setup",
521 addedDocuments: branchToDocumentMap{
522 "main": []index.Document{fruitV1},
523 "release": []index.Document{fruitV2},
524 },
525 expectedDocuments: []index.Document{fruitV1, fruitV2},
526 },
527 {
528 name: "rename fruits file on 'main' + ensure that unmodified fruits file on 'release' is still searchable",
529 addedDocuments: branchToDocumentMap{
530 "main": []index.Document{fruitV1WithNewName},
531 },
532 deletedDocuments: branchToDocumentMap{
533 "main": []index.Document{fruitV1},
534 },
535
536 optFn: func(t *testing.T, o *Options) {
537 o.BuildOptions.IsDelta = true
538 },
539
540 expectedDocuments: []index.Document{fruitV1WithNewName, fruitV2},
541 },
542 },
543 },
544 {
545 name: "modification: update one branch with version of document from another branch (a.k.a. Keegan's test)",
546 branches: []string{"main", "dev"},
547 steps: []step{
548 {
549 name: "setup",
550 addedDocuments: branchToDocumentMap{
551 "main": []index.Document{fruitV1},
552 "dev": []index.Document{fruitV2},
553 },
554 expectedDocuments: []index.Document{fruitV1, fruitV2},
555 },
556 {
557 name: "switch main to dev's older version of fruits + bump dev's fruits to new version",
558 addedDocuments: branchToDocumentMap{
559 "main": []index.Document{fruitV2},
560 "dev": []index.Document{fruitV3},
561 },
562
563 optFn: func(t *testing.T, o *Options) {
564 o.BuildOptions.IsDelta = true
565 },
566
567 expectedDocuments: []index.Document{fruitV2, fruitV3},
568 },
569 },
570 },
571 {
572 name: "no-op delta builds (reindexing the same commits)",
573 branches: []string{"main", "dev"},
574 steps: []step{
575 {
576 name: "setup",
577 addedDocuments: branchToDocumentMap{
578 "main": []index.Document{fruitV1, foo},
579 "dev": []index.Document{helloWorld},
580 },
581 expectedDocuments: []index.Document{fruitV1, foo, helloWorld},
582 },
583 {
584 name: "first no-op (normal build -> delta build)",
585 optFn: func(t *testing.T, o *Options) {
586 o.BuildOptions.IsDelta = true
587 },
588
589 expectedDocuments: []index.Document{fruitV1, foo, helloWorld},
590 },
591 {
592 name: "second no-op (delta build -> delta build)",
593 optFn: func(t *testing.T, o *Options) {
594 o.BuildOptions.IsDelta = true
595 },
596
597 expectedDocuments: []index.Document{fruitV1, foo, helloWorld},
598 },
599 },
600 },
601 {
602 name: "should fallback to normal build if no prior shards exist",
603 branches: []string{"main"},
604 steps: []step{
605 {
606 name: "attempt delta build on a repository that hasn't been indexed yet",
607 addedDocuments: branchToDocumentMap{
608 "main": []index.Document{helloWorld},
609 },
610 optFn: func(t *testing.T, o *Options) {
611 o.BuildOptions.IsDelta = true
612 },
613
614 expectedFallbackToNormalBuild: true,
615 expectedDocuments: []index.Document{helloWorld},
616 },
617 },
618 },
619 {
620 name: "should fallback to normal build if the set of requested repository branches changes",
621 branches: []string{"main", "release", "dev"},
622 steps: []step{
623 {
624 name: "setup",
625 addedDocuments: branchToDocumentMap{
626 "main": []index.Document{fruitV1},
627 "release": []index.Document{fruitV2},
628 "dev": []index.Document{fruitV3},
629 },
630
631 expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3},
632 },
633 {
634 name: "try delta build after dropping 'main' branch from index ",
635 addedDocuments: branchToDocumentMap{
636 "release": []index.Document{fruitV4},
637 },
638 optFn: func(t *testing.T, o *Options) {
639 o.Branches = []string{"HEAD", "release", "dev"} // a bit of a hack to override it this way, but it gets the job done
640 o.BuildOptions.IsDelta = true
641 },
642
643 expectedFallbackToNormalBuild: true,
644 expectedDocuments: []index.Document{fruitV3, fruitV4},
645 },
646 },
647 },
648 {
649 name: "should expand branches correctly when using wildcards in branch names",
650 branches: []string{"release/1", "release/2"},
651 steps: []step{
652 {
653 name: "setup",
654 addedDocuments: branchToDocumentMap{
655 "release/1": []index.Document{fruitV1},
656 "release/2": []index.Document{fruitV2},
657 },
658
659 expectedDocuments: []index.Document{fruitV1, fruitV2},
660 },
661 {
662 name: "try delta build with wildcard in branches",
663 optFn: func(t *testing.T, o *Options) {
664 // use a wildcard here
665 o.Branches = []string{"HEAD", "release/*"}
666 o.BuildOptions.IsDelta = true
667 },
668
669 expectedDocuments: []index.Document{fruitV1, fruitV2},
670 },
671 },
672 },
673 {
674 name: "should fallback to normal build if one or more index options updates requires a full build",
675 branches: []string{"main"},
676 steps: []step{
677 {
678 name: "setup",
679 addedDocuments: branchToDocumentMap{
680 "main": []index.Document{fruitV1},
681 },
682
683 expectedDocuments: []index.Document{fruitV1},
684 },
685 {
686 name: "try delta build after updating Disable CTags index option",
687 addedDocuments: branchToDocumentMap{
688 "main": []index.Document{fruitV2},
689 },
690 optFn: func(t *testing.T, o *Options) {
691 o.BuildOptions.IsDelta = true
692 o.BuildOptions.DisableCTags = true
693 },
694
695 expectedFallbackToNormalBuild: true,
696 expectedDocuments: []index.Document{fruitV2},
697 },
698 {
699 name: "try delta build after reverting Disable CTags index option",
700 addedDocuments: branchToDocumentMap{
701 "main": []index.Document{fruitV3},
702 },
703 optFn: func(t *testing.T, o *Options) {
704 o.BuildOptions.IsDelta = true
705 o.BuildOptions.DisableCTags = false
706 },
707
708 expectedFallbackToNormalBuild: true,
709 expectedDocuments: []index.Document{fruitV3},
710 },
711 },
712 },
713 {
714 name: "should successfully perform multiple delta builds after disabling symbols",
715 branches: []string{"main"},
716 steps: []step{
717 {
718 name: "setup",
719 addedDocuments: branchToDocumentMap{
720 "main": []index.Document{fruitV1},
721 },
722
723 expectedDocuments: []index.Document{fruitV1},
724 },
725 {
726 name: "try delta build after updating Disable CTags index option",
727 addedDocuments: branchToDocumentMap{
728 "main": []index.Document{fruitV2},
729 },
730 optFn: func(t *testing.T, o *Options) {
731 o.BuildOptions.IsDelta = true
732 o.BuildOptions.DisableCTags = true
733 },
734
735 expectedFallbackToNormalBuild: true,
736 expectedDocuments: []index.Document{fruitV2},
737 },
738 {
739 name: "try another delta build while CTags is still disabled",
740 addedDocuments: branchToDocumentMap{
741 "main": []index.Document{fruitV3},
742 },
743 optFn: func(t *testing.T, o *Options) {
744 o.BuildOptions.IsDelta = true
745 o.BuildOptions.DisableCTags = true
746 },
747
748 expectedDocuments: []index.Document{fruitV3},
749 },
750 },
751 },
752 {
753 name: "should fallback to normal build if repository has unsupported Sourcegraph ignore file",
754 branches: []string{"main"},
755 steps: []step{
756 {
757 name: "setup",
758 addedDocuments: branchToDocumentMap{
759 "main": []index.Document{emptySourcegraphIgnore},
760 },
761
762 expectedDocuments: []index.Document{emptySourcegraphIgnore},
763 },
764 {
765 name: "attempt delta build after modifying ignore file",
766 addedDocuments: branchToDocumentMap{
767 "main": []index.Document{sourcegraphIgnoreWithContent},
768 },
769 optFn: func(t *testing.T, o *Options) {
770 o.BuildOptions.IsDelta = true
771 },
772
773 expectedFallbackToNormalBuild: true,
774 expectedDocuments: []index.Document{sourcegraphIgnoreWithContent},
775 },
776 },
777 },
778 {
779 name: "should fallback to a full, normal build if the repository has more than the specified threshold of shards",
780 branches: []string{"main"},
781 steps: []step{
782 {
783 name: "setup: first shard",
784 addedDocuments: branchToDocumentMap{
785 "main": []index.Document{foo},
786 },
787
788 expectedDocuments: []index.Document{foo},
789 },
790 {
791 name: "setup: second shard (delta)",
792 addedDocuments: branchToDocumentMap{
793 "main": []index.Document{fruitV1},
794 },
795 optFn: func(t *testing.T, o *Options) {
796 o.BuildOptions.IsDelta = true
797 },
798
799 expectedDocuments: []index.Document{foo, fruitV1},
800 },
801 {
802 name: "setup: third shard (delta)",
803 addedDocuments: branchToDocumentMap{
804 "main": []index.Document{helloWorld},
805 },
806 optFn: func(t *testing.T, o *Options) {
807 o.BuildOptions.IsDelta = true
808 },
809
810 expectedDocuments: []index.Document{foo, fruitV1, helloWorld},
811 },
812 {
813 name: "attempt another delta build after we already blew past the shard threshold",
814 addedDocuments: branchToDocumentMap{
815 "main": []index.Document{fruitV2InFolder},
816 },
817 optFn: func(t *testing.T, o *Options) {
818 o.DeltaShardNumberFallbackThreshold = 2
819 o.BuildOptions.IsDelta = true
820 },
821
822 expectedFallbackToNormalBuild: true,
823 expectedDocuments: []index.Document{foo, fruitV1, helloWorld, fruitV2InFolder},
824 },
825 },
826 },
827 } {
828 test := test
829
830 t.Run(test.name, func(t *testing.T) {
831 t.Parallel()
832
833 indexDir := t.TempDir()
834 repositoryDir := t.TempDir()
835
836 // setup: initialize the repository and all of its branches
837 runGit(t, repositoryDir, "init", "-b", "master")
838
839 for _, b := range test.branches {
840 runGit(t, repositoryDir, "checkout", "-b", b)
841 runGit(t, repositoryDir, "commit", "--allow-empty", "-m", "empty commit")
842 }
843
844 for _, step := range test.steps {
845 t.Run(step.name, func(t *testing.T) {
846 for _, b := range test.branches {
847 // setup: for each branch, process any document deletions / additions and commit those changes
848
849 hadChange := false
850
851 runGit(t, repositoryDir, "checkout", b)
852
853 for _, d := range step.deletedDocuments[b] {
854 hadChange = true
855
856 file := filepath.Join(repositoryDir, d.Name)
857
858 err := os.Remove(file)
859 if err != nil {
860 t.Fatalf("deleting file %q: %s", d.Name, err)
861 }
862 }
863
864 for _, d := range step.addedDocuments[b] {
865 hadChange = true
866
867 file := filepath.Join(repositoryDir, d.Name)
868
869 err := os.MkdirAll(filepath.Dir(file), 0o755)
870 if err != nil {
871 t.Fatalf("ensuring that folders exist for file %q: %s", file, err)
872 }
873
874 err = os.WriteFile(file, d.Content, 0o644)
875 if err != nil {
876 t.Fatalf("writing file %q: %s", d.Name, err)
877 }
878 }
879
880 if !hadChange {
881 continue
882 }
883
884 runGit(t, repositoryDir, "add", "-A")
885 runGit(t, repositoryDir, "commit", "-m", step.name)
886 }
887
888 // setup: prepare indexOptions with given overrides
889 buildOptions := index.Options{
890 IndexDir: indexDir,
891 RepositoryDescription: zoekt.Repository{
892 Name: "repository",
893 },
894 IsDelta: false,
895 }
896 buildOptions.SetDefaults()
897
898 branches := append([]string{"HEAD"}, test.branches...)
899
900 options := Options{
901 RepoDir: filepath.Join(repositoryDir, ".git"),
902 BuildOptions: buildOptions,
903 Branches: branches,
904 }
905
906 if step.optFn != nil {
907 step.optFn(t, &options)
908 }
909
910 // setup: prepare spy versions of prepare delta / normal build so that we can observe
911 // whether they were called appropriately
912 deltaBuildCalled := false
913 prepareDeltaSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, changedOrDeletedPaths []string, err error) {
914 deltaBuildCalled = true
915 return prepareDeltaBuild(options, repository)
916 }
917
918 normalBuildCalled := false
919 prepareNormalSpy := func(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) {
920 normalBuildCalled = true
921 return prepareNormalBuild(options, repository)
922 }
923
924 // run test
925 _, err := indexGitRepo(options, gitIndexConfig{
926 prepareDeltaBuild: prepareDeltaSpy,
927 prepareNormalBuild: prepareNormalSpy,
928 })
929 if err != nil {
930 t.Fatalf("IndexGitRepo: %s", err)
931 }
932
933 if options.BuildOptions.IsDelta != deltaBuildCalled {
934 // We should always try a delta build if we request it in the options.
935 t.Fatalf("expected deltaBuildCalled to be %t, got %t", options.BuildOptions.IsDelta, deltaBuildCalled)
936 }
937
938 if options.BuildOptions.IsDelta && (step.expectedFallbackToNormalBuild != normalBuildCalled) {
939 // We only check the normal spy on delta builds because it's only considered a "fallback" if we
940 // asked for a delta build in the first place.
941 t.Fatalf("expected normalBuildCalled to be %t, got %t", step.expectedFallbackToNormalBuild, normalBuildCalled)
942 }
943
944 // examine outcome: load shards into a searcher instance and run a dummy search query
945 // that returns every document contained in the shards
946 //
947 // then, compare returned set of documents with the expected set for the step and see if they agree
948
949 ss, err := search.NewDirectorySearcher(indexDir)
950 if err != nil {
951 t.Fatalf("NewDirectorySearcher(%s): %s", indexDir, err)
952 }
953 defer ss.Close()
954
955 searchOpts := &zoekt.SearchOptions{Whole: true}
956 result, err := ss.Search(context.Background(), &query.Const{Value: true}, searchOpts)
957 if err != nil {
958 t.Fatalf("Search: %s", err)
959 }
960
961 var receivedDocuments []index.Document
962 for _, f := range result.Files {
963 receivedDocuments = append(receivedDocuments, index.Document{
964 Name: f.FileName,
965 Content: f.Content,
966 })
967 }
968
969 for _, docs := range [][]index.Document{step.expectedDocuments, receivedDocuments} {
970 sort.Slice(docs, func(i, j int) bool {
971 a, b := docs[i], docs[j]
972
973 // first compare names, then fallback to contents if the names are equal
974
975 if a.Name < b.Name {
976 return true
977 }
978
979 if a.Name > b.Name {
980 return false
981 }
982
983 return bytes.Compare(a.Content, b.Content) < 0
984 })
985 }
986
987 compareOptions := []cmp.Option{
988 cmpopts.IgnoreFields(index.Document{}, "Branches"),
989 cmpopts.EquateEmpty(),
990 }
991
992 if diff := cmp.Diff(step.expectedDocuments, receivedDocuments, compareOptions...); diff != "" {
993 t.Errorf("diff in received documents (-want +got):%s\n:", diff)
994 }
995 })
996 }
997 })
998 }
999}
1000
// runGit executes `git <args...>` with cwd as its working directory, creating
// cwd first if it does not exist. The command inherits the process
// environment, with the global/system git config variables set to empty
// values and a fixed author/committer identity appended (presumably so the
// host's git configuration does not influence the test — confirm against
// git's handling of empty GIT_CONFIG_* values).
//
// If the directory cannot be created or git exits with an error, the test is
// aborted; git's combined stdout/stderr is included in the failure message.
func runGit(t *testing.T, cwd string, args ...string) {
	t.Helper()

	if mkErr := os.MkdirAll(cwd, 0o755); mkErr != nil {
		t.Fatalf("ensuring path %q exists: %s", cwd, mkErr)
	}

	overrides := []string{
		"GIT_CONFIG_GLOBAL=",
		"GIT_CONFIG_SYSTEM=",
		"GIT_COMMITTER_NAME=Kierkegaard",
		"GIT_COMMITTER_EMAIL=soren@apache.com",
		"GIT_AUTHOR_NAME=Kierkegaard",
		"GIT_AUTHOR_EMAIL=soren@apache.com",
	}

	command := exec.Command("git", args...)
	command.Dir = cwd
	command.Env = append(os.Environ(), overrides...)

	output, runErr := command.CombinedOutput()
	if runErr != nil {
		t.Fatalf("execution error: %v, output %s", runErr, output)
	}
}
1024
1025func TestSetTemplates_e2e(t *testing.T) {
1026 t.Parallel()
1027
1028 repositoryDir := t.TempDir()
1029
1030 // setup: initialize the repository and all of its branches
1031 runGit(t, repositoryDir, "init", "-b", "master")
1032 runGit(t, repositoryDir, "config", "remote.origin.url", "git@github.com:sourcegraph/zoekt.git")
1033 desc := zoekt.Repository{}
1034 if err := setTemplatesFromConfig(&desc, repositoryDir); err != nil {
1035 t.Fatalf("setTemplatesFromConfig: %v", err)
1036 }
1037
1038 if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want {
1039 t.Errorf("got %q, want %q", got, want)
1040 }
1041}
1042
1043func TestSetTemplates_Worktree(t *testing.T) {
1044 t.Parallel()
1045
1046 _, worktreeDir := initGitWorktree(t, "hello.go", "package main\n")
1047 desc := zoekt.Repository{}
1048
1049 if err := setTemplatesFromConfig(&desc, worktreeDir); err != nil {
1050 t.Fatalf("setTemplatesFromConfig(worktree): %v", err)
1051 }
1052
1053 if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want {
1054 t.Errorf("got %q, want %q", got, want)
1055 }
1056}
1057
1058func TestSetTemplates(t *testing.T) {
1059 t.Parallel()
1060
1061 base := "https://example.com/repo/name"
1062 version := "VERSION"
1063 path := "dir/name.txt"
1064 lineNumber := 10
1065 cases := []struct {
1066 typ string
1067 commit string
1068 file string
1069 line string
1070 }{{
1071 typ: "gitiles",
1072 commit: "https://example.com/repo/name/%2B/VERSION",
1073 file: "https://example.com/repo/name/%2B/VERSION/dir/name.txt",
1074 line: "#10",
1075 }, {
1076 typ: "github",
1077 commit: "https://example.com/repo/name/commit/VERSION",
1078 file: "https://example.com/repo/name/blob/VERSION/dir/name.txt",
1079 line: "#L10",
1080 }, {
1081 typ: "cgit",
1082 commit: "https://example.com/repo/name/commit/?id=VERSION",
1083 file: "https://example.com/repo/name/tree/dir/name.txt/?id=VERSION",
1084 line: "#n10",
1085 }, {
1086 typ: "gitweb",
1087 commit: "https://example.com/repo/name;a=commit;h=VERSION",
1088 file: "https://example.com/repo/name;a=blob;f=dir/name.txt;hb=VERSION",
1089 line: "#l10",
1090 }, {
1091 typ: "source.bazel.build",
1092 commit: "https://example.com/repo/name/%2B/VERSION",
1093 file: "https://example.com/repo/name/%2B/VERSION:dir/name.txt",
1094 line: ";l=10",
1095 }, {
1096 typ: "bitbucket-server",
1097 commit: "https://example.com/repo/name/commits/VERSION",
1098 file: "https://example.com/repo/name/dir/name.txt?at=VERSION",
1099 line: "#10",
1100 }, {
1101 typ: "bitbucket-cloud",
1102 commit: "https://example.com/repo/name/commits/VERSION",
1103 file: "https://example.com/repo/name/src/VERSION/dir/name.txt",
1104 line: "#10",
1105 }, {
1106 typ: "azuredevops",
1107 commit: "https://example.com/repo/name/commit/VERSION",
1108 file: "https://example.com/repo/name?path=/dir/name.txt&version=GCVERSION&_a=contents",
1109 line: "&line=10&lineEnd=10&lineStartColumn=1&lineEndColumn=200",
1110 }, {
1111 typ: "gitlab",
1112 commit: "https://example.com/repo/name/-/commit/VERSION",
1113 file: "https://example.com/repo/name/-/blob/VERSION/dir/name.txt",
1114 line: "#L10",
1115 }, {
1116 typ: "gitea",
1117 commit: "https://example.com/repo/name/commit/VERSION",
1118 file: "https://example.com/repo/name/src/commit/VERSION/dir/name.txt?display=source",
1119 line: "#L10",
1120 }}
1121
1122 for _, tc := range cases {
1123 t.Run(tc.typ, func(t *testing.T) {
1124 t.Parallel()
1125
1126 assertOutput := func(templateText string, want string) {
1127 t.Helper()
1128
1129 tt, err := index.ParseTemplate(templateText)
1130 if err != nil {
1131 t.Fatal(err)
1132 }
1133
1134 var sb strings.Builder
1135 err = tt.Execute(&sb, map[string]any{
1136 "Version": version,
1137 "Path": path,
1138 "LineNumber": lineNumber,
1139 })
1140 if err != nil {
1141 t.Fatal(err)
1142 }
1143 if got := sb.String(); got != want {
1144 t.Fatalf("want: %q\ngot: %q", want, got)
1145 }
1146 }
1147
1148 var repo zoekt.Repository
1149 u, _ := url.Parse(base)
1150 err := setTemplates(&repo, u, tc.typ)
1151 if err != nil {
1152 t.Fatal(err)
1153 }
1154 assertOutput(repo.CommitURLTemplate, tc.commit)
1155 assertOutput(repo.FileURLTemplate, tc.file)
1156 assertOutput(repo.LineFragmentTemplate, tc.line)
1157 })
1158 }
1159}
1160
1161func BenchmarkPrepareNormalBuild(b *testing.B) {
1162 repoDir := requireBenchGitRepo(b)
1163 repo, err := plainOpenRepo(repoDir)
1164 if err != nil {
1165 b.Fatalf("Failed to open test repository: %v", err)
1166 }
1167
1168 opts := Options{
1169 RepoDir: repoDir,
1170 Submodules: false,
1171 BranchPrefix: "refs/heads/",
1172 Branches: []string{"HEAD"},
1173 BuildOptions: index.Options{
1174 RepositoryDescription: zoekt.Repository{
1175 Name: "test-repo",
1176 URL: "https://github.com/example/test-repo",
1177 },
1178 },
1179 }
1180
1181 b.ReportAllocs()
1182
1183 repos, branchVersions, err := prepareNormalBuild(opts, repo)
1184 if err != nil {
1185 b.Fatalf("prepareNormalBuild failed: %v", err)
1186 }
1187
1188 runtime.GC()
1189
1190 var m runtime.MemStats
1191 runtime.ReadMemStats(&m)
1192 b.ReportMetric(float64(m.HeapInuse), "heap-used-bytes")
1193 b.ReportMetric(float64(m.HeapInuse), "heap-allocated-bytes")
1194
1195 if len(repos) == 0 || len(branchVersions) == 0 {
1196 b.Fatalf("Unexpected empty results")
1197 }
1198}