fork of https://github.com/sourcegraph/zoekt
1package main
2
3import (
4 "fmt"
5 "net/url"
6 "os"
7 "path/filepath"
8 "reflect"
9 "sort"
10 "strings"
11 "testing"
12 "time"
13
14 "github.com/google/go-cmp/cmp"
15 "github.com/google/go-cmp/cmp/cmpopts"
16
17 "github.com/sourcegraph/zoekt"
18 "github.com/sourcegraph/zoekt/build"
19)
20
21func TestCleanup(t *testing.T) {
22 mk := func(name string, n int, mtime time.Time) shard {
23 return shard{
24 RepoID: fakeID(name),
25 RepoName: name,
26 Path: fmt.Sprintf("%s_v%d.%05d.zoekt", url.QueryEscape(name), 15, n),
27 ModTime: mtime,
28 RepoTombstone: false,
29 }
30 }
31 // We don't use getShards so that we have two implementations of the same
32 // thing (ie pick up bugs in one)
33 glob := func(pattern string) []shard {
34 paths, _ := filepath.Glob(pattern)
35 sort.Strings(paths)
36 var shards []shard
37 for _, path := range paths {
38 if filepath.Ext(path) != ".zoekt" {
39 continue
40 }
41 repos, _, _ := zoekt.ReadMetadataPathAlive(path)
42 fi, _ := os.Stat(path)
43 for _, repo := range repos {
44 shards = append(shards, shard{
45 RepoID: repo.ID,
46 RepoName: repo.Name,
47 Path: filepath.Base(path),
48 ModTime: fi.ModTime(),
49 })
50 }
51 }
52 return shards
53 }
54
55 now := time.Now().Truncate(time.Second)
56 recent := now.Add(-time.Hour)
57 old := now.Add(-25 * time.Hour)
58 cases := []struct {
59 name string
60 repos []string
61 index []shard
62 trash []shard
63 tmps []string
64
65 wantIndex []shard
66 wantTrash []shard
67 }{{
68 name: "noop",
69 }, {
70 name: "not indexed yet",
71 repos: []string{"foo", "bar"},
72 }, {
73 name: "just trash",
74 trash: []shard{mk("foo", 0, recent), mk("bar", 0, recent), mk("bar", 5, old)},
75 wantTrash: []shard{mk("foo", 0, recent)},
76 }, {
77 name: "single trash",
78 repos: []string{"foo"},
79 index: []shard{mk("foo", 0, old), mk("bar", 0, old), mk("bar", 1, old)},
80 wantIndex: []shard{mk("foo", 0, old)},
81 wantTrash: []shard{mk("bar", 0, now), mk("bar", 1, now)},
82 }, {
83 name: "just index",
84 repos: []string{"foo"},
85 index: []shard{mk("foo", 0, old), mk("foo", 1, recent), mk("bar", 0, recent), mk("bar", 1, old)},
86 wantIndex: []shard{mk("foo", 0, old), mk("foo", 1, recent)},
87 wantTrash: []shard{mk("bar", 0, now), mk("bar", 1, now)},
88 }, {
89 name: "future timestamp",
90 trash: []shard{mk("foo", 0, now.Add(time.Hour))},
91 wantTrash: []shard{mk("foo", 0, now)},
92 }, {
93 name: "conflict",
94 repos: []string{"foo"},
95 trash: []shard{mk("foo", 0, recent), mk("foo", 1, recent), mk("bar", 0, recent), mk("bar", 1, old)},
96 index: []shard{mk("foo", 0, recent), mk("bar", 0, recent)},
97 wantIndex: []shard{mk("foo", 0, recent)},
98 wantTrash: []shard{mk("bar", 0, now)},
99 }, {
100 name: "clean old .tmp files",
101 tmps: []string{"recent.tmp", "old.tmp"},
102 }, {
103 name: "all",
104 repos: []string{"exists", "trashed"},
105 trash: []shard{mk("trashed", 0, recent), mk("delete", 0, old)},
106 index: []shard{mk("exists", 0, recent), mk("trash", 0, recent)},
107 wantIndex: []shard{mk("exists", 0, recent), mk("trashed", 0, recent)},
108 wantTrash: []shard{mk("trash", 0, now)},
109 }}
110
111 for _, tt := range cases {
112 t.Run(tt.name, func(t *testing.T) {
113 dir := t.TempDir()
114
115 // Create index files
116 var fs []shard
117 for _, f := range tt.index {
118 f.Path = filepath.Join(dir, f.Path)
119 fs = append(fs, f)
120 }
121 for _, f := range tt.trash {
122 f.Path = filepath.Join(dir, ".trash", f.Path)
123 fs = append(fs, f)
124 }
125 for _, f := range fs {
126 createTestShard(t, f.RepoName, fakeID(f.RepoName), f.Path)
127 if err := os.Chtimes(f.Path, f.ModTime, f.ModTime); err != nil {
128 t.Fatal(err)
129 }
130 }
131 for _, name := range tt.tmps {
132 path := filepath.Join(dir, name)
133 if _, err := os.Create(path); err != nil {
134 t.Fatal(err)
135 }
136 }
137
138 var repoIDs []uint32
139 for _, name := range tt.repos {
140 repoIDs = append(repoIDs, fakeID(name))
141 }
142 cleanup(dir, repoIDs, now, false)
143
144 if d := cmp.Diff(tt.wantIndex, glob(filepath.Join(dir, "*.zoekt"))); d != "" {
145 t.Errorf("unexpected index (-want, +got):\n%s", d)
146 }
147 if d := cmp.Diff(tt.wantTrash, glob(filepath.Join(dir, ".trash", "*.zoekt"))); d != "" {
148 t.Errorf("unexpected trash (-want, +got):\n%s", d)
149 }
150 if tmps := globBase(filepath.Join(dir, "*.tmp")); len(tmps) > 0 {
151 t.Errorf("unexpected tmps: %v", tmps)
152 }
153
154 if testing.Verbose() {
155 data, _ := os.ReadFile(filepath.Join(dir, "zoekt-indexserver-shard-log.tsv"))
156 if len(data) > 0 {
157 t.Log("shard log contents:\n" + strings.TrimSpace(string(data)))
158 }
159 }
160 })
161 }
162}
163
164func createTestShard(t *testing.T, repo string, id uint32, path string, optFns ...func(in *zoekt.Repository)) {
165 t.Helper()
166
167 if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
168 t.Fatal(err)
169 }
170 r := &zoekt.Repository{
171 ID: id,
172 Name: repo,
173 }
174 for _, optFn := range optFns {
175 optFn(r)
176 }
177 b, err := zoekt.NewIndexBuilder(r)
178 if err != nil {
179 t.Fatal(err)
180 }
181 f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0o600)
182 if err != nil {
183 t.Fatal(err)
184 }
185 defer f.Close()
186 if err := b.Write(f); err != nil {
187 t.Fatal(err)
188 }
189}
190
// globBase returns the base names of all paths matching pattern, sorted
// lexically. A malformed pattern yields no names.
func globBase(pattern string) []string {
	// The only error Glob reports is a malformed pattern, in which case there
	// are no matches to process.
	matches, _ := filepath.Glob(pattern)

	var names []string
	for _, match := range matches {
		names = append(names, filepath.Base(match))
	}
	sort.Strings(names)
	return names
}
199
200func TestRemoveIncompleteShards(t *testing.T) {
201 shards, incomplete := []string{
202 "test.zoekt",
203 "foo.zoekt",
204 "bar.zoekt",
205 "bar.zoekt.meta",
206 }, []string{
207 "incomplete.zoekt123",
208 "crash.zoekt567",
209 "metacrash.zoekt789.meta",
210 }
211 sort.Strings(shards)
212
213 dir := t.TempDir()
214
215 for _, shard := range append(shards, incomplete...) {
216 _, err := os.Create(filepath.Join(dir, shard))
217 if err != nil {
218 t.Fatal(err)
219 }
220 }
221 removeIncompleteShards(dir)
222
223 left, _ := filepath.Glob(filepath.Join(dir, "*"))
224 sort.Strings(left)
225 for i := range left {
226 left[i] = filepath.Base(left[i])
227 }
228
229 if !reflect.DeepEqual(shards, left) {
230 t.Errorf("\ngot shards: %v\nwant: %v\n", left, shards)
231 }
232}
233
234func TestVacuum(t *testing.T) {
235 tmpDir := t.TempDir()
236 fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4})
237
238 err := zoekt.SetTombstone(fn, 2)
239 if err != nil {
240 t.Fatal(err)
241 }
242
243 mockMerger = func() error { return mergeHelper(t, fn) }
244 got, err := removeTombstones(fn)
245 if err != nil {
246 t.Fatal(err)
247 }
248
249 if len(got) != 1 || got[0].Name != "repo2" {
250 t.Fatal(err)
251 }
252
253 shards, err := filepath.Glob(tmpDir + "/compound-*")
254 if err != nil {
255 t.Fatal(err)
256 }
257
258 if len(shards) != 1 {
259 t.Fatalf("expected 1 shard, but instead got %d", len(shards))
260 }
261
262 repos, _, err := zoekt.ReadMetadataPath(shards[0])
263 if err != nil {
264 t.Fatal(err)
265 }
266
267 if len(repos) != 3 {
268 t.Fatalf("wanted 3, got %d repos", len(repos))
269 }
270
271 for _, r := range repos {
272 if r.Tombstone {
273 t.Fatalf("found tombstone for %s", r.Name)
274 }
275 }
276}
277
278// Create 2 compound shards, each of which contains the same tombstoned repo but
279// from different commit dates.
280func TestGetTombstonedRepos(t *testing.T) {
281 setLastCommitDate := func(lastCommitDate time.Time) func(repository *zoekt.Repository) {
282 return func(repository *zoekt.Repository) {
283 repository.LatestCommitDate = lastCommitDate
284 }
285 }
286
287 dir := t.TempDir()
288 var repoID uint32 = 2
289 csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour)))
290 if err := zoekt.SetTombstone(csOld, repoID); err != nil {
291 t.Fatal(err)
292 }
293
294 now := time.Now()
295 csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now))
296 if err := zoekt.SetTombstone(csNew, repoID); err != nil {
297 t.Fatal(err)
298 }
299
300 // Check that getTombstonedRepos returns the compound shard containing the
301 // tombstoned repo with id repoID with the latest commit.
302 got := getTombstonedRepos(dir)
303
304 if len(got) != 1 {
305 t.Fatalf("want 1 shard, got %d shards", len(got))
306 }
307
308 v, ok := got[repoID]
309 if !ok || v.Path != csNew {
310 t.Fatalf("want %s, got %s", csNew, v.Path)
311 }
312}
313
314// HAVE
315// ----
316// index/
317// CS 1
318//
319// r1, tombstoned, old
320// r2, tombstoned, old
321// r3, tombstoned, old
322//
323// CS 2
324//
325// r1, tombstoned, recent
326// r2, tombstoned, recent
327// r4, tombstoned, recent
328//
329// SS 1
330//
331// r1, now
332//
333// .trash/
334// SS 3
335//
336// r3, now
337//
338// SS 5
339//
340// r5, now
341//
342// TO BE INDEXED
343// -------------
344// repos r1, r2, r3, r4, r5
345//
346// WANT
347// ----
348// index/
349// CS 1
350//
351// r1, tombstoned, old
352// r2, tombstoned, old
353// r3, tombstoned, old
354//
355// CS 2
356//
357// r1, tombstoned, recent
358// r2, recent
359// r4, recent
360//
361// SS 1
362//
363// r1, now
364//
365// SS 3
366//
367// r3, now
368//
369// SS 5
370//
371// r5, now
372//
373// .trash/ --> empty
374func TestCleanupCompoundShards(t *testing.T) {
375 dir := t.TempDir()
376
377 // timestamps
378 now := time.Now()
379 recent := now.Add(-1 * time.Hour)
380 old := now.Add(-2 * time.Hour)
381
382 setTombstone := func(shardPath string, repoID uint32) {
383 t.Helper()
384 if err := zoekt.SetTombstone(shardPath, repoID); err != nil {
385 t.Fatal(err)
386 }
387 }
388
389 cs1 := createCompoundShard(t, dir, []uint32{1, 2, 3}, func(in *zoekt.Repository) {
390 in.LatestCommitDate = old
391 })
392 setTombstone(cs1, 1)
393 setTombstone(cs1, 2)
394 setTombstone(cs1, 3)
395
396 cs2 := createCompoundShard(t, dir, []uint32{1, 2, 4}, func(in *zoekt.Repository) {
397 in.LatestCommitDate = recent
398 })
399 setTombstone(cs2, 1)
400 setTombstone(cs2, 2)
401 setTombstone(cs2, 4)
402
403 createTestShard(t, "repo1", 1, filepath.Join(dir, "repo1.zoekt"), func(in *zoekt.Repository) {
404 in.LatestCommitDate = now
405 })
406 createTestShard(t, "repo3", 3, filepath.Join(dir, ".trash", "repo3.zoekt"), func(in *zoekt.Repository) {
407 in.LatestCommitDate = now
408 })
409 createTestShard(t, "repo5", 5, filepath.Join(dir, ".trash", "repo5.zoekt"), func(in *zoekt.Repository) {
410 in.LatestCommitDate = now
411 })
412
413 // want indexed
414 repos := []uint32{1, 2, 3, 4, 5}
415
416 cleanup(dir, repos, now, true)
417
418 index := getShards(dir)
419 trash := getShards(filepath.Join(dir, ".trash"))
420
421 if len(trash) != 0 {
422 t.Fatalf("expected empty trash, got %+v", trash)
423 }
424
425 wantIndex := map[uint32][]shard{
426 1: {{
427 RepoID: 1,
428 RepoName: "repo1",
429 Path: filepath.Join(dir, "repo1.zoekt"),
430 }},
431 2: {{
432 RepoID: 2,
433 RepoName: "repo2",
434 Path: cs2,
435 }},
436 3: {{
437 RepoID: 3,
438 RepoName: "repo3",
439 Path: filepath.Join(dir, "repo3.zoekt"),
440 }},
441 4: {{
442 RepoID: 4,
443 RepoName: "repo4",
444 Path: cs2,
445 }},
446 5: {{
447 RepoID: 5,
448 RepoName: "repo5",
449 Path: filepath.Join(dir, "repo5.zoekt"),
450 }},
451 }
452
453 if d := cmp.Diff(wantIndex, index, cmpopts.IgnoreFields(shard{}, "ModTime")); d != "" {
454 t.Fatalf("-want, +got: %s", d)
455 }
456}
457
458// createCompoundShard returns a path to a compound shard containing repos with
459// ids. Use optsFns to overwrite fields of zoekt.Repository for all repos.
460func createCompoundShard(t *testing.T, dir string, ids []uint32, optFns ...func(in *zoekt.Repository)) string {
461 t.Helper()
462
463 var repoFns []string
464
465 for _, id := range ids {
466 repo := zoekt.Repository{
467 ID: id,
468 Name: fmt.Sprintf("repo%d", id),
469 RawConfig: map[string]string{
470 "public": "1",
471 },
472 }
473 for _, optsFn := range optFns {
474 optsFn(&repo)
475 }
476
477 opts := build.Options{
478 IndexDir: dir,
479 RepositoryDescription: repo,
480 }
481 opts.SetDefaults()
482 b, err := build.NewBuilder(opts)
483 if err != nil {
484 t.Fatalf("NewBuilder: %v", err)
485 }
486 if err := b.AddFile("F", []byte(strings.Repeat("abc", 100))); err != nil {
487 t.Errorf("AddFile: %v", err)
488 }
489 if err := b.Finish(); err != nil {
490 t.Errorf("Finish: %v", err)
491 }
492
493 repoFns = append(repoFns, opts.FindAllShards()...)
494 }
495
496 // create a compound shard.
497 tmpFn, dstFn, err := merge(t, dir, repoFns)
498 if err != nil {
499 t.Fatal(err)
500 }
501 for _, old := range repoFns {
502 if err := os.Remove(old); err != nil {
503 t.Fatal(err)
504 }
505 }
506 if err := os.Rename(tmpFn, dstFn); err != nil {
507 t.Fatal(err)
508 }
509 return dstFn
510}
511
512func mergeHelper(t *testing.T, fn string) error {
513 t.Helper()
514
515 f, err := os.Open(fn)
516 if err != nil {
517 return fmt.Errorf("os.Open: %s", err)
518 }
519 defer f.Close()
520
521 indexFile, err := zoekt.NewIndexFile(f)
522 if err != nil {
523 return fmt.Errorf("zoekt.NewIndexFile: %s ", err)
524 }
525 defer indexFile.Close()
526
527 _, _, err = zoekt.Merge(filepath.Dir(fn), indexFile)
528 return err
529}