// Fork of https://github.com/sourcegraph/zoekt
1package main
2
3import (
4 "fmt"
5 "os"
6 "path/filepath"
7 "reflect"
8 "sort"
9 "strings"
10 "testing"
11 "time"
12
13 "github.com/google/go-cmp/cmp"
14 "github.com/google/go-cmp/cmp/cmpopts"
15
16 "github.com/sourcegraph/zoekt"
17 "github.com/sourcegraph/zoekt/build"
18)
19
20func TestCleanup(t *testing.T) {
21 mk := func(name string, n int, mtime time.Time) shard {
22 return shard{
23 RepoID: fakeID(name),
24 RepoName: name,
25 Path: zoekt.ShardName("", name, 15, n),
26 ModTime: mtime,
27 RepoTombstone: false,
28 }
29 }
30 // We don't use getShards so that we have two implementations of the same
31 // thing (ie pick up bugs in one)
32 glob := func(pattern string) []shard {
33 paths, _ := filepath.Glob(pattern)
34 sort.Strings(paths)
35 var shards []shard
36 for _, path := range paths {
37 if filepath.Ext(path) != ".zoekt" {
38 continue
39 }
40 repos, _, _ := zoekt.ReadMetadataPathAlive(path)
41 fi, _ := os.Stat(path)
42 for _, repo := range repos {
43 shards = append(shards, shard{
44 RepoID: repo.ID,
45 RepoName: repo.Name,
46 Path: filepath.Base(path),
47 ModTime: fi.ModTime(),
48 })
49 }
50 }
51 return shards
52 }
53
54 now := time.Now().Truncate(time.Second)
55 recent := now.Add(-time.Hour)
56 old := now.Add(-25 * time.Hour)
57 cases := []struct {
58 name string
59 repos []string
60 index []shard
61 trash []shard
62 tmps []string
63
64 wantIndex []shard
65 wantTrash []shard
66 }{{
67 name: "noop",
68 }, {
69 name: "not indexed yet",
70 repos: []string{"foo", "bar"},
71 }, {
72 name: "just trash",
73 trash: []shard{mk("foo", 0, recent), mk("bar", 0, recent), mk("bar", 5, old)},
74 wantTrash: []shard{mk("foo", 0, recent)},
75 }, {
76 name: "single trash",
77 repos: []string{"foo"},
78 index: []shard{mk("foo", 0, old), mk("bar", 0, old), mk("bar", 1, old)},
79 wantIndex: []shard{mk("foo", 0, old)},
80 wantTrash: []shard{mk("bar", 0, now), mk("bar", 1, now)},
81 }, {
82 name: "just index",
83 repos: []string{"foo"},
84 index: []shard{mk("foo", 0, old), mk("foo", 1, recent), mk("bar", 0, recent), mk("bar", 1, old)},
85 wantIndex: []shard{mk("foo", 0, old), mk("foo", 1, recent)},
86 wantTrash: []shard{mk("bar", 0, now), mk("bar", 1, now)},
87 }, {
88 name: "future timestamp",
89 trash: []shard{mk("foo", 0, now.Add(time.Hour))},
90 wantTrash: []shard{mk("foo", 0, now)},
91 }, {
92 name: "conflict",
93 repos: []string{"foo"},
94 trash: []shard{mk("foo", 0, recent), mk("foo", 1, recent), mk("bar", 0, recent), mk("bar", 1, old)},
95 index: []shard{mk("foo", 0, recent), mk("bar", 0, recent)},
96 wantIndex: []shard{mk("foo", 0, recent)},
97 wantTrash: []shard{mk("bar", 0, now)},
98 }, {
99 name: "clean old .tmp files",
100 tmps: []string{"recent.tmp", "old.tmp"},
101 }, {
102 name: "all",
103 repos: []string{"exists", "trashed"},
104 trash: []shard{mk("trashed", 0, recent), mk("delete", 0, old)},
105 index: []shard{mk("exists", 0, recent), mk("trash", 0, recent)},
106 wantIndex: []shard{mk("exists", 0, recent), mk("trashed", 0, recent)},
107 wantTrash: []shard{mk("trash", 0, now)},
108 }}
109
110 for _, tt := range cases {
111 t.Run(tt.name, func(t *testing.T) {
112 dir := t.TempDir()
113
114 // Create index files
115 var fs []shard
116 for _, f := range tt.index {
117 f.Path = filepath.Join(dir, f.Path)
118 fs = append(fs, f)
119 }
120 for _, f := range tt.trash {
121 f.Path = filepath.Join(dir, ".trash", f.Path)
122 fs = append(fs, f)
123 }
124 for _, f := range fs {
125 createTestShard(t, f.RepoName, fakeID(f.RepoName), f.Path)
126 if err := os.Chtimes(f.Path, f.ModTime, f.ModTime); err != nil {
127 t.Fatal(err)
128 }
129 }
130 for _, name := range tt.tmps {
131 path := filepath.Join(dir, name)
132 if _, err := os.Create(path); err != nil {
133 t.Fatal(err)
134 }
135 }
136
137 var repoIDs []uint32
138 for _, name := range tt.repos {
139 repoIDs = append(repoIDs, fakeID(name))
140 }
141 cleanup(dir, repoIDs, now, false)
142
143 if d := cmp.Diff(tt.wantIndex, glob(filepath.Join(dir, "*.zoekt"))); d != "" {
144 t.Errorf("unexpected index (-want, +got):\n%s", d)
145 }
146 if d := cmp.Diff(tt.wantTrash, glob(filepath.Join(dir, ".trash", "*.zoekt"))); d != "" {
147 t.Errorf("unexpected trash (-want, +got):\n%s", d)
148 }
149 if tmps := globBase(filepath.Join(dir, "*.tmp")); len(tmps) > 0 {
150 t.Errorf("unexpected tmps: %v", tmps)
151 }
152
153 if testing.Verbose() {
154 data, _ := os.ReadFile(filepath.Join(dir, "zoekt-indexserver-shard-log.tsv"))
155 if len(data) > 0 {
156 t.Log("shard log contents:\n" + strings.TrimSpace(string(data)))
157 }
158 }
159 })
160 }
161}
162
163func createTestShard(t *testing.T, repo string, id uint32, path string, optFns ...func(in *zoekt.Repository)) {
164 t.Helper()
165
166 if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
167 t.Fatal(err)
168 }
169 r := &zoekt.Repository{
170 ID: id,
171 Name: repo,
172 }
173 for _, optFn := range optFns {
174 optFn(r)
175 }
176 b, err := zoekt.NewIndexBuilder(r)
177 if err != nil {
178 t.Fatal(err)
179 }
180 f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE, 0o600)
181 if err != nil {
182 t.Fatal(err)
183 }
184 defer f.Close()
185 if err := b.Write(f); err != nil {
186 t.Fatal(err)
187 }
188}
189
// globBase returns the sorted base names of all paths matching pattern. The
// error from filepath.Glob is ignored.
func globBase(pattern string) []string {
	matches, _ := filepath.Glob(pattern)
	for idx, match := range matches {
		matches[idx] = filepath.Base(match)
	}
	sort.Sort(sort.StringSlice(matches))
	return matches
}
198
199func TestRemoveIncompleteShards(t *testing.T) {
200 shards, incomplete := []string{
201 "test.zoekt",
202 "foo.zoekt",
203 "bar.zoekt",
204 "bar.zoekt.meta",
205 }, []string{
206 "incomplete.zoekt123",
207 "crash.zoekt567",
208 "metacrash.zoekt789.meta",
209 }
210 sort.Strings(shards)
211
212 dir := t.TempDir()
213
214 for _, shard := range append(shards, incomplete...) {
215 _, err := os.Create(filepath.Join(dir, shard))
216 if err != nil {
217 t.Fatal(err)
218 }
219 }
220 removeIncompleteShards(dir)
221
222 left, _ := filepath.Glob(filepath.Join(dir, "*"))
223 sort.Strings(left)
224 for i := range left {
225 left[i] = filepath.Base(left[i])
226 }
227
228 if !reflect.DeepEqual(shards, left) {
229 t.Errorf("\ngot shards: %v\nwant: %v\n", left, shards)
230 }
231}
232
233func TestVacuum(t *testing.T) {
234 tmpDir := t.TempDir()
235 fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4})
236
237 err := zoekt.SetTombstone(fn, 2)
238 if err != nil {
239 t.Fatal(err)
240 }
241
242 mockMerger = func() error { return mergeHelper(t, fn) }
243 got, err := removeTombstones(fn)
244 if err != nil {
245 t.Fatal(err)
246 }
247
248 if len(got) != 1 || got[0].Name != "repo2" {
249 t.Fatal(err)
250 }
251
252 shards, err := filepath.Glob(tmpDir + "/compound-*")
253 if err != nil {
254 t.Fatal(err)
255 }
256
257 if len(shards) != 1 {
258 t.Fatalf("expected 1 shard, but instead got %d", len(shards))
259 }
260
261 repos, _, err := zoekt.ReadMetadataPath(shards[0])
262 if err != nil {
263 t.Fatal(err)
264 }
265
266 if len(repos) != 3 {
267 t.Fatalf("wanted 3, got %d repos", len(repos))
268 }
269
270 for _, r := range repos {
271 if r.Tombstone {
272 t.Fatalf("found tombstone for %s", r.Name)
273 }
274 }
275}
276
277// Create 2 compound shards, each of which contains the same tombstoned repo but
278// from different commit dates.
279func TestGetTombstonedRepos(t *testing.T) {
280 setLastCommitDate := func(lastCommitDate time.Time) func(repository *zoekt.Repository) {
281 return func(repository *zoekt.Repository) {
282 repository.LatestCommitDate = lastCommitDate
283 }
284 }
285
286 dir := t.TempDir()
287 var repoID uint32 = 2
288 csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour)))
289 if err := zoekt.SetTombstone(csOld, repoID); err != nil {
290 t.Fatal(err)
291 }
292
293 now := time.Now()
294 csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now))
295 if err := zoekt.SetTombstone(csNew, repoID); err != nil {
296 t.Fatal(err)
297 }
298
299 // Check that getTombstonedRepos returns the compound shard containing the
300 // tombstoned repo with id repoID with the latest commit.
301 got := getTombstonedRepos(dir)
302
303 if len(got) != 1 {
304 t.Fatalf("want 1 shard, got %d shards", len(got))
305 }
306
307 v, ok := got[repoID]
308 if !ok || v.Path != csNew {
309 t.Fatalf("want %s, got %s", csNew, v.Path)
310 }
311}
312
313// HAVE
314// ----
315// index/
316// CS 1
317//
318// r1, tombstoned, old
319// r2, tombstoned, old
320// r3, tombstoned, old
321//
322// CS 2
323//
324// r1, tombstoned, recent
325// r2, tombstoned, recent
326// r4, tombstoned, recent
327//
328// SS 1
329//
330// r1, now
331//
332// .trash/
333// SS 3
334//
335// r3, now
336//
337// SS 5
338//
339// r5, now
340//
341// TO BE INDEXED
342// -------------
343// repos r1, r2, r3, r4, r5
344//
345// WANT
346// ----
347// index/
348// CS 1
349//
350// r1, tombstoned, old
351// r2, tombstoned, old
352// r3, tombstoned, old
353//
354// CS 2
355//
356// r1, tombstoned, recent
357// r2, recent
358// r4, recent
359//
360// SS 1
361//
362// r1, now
363//
364// SS 3
365//
366// r3, now
367//
368// SS 5
369//
370// r5, now
371//
372// .trash/ --> empty
373func TestCleanupCompoundShards(t *testing.T) {
374 dir := t.TempDir()
375
376 // timestamps
377 now := time.Now()
378 recent := now.Add(-1 * time.Hour)
379 old := now.Add(-2 * time.Hour)
380
381 setTombstone := func(shardPath string, repoID uint32) {
382 t.Helper()
383 if err := zoekt.SetTombstone(shardPath, repoID); err != nil {
384 t.Fatal(err)
385 }
386 }
387
388 cs1 := createCompoundShard(t, dir, []uint32{1, 2, 3}, func(in *zoekt.Repository) {
389 in.LatestCommitDate = old
390 })
391 setTombstone(cs1, 1)
392 setTombstone(cs1, 2)
393 setTombstone(cs1, 3)
394
395 cs2 := createCompoundShard(t, dir, []uint32{1, 2, 4}, func(in *zoekt.Repository) {
396 in.LatestCommitDate = recent
397 })
398 setTombstone(cs2, 1)
399 setTombstone(cs2, 2)
400 setTombstone(cs2, 4)
401
402 createTestShard(t, "repo1", 1, filepath.Join(dir, "repo1.zoekt"), func(in *zoekt.Repository) {
403 in.LatestCommitDate = now
404 })
405 createTestShard(t, "repo3", 3, filepath.Join(dir, ".trash", "repo3.zoekt"), func(in *zoekt.Repository) {
406 in.LatestCommitDate = now
407 })
408 createTestShard(t, "repo5", 5, filepath.Join(dir, ".trash", "repo5.zoekt"), func(in *zoekt.Repository) {
409 in.LatestCommitDate = now
410 })
411
412 // want indexed
413 repos := []uint32{1, 2, 3, 4, 5}
414
415 cleanup(dir, repos, now, true)
416
417 index := getShards(dir)
418 trash := getShards(filepath.Join(dir, ".trash"))
419
420 if len(trash) != 0 {
421 t.Fatalf("expected empty trash, got %+v", trash)
422 }
423
424 wantIndex := map[uint32][]shard{
425 1: {{
426 RepoID: 1,
427 RepoName: "repo1",
428 Path: filepath.Join(dir, "repo1.zoekt"),
429 }},
430 2: {{
431 RepoID: 2,
432 RepoName: "repo2",
433 Path: cs2,
434 }},
435 3: {{
436 RepoID: 3,
437 RepoName: "repo3",
438 Path: filepath.Join(dir, "repo3.zoekt"),
439 }},
440 4: {{
441 RepoID: 4,
442 RepoName: "repo4",
443 Path: cs2,
444 }},
445 5: {{
446 RepoID: 5,
447 RepoName: "repo5",
448 Path: filepath.Join(dir, "repo5.zoekt"),
449 }},
450 }
451
452 if d := cmp.Diff(wantIndex, index, cmpopts.IgnoreFields(shard{}, "ModTime")); d != "" {
453 t.Fatalf("-want, +got: %s", d)
454 }
455}
456
457// createCompoundShard returns a path to a compound shard containing repos with
458// ids. Use optsFns to overwrite fields of zoekt.Repository for all repos.
459func createCompoundShard(t *testing.T, dir string, ids []uint32, optFns ...func(in *zoekt.Repository)) string {
460 t.Helper()
461
462 var repoFns []string
463
464 for _, id := range ids {
465 repo := zoekt.Repository{
466 ID: id,
467 Name: fmt.Sprintf("repo%d", id),
468 RawConfig: map[string]string{
469 "public": "1",
470 },
471 }
472 for _, optsFn := range optFns {
473 optsFn(&repo)
474 }
475
476 opts := build.Options{
477 IndexDir: dir,
478 RepositoryDescription: repo,
479 }
480 opts.SetDefaults()
481 b, err := build.NewBuilder(opts)
482 if err != nil {
483 t.Fatalf("NewBuilder: %v", err)
484 }
485 if err := b.AddFile("F", []byte(strings.Repeat("abc", 100))); err != nil {
486 t.Errorf("AddFile: %v", err)
487 }
488 if err := b.Finish(); err != nil {
489 t.Errorf("Finish: %v", err)
490 }
491
492 repoFns = append(repoFns, opts.FindAllShards()...)
493 }
494
495 // create a compound shard.
496 tmpFn, dstFn, err := merge(t, dir, repoFns)
497 if err != nil {
498 t.Fatal(err)
499 }
500 for _, old := range repoFns {
501 if err := os.Remove(old); err != nil {
502 t.Fatal(err)
503 }
504 }
505 if err := os.Rename(tmpFn, dstFn); err != nil {
506 t.Fatal(err)
507 }
508 return dstFn
509}
510
511func mergeHelper(t *testing.T, fn string) error {
512 t.Helper()
513
514 f, err := os.Open(fn)
515 if err != nil {
516 return fmt.Errorf("os.Open: %s", err)
517 }
518 defer f.Close()
519
520 indexFile, err := zoekt.NewIndexFile(f)
521 if err != nil {
522 return fmt.Errorf("zoekt.NewIndexFile: %s ", err)
523 }
524 defer indexFile.Close()
525
526 _, _, err = zoekt.Merge(filepath.Dir(fn), indexFile)
527 return err
528}