fork of https://github.com/sourcegraph/zoekt
1package main
2
3import (
4 "crypto/sha1"
5 "fmt"
6 "io"
7 "os"
8 "os/exec"
9 "path/filepath"
10 "strings"
11 "testing"
12
13 "github.com/stretchr/testify/require"
14
15 "github.com/sourcegraph/zoekt"
16 "github.com/sourcegraph/zoekt/index"
17 "github.com/sourcegraph/zoekt/internal/tenant/tenanttest"
18)
19
20func TestHasMultipleShards(t *testing.T) {
21 dir := t.TempDir()
22
23 cases := []struct {
24 file string
25 wantHasMultipleShards bool
26 }{
27 {"large.00000.zoekt", true},
28 {"large.00001.zoekt", true},
29 {"small.00000.zoekt", false},
30 {"compound-foo.00000.zoekt", false},
31 {"else", false},
32 }
33
34 for _, c := range cases {
35 _, err := os.Create(filepath.Join(dir, c.file))
36 if err != nil {
37 t.Fatal(err)
38 }
39 }
40
41 for _, tt := range cases {
42 t.Run(tt.file, func(t *testing.T) {
43 if got := hasMultipleShards(filepath.Join(dir, tt.file)); got != tt.wantHasMultipleShards {
44 t.Fatalf("want %t, got %t", tt.wantHasMultipleShards, got)
45 }
46 })
47 }
48}
49
50func TestDoNotDeleteSingleShards(t *testing.T) {
51 dir := t.TempDir()
52
53 // Create a test shard.
54 opts := index.Options{
55 IndexDir: dir,
56 RepositoryDescription: zoekt.Repository{Name: "test-repo"},
57 }
58 opts.SetDefaults()
59 b, err := index.NewBuilder(opts)
60 if err != nil {
61 t.Fatalf("NewBuilder: %v", err)
62 }
63 if err := b.AddFile("F", []byte(strings.Repeat("abc", 100))); err != nil {
64 t.Fatalf("AddFile: %v", err)
65 }
66 if err := b.Finish(); err != nil {
67 t.Errorf("Finish: %v", err)
68 }
69
70 s := &Server{IndexDir: dir, mergeOpts: mergeOpts{targetSizeBytes: 2000 * 1024 * 1024}}
71 s.merge(helperCallMerge)
72
73 _, err = os.Stat(filepath.Join(dir, "test-repo_v16.00000.zoekt"))
74 if err != nil {
75 t.Fatal(err)
76 }
77}
78
// helperCallMerge returns a command that re-executes the current test binary
// (os.Args[0]) restricted to TestCallMerge, passing s as extra arguments after
// a "--" separator. The child is told to act as the mocked merge process via
// GO_TEST_WANT_CALL_MERGE=1.
func helperCallMerge(s ...string) *exec.Cmd {
	cs := []string{"-test.run=TestCallMerge", "--"}
	cs = append(cs, s...)

	cmd := exec.Command(os.Args[0], cs...)
	// The marker variable goes last: os/exec uses the last value for duplicate
	// keys, so this guarantees it wins over any value inherited from the
	// parent environment.
	cmd.Env = append(os.Environ(), "GO_TEST_WANT_CALL_MERGE=1")
	return cmd
}
89
// TestCallMerge is the body of the mocked merge process. It does nothing
// unless GO_TEST_WANT_CALL_MERGE is "1"; in that case it acts on the arguments
// following "--" on the command line and then exits the process with status 0.
func TestCallMerge(t *testing.T) {
	if marker := os.Getenv("GO_TEST_WANT_CALL_MERGE"); marker != "1" {
		return
	}
	defer os.Exit(0)

	// Drop everything up to and including the "--" separator. If there is no
	// separator, nothing is left.
	mergeArgs := os.Args
	for len(mergeArgs) > 0 {
		head := mergeArgs[0]
		mergeArgs = mergeArgs[1:]
		if head == "--" {
			break
		}
	}

	// We mock the merge process by deleting the input shards and creating an empty
	// compound shard with a proper name. The name is derived from a SHA-1 over
	// the NUL-terminated base names of all arguments.
	digest := sha1.New()
	for _, arg := range mergeArgs {
		digest.Write([]byte(filepath.Base(arg)))
		digest.Write([]byte{0})
		_ = os.Remove(arg)
	}

	// The compound shard is placed in the directory of the second argument.
	shardDir := filepath.Dir(mergeArgs[1])
	shardFile := fmt.Sprintf("compound-%x_v%d.%05d.zoekt", digest.Sum(nil), 17, 0)
	compoundShardName := filepath.Join(shardDir, shardFile)
	out, _ := os.Create(compoundShardName)
	_ = out.Close()

	// Just like zoekt-merge-index, we write the name of the compound shard to
	// stdout.
	_, _ = fmt.Fprint(os.Stdout, compoundShardName)
}
122
123func TestMerge(t *testing.T) {
124 // A fixed set of shards gives us reliable shard sizes which makes it easy to
125 // define a cutoff with targetSizeBytes.
126 m := []string{
127 "../../testdata/shards/repo_v16.00000.zoekt",
128 "../../testdata/shards/repo2_v16.00000.zoekt",
129 "../../testdata/shards/ctagsrepo_v16.00000.zoekt",
130 }
131
132 testCases := []struct {
133 name string
134 targetSizeBytes int64
135 wantCompound int
136 wantSimple int
137 }{
138 {
139 name: "3 shards",
140 targetSizeBytes: 6 * 1024,
141 wantCompound: 1,
142 wantSimple: 0,
143 },
144 {
145 name: "2 shards",
146 targetSizeBytes: 4 * 1024,
147 wantCompound: 1,
148 wantSimple: 1,
149 },
150 {
151 // This is a pathological case where the target size of a compound shard is
152 // smaller than the size of a simple shard. In realistic scenarios,
153 // targetSizeBytes should be 100x or more of a typical shard size.
154 name: "target size too small",
155 targetSizeBytes: 2 * 1024,
156 wantCompound: 0,
157 wantSimple: 3,
158 },
159 {
160 name: "target size too big",
161 targetSizeBytes: 10 * 1024,
162 wantCompound: 0,
163 wantSimple: 3,
164 },
165 {
166 name: "target size 0",
167 targetSizeBytes: 0,
168 wantCompound: 0,
169 wantSimple: 3,
170 },
171 }
172
173 checkCount := func(dir string, pattern string, want int) {
174 have, err := filepath.Glob(filepath.Join(dir, pattern))
175 if err != nil {
176 t.Fatal(err)
177 }
178 if len(have) != want {
179 t.Fatalf("want %d, have %d", want, len(have))
180 }
181 }
182
183 for _, tc := range testCases {
184 t.Run(tc.name, func(t *testing.T) {
185 dir := t.TempDir()
186 _, err := copyTestShards(dir, m)
187 if err != nil {
188 t.Fatal(err)
189 }
190
191 s := &Server{
192 IndexDir: dir,
193 mergeOpts: mergeOpts{targetSizeBytes: tc.targetSizeBytes},
194 }
195
196 s.merge(helperCallMerge)
197
198 checkCount(dir, "compound-*", tc.wantCompound)
199 checkCount(dir, "*_v16.00000.zoekt", tc.wantSimple)
200 })
201 }
202}
203
204func TestExplodeTenantCompoundShards(t *testing.T) {
205 tenanttest.MockEnforce(t)
206 dir := t.TempDir()
207 s := &Server{IndexDir: dir}
208
209 // Create two compound shards:
210 // 1. One with repos from tenant 1 and 2
211 // 2. One with repos from tenant 2 and 3
212 cs1 := createCompoundShard(t, dir, []uint32{1, 2}, func(in *zoekt.Repository) {
213 if in.ID == 1 {
214 in.TenantID = 1
215 } else {
216 in.TenantID = 2
217 }
218 })
219
220 cs2 := createCompoundShard(t, dir, []uint32{3, 4}, func(in *zoekt.Repository) {
221 if in.ID == 3 {
222 in.TenantID = 2
223 } else {
224 in.TenantID = 3
225 }
226 })
227
228 // Create context with tenant 1
229 ctx := tenanttest.NewTestContext()
230
231 // Explode shards for tenant 1
232 err := s.explodeTenantCompoundShards(ctx, func(path string) error {
233 // For this test we call explode directly instead of calling it in a
234 // separate process.
235 return index.Explode(dir, path)
236 })
237 require.NoError(t, err)
238
239 // Check that only cs1 was exploded (since it contained a repo from tenant
240 // 1) and cs2 remains untouched
241 require.NoFileExists(t, cs1)
242 require.FileExists(t, cs2)
243
244 // Check that we have 2 simple shards (from cs1) and 1 compound shard (cs2)
245 simpleShards, err := filepath.Glob(filepath.Join(dir, "*_v16.00000.zoekt"))
246 require.NoError(t, err)
247 require.Len(t, simpleShards, 2, "expected 2 simple shards")
248
249 // check that the simple shards are from tenant 1 and 2
250 for _, shard := range simpleShards {
251 repos, _, err := index.ReadMetadataPath(shard)
252 require.NoError(t, err)
253 for _, repo := range repos {
254 require.Contains(t, []int{1, 2}, repo.TenantID, "expected tenant 1 or 2, but got %d", repo.TenantID)
255 }
256 }
257
258 compoundShards, err := filepath.Glob(filepath.Join(dir, "compound-*"))
259 require.NoError(t, err)
260 require.Len(t, compoundShards, 1, "expected 1 compound shard")
261}
262
263func copyTestShards(dstDir string, srcShards []string) ([]string, error) {
264 var tmpShards []string
265 for _, s := range srcShards {
266 dst := filepath.Join(dstDir, filepath.Base(s))
267 tmpShards = append(tmpShards, dst)
268 if err := copyFile(s, dst); err != nil {
269 return nil, err
270 }
271 }
272 return tmpShards, nil
273}
274
// copyFile copies the contents of the file at src into the file at dst. dst is
// opened with os.Create, so it is created if missing and truncated otherwise.
// The error from opening, copying, or closing dst is returned; the error from
// closing src is ignored.
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}

	_, err = io.Copy(out, in)
	if err != nil {
		// The copy error takes precedence over any error from Close.
		out.Close()
		return err
	}
	return out.Close()
}