fork of https://github.com/sourcegraph/zoekt
1package main
2
3import (
4 "context"
5 "errors"
6 "fmt"
7 "net"
8 "os"
9 "os/exec"
10 "path/filepath"
11 "strings"
12 "testing"
13 "time"
14
15 "github.com/google/go-cmp/cmp"
16 "github.com/sourcegraph/log/logtest"
17 "github.com/stretchr/testify/require"
18
19 "github.com/sourcegraph/zoekt"
20 configv1 "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/grpc/protos/sourcegraph/zoekt/configuration/v1"
21 "github.com/sourcegraph/zoekt/gitindex"
22 "github.com/sourcegraph/zoekt/query"
23 "github.com/sourcegraph/zoekt/search"
24)
25
26func TestFetchRepoAndIndex_Integration(t *testing.T) {
27 requireGitDaemon(t)
28
29 for _, tc := range []struct {
30 name string
31 disableGoGitOptimization bool
32 }{
33 {name: "optimized repo open"},
34 {name: "legacy repo open", disableGoGitOptimization: true},
35 } {
36 t.Run(tc.name, func(t *testing.T) {
37 require := require.New(t)
38
39 ctx := context.Background()
40 fixture := newGitFetchFixture(t)
41
42 if tc.disableGoGitOptimization {
43 t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "true")
44 } else {
45 t.Setenv("ZOEKT_DISABLE_GOGIT_OPTIMIZATION", "false")
46 }
47
48 sg := &recordingSourcegraph{
49 opts: IndexOptions{
50 RepoID: 123,
51 Name: "test/repo",
52 CloneURL: fixture.cloneURL,
53 Symbols: false,
54 Branches: []zoekt.RepositoryBranch{
55 {Name: "HEAD", Version: fixture.mainCommit},
56 {Name: "dev", Version: fixture.devCommit},
57 },
58 TenantID: 1,
59 },
60 }
61
62 indexDir := t.TempDir()
63 server := &Server{
64 Sourcegraph: sg,
65 IndexDir: indexDir,
66 CPUCount: 1,
67 IndexConcurrency: 1,
68 }
69
70 result, err := sg.List(ctx, nil)
71 require.NoError(err)
72
73 var args *indexArgs
74 result.IterateIndexOptions(func(opts IndexOptions) {
75 args = server.indexArgs(opts)
76 })
77 require.NotNil(args)
78
79 gitDir := filepath.Join(t.TempDir(), "fetch.git")
80 c := gitIndexConfig{
81 runCmd: runIntegrationCommand,
82 findRepositoryMetadata: func(args *indexArgs) (*zoekt.Repository, *zoekt.IndexMetadata, bool, error) {
83 return args.BuildOptions().FindRepositoryMetadata()
84 },
85 timeout: time.Minute,
86 }
87
88 require.NoError(fetchRepo(ctx, gitDir, args, c, logtest.Scoped(t)))
89 assertPartialBareFetch(t, gitDir, fixture)
90
91 require.NoError(setZoektConfig(ctx, gitDir, args, c))
92
93 updated, err := gitindex.IndexGitRepo(gitIndexOptionsForTest(args, gitDir))
94 require.NoError(err)
95 require.True(updated)
96
97 repository, metadata, ok, err := args.BuildOptions().FindRepositoryMetadata()
98 require.NoError(err)
99 require.True(ok)
100 require.Equal(args.Name, repository.Name)
101 require.Equal(args.RepoID, repository.ID)
102 require.Equal(args.TenantID, repository.TenantID)
103 if diff := cmp.Diff(args.Branches, repository.Branches); diff != "" {
104 t.Fatalf("branches mismatch (-want +got):\n%s", diff)
105 }
106 require.Equal("123", repository.RawConfig["repoid"])
107 require.Equal("1", repository.RawConfig["tenantID"])
108
109 searcher, err := search.NewDirectorySearcher(indexDir)
110 require.NoError(err)
111 defer searcher.Close()
112
113 assertSearchContains(t, searcher, "smallneedle", "small.txt")
114 assertSearchContains(t, searcher, "devneedle", "dev.txt")
115 assertSearchEmpty(t, searcher, "largeneedle")
116
117 require.NoError(updateIndexStatusOnSourcegraph(c, args, sg, nil))
118 require.Len(sg.updates, 1)
119 require.Len(sg.updates[0], 1)
120 require.Equal(args.RepoID, sg.updates[0][0].RepoID)
121 require.Equal(metadata.IndexTime.Unix(), sg.updates[0][0].IndexTimeUnix)
122 require.Equal(configv1.UpdateIndexStatusRequest_Repository_STATE_SUCCESS, sg.updates[0][0].State)
123 require.Empty(sg.updates[0][0].FailureMessage)
124 if diff := cmp.Diff(args.Branches, sg.updates[0][0].Branches); diff != "" {
125 t.Fatalf("status branches mismatch (-want +got):\n%s", diff)
126 }
127 })
128 }
129}
130
131func TestUpdateIndexStatusOnSourcegraphFailure(t *testing.T) {
132 indexTime := time.Unix(123, 0)
133
134 for _, tc := range []struct {
135 name string
136 metadata *zoekt.IndexMetadata
137 metadataOK bool
138 metadataErr error
139 wantIndexTime int64
140 }{
141 {
142 name: "metadata available",
143 metadata: &zoekt.IndexMetadata{IndexTime: indexTime},
144 metadataOK: true,
145 wantIndexTime: indexTime.Unix(),
146 },
147 {
148 name: "metadata unavailable",
149 metadataErr: errors.New("metadata boom"),
150 },
151 } {
152 t.Run(tc.name, func(t *testing.T) {
153 sg := &recordingSourcegraph{}
154 args := &indexArgs{IndexOptions: IndexOptions{
155 RepoID: 123,
156 Name: "test/repo",
157 Branches: []zoekt.RepositoryBranch{
158 {Name: "HEAD", Version: "deadbeef"},
159 },
160 }}
161 c := gitIndexConfig{
162 findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) {
163 return nil, tc.metadata, tc.metadataOK, tc.metadataErr
164 },
165 }
166
167 require.NoError(t, updateIndexStatusOnSourcegraph(c, args, sg, errors.New("boom")))
168 require.Len(t, sg.updates, 1)
169 require.Len(t, sg.updates[0], 1)
170 require.Equal(t, args.RepoID, sg.updates[0][0].RepoID)
171 require.Equal(t, tc.wantIndexTime, sg.updates[0][0].IndexTimeUnix)
172 require.Equal(t, configv1.UpdateIndexStatusRequest_Repository_STATE_FAILURE, sg.updates[0][0].State)
173 require.Equal(t, "boom", sg.updates[0][0].FailureMessage)
174 if diff := cmp.Diff(args.Branches, sg.updates[0][0].Branches); diff != "" {
175 t.Fatalf("status branches mismatch (-want +got):\n%s", diff)
176 }
177 })
178 }
179}
180
181func requireGitDaemon(t *testing.T) {
182 t.Helper()
183
184 cmd := exec.Command("git", "daemon", "-h")
185 cmd.Env = gitTestEnv()
186 output, err := cmd.CombinedOutput()
187 text := string(output)
188
189 if strings.Contains(text, "usage: git daemon") {
190 return
191 }
192
193 if strings.Contains(text, "git: 'daemon' is not a git command") {
194 t.Skipf("skipping integration test: git daemon is unavailable: %s", strings.TrimSpace(text))
195 }
196
197 if err == nil {
198 return
199 }
200
201 t.Fatalf("failed to probe git daemon availability: %v\n%s", err, text)
202}
203
204type recordingSourcegraph struct {
205 opts IndexOptions
206 updates [][]indexStatus
207}
208
209func (s *recordingSourcegraph) List(ctx context.Context, indexed []uint32) (*SourcegraphListResult, error) {
210 return &SourcegraphListResult{
211 IDs: []uint32{s.opts.RepoID},
212 IterateIndexOptions: func(yield func(IndexOptions)) {
213 yield(s.opts)
214 },
215 }, nil
216}
217
218func (s *recordingSourcegraph) ForceIterateIndexOptions(onSuccess func(IndexOptions), onError func(uint32, error), repos ...uint32) {
219 onSuccess(s.opts)
220}
221
222func (s *recordingSourcegraph) UpdateIndexStatus(repositories []indexStatus) error {
223 cp := make([]indexStatus, len(repositories))
224 copy(cp, repositories)
225 s.updates = append(s.updates, cp)
226 return nil
227}
228
229type gitFetchFixture struct {
230 cloneURL string
231 mainCommit string
232 devCommit string
233 bigBlob string
234 daemon *gitDaemon
235}
236
237func newGitFetchFixture(t *testing.T) *gitFetchFixture {
238 t.Helper()
239
240 root := t.TempDir()
241 worktree := filepath.Join(root, "worktree")
242 serveRoot := filepath.Join(root, "serve")
243 remoteDir := filepath.Join(serveRoot, "repo")
244
245 require.NoError(t, os.MkdirAll(worktree, 0o755))
246 require.NoError(t, os.MkdirAll(serveRoot, 0o755))
247
248 runGit(t, root, "init", "-b", "main", worktree)
249 runGit(t, worktree, "config", "user.name", "Test User")
250 runGit(t, worktree, "config", "user.email", "test@example.com")
251
252 require.NoError(t, os.WriteFile(filepath.Join(worktree, "small.txt"), []byte("smallneedle\n"), 0o644))
253 large := strings.Repeat("x", MaxFileSize+1024)
254 require.NoError(t, os.WriteFile(filepath.Join(worktree, "big.bin"), []byte("largeneedle\n"+large), 0o644))
255 runGit(t, worktree, "add", "small.txt", "big.bin")
256 runGit(t, worktree, "commit", "-m", "main commit")
257
258 mainCommit := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD"))
259 bigBlob := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD:big.bin"))
260
261 runGit(t, worktree, "checkout", "-b", "dev")
262 require.NoError(t, os.WriteFile(filepath.Join(worktree, "dev.txt"), []byte("devneedle\n"), 0o644))
263 runGit(t, worktree, "add", "dev.txt")
264 runGit(t, worktree, "commit", "-m", "dev commit")
265
266 devCommit := strings.TrimSpace(runGitOutput(t, worktree, "rev-parse", "HEAD"))
267 runGit(t, root, "clone", "--bare", worktree, remoteDir)
268 runGit(t, remoteDir, "config", "uploadpack.allowFilter", "true")
269 runGit(t, remoteDir, "config", "uploadpack.allowAnySHA1InWant", "true")
270
271 daemon := startGitDaemon(t, serveRoot)
272
273 return &gitFetchFixture{
274 cloneURL: fmt.Sprintf("git://127.0.0.1:%d/repo", daemon.port),
275 mainCommit: mainCommit,
276 devCommit: devCommit,
277 bigBlob: bigBlob,
278 daemon: daemon,
279 }
280}
281
282type gitDaemon struct {
283 cmd *exec.Cmd
284 logPath string
285 port int
286}
287
288func startGitDaemon(t *testing.T, serveRoot string) *gitDaemon {
289 t.Helper()
290
291 port := allocatePort(t)
292 logFile, err := os.CreateTemp(t.TempDir(), "git-daemon-*.log")
293 require.NoError(t, err)
294 logPath := logFile.Name()
295 cmd := exec.Command("git", "daemon",
296 "--verbose",
297 "--export-all",
298 "--reuseaddr",
299 "--base-path="+serveRoot,
300 "--listen=127.0.0.1",
301 fmt.Sprintf("--port=%d", port),
302 serveRoot,
303 )
304 cmd.Env = gitTestEnv()
305 cmd.Stdout = logFile
306 cmd.Stderr = logFile
307
308 require.NoError(t, cmd.Start())
309 require.NoError(t, logFile.Close())
310 waitForGitDaemon(t, port, logPath)
311
312 daemon := &gitDaemon{cmd: cmd, logPath: logPath, port: port}
313 t.Cleanup(func() {
314 if cmd.Process != nil {
315 _ = cmd.Process.Kill()
316 }
317 waitDone := make(chan struct{})
318 go func() {
319 _ = cmd.Wait()
320 close(waitDone)
321 }()
322
323 select {
324 case <-waitDone:
325 case <-time.After(5 * time.Second):
326 }
327 })
328
329 return daemon
330}
331
// waitForGitDaemon polls 127.0.0.1:port until a TCP connection succeeds.
// It dials with a 100ms timeout, pauses 50ms between attempts, and gives up
// after 5 seconds. On timeout it fails the test and includes the contents of
// logPath in the message, or the read error if the log cannot be read.
func waitForGitDaemon(t *testing.T, port int, logPath string) {
	t.Helper()

	addr := fmt.Sprintf("127.0.0.1:%d", port)
	deadline := time.Now().Add(5 * time.Second)

	for time.Until(deadline) > 0 {
		if conn, dialErr := net.DialTimeout("tcp", addr, 100*time.Millisecond); dialErr == nil {
			// Reachable; the probe connection itself is not needed.
			_ = conn.Close()
			return
		}

		time.Sleep(50 * time.Millisecond)
	}

	// Timed out: surface whatever the daemon logged to help diagnosis.
	logged, readErr := os.ReadFile(logPath)
	if readErr != nil {
		t.Fatalf("git daemon did not start listening on %s within 5s (failed to read log: %v)", addr, readErr)
	}

	t.Fatalf("git daemon did not start listening on %s within 5s\n%s", addr, logged)
}
355
356func allocatePort(t *testing.T) int {
357 t.Helper()
358
359 listener, err := net.Listen("tcp", "127.0.0.1:0")
360 require.NoError(t, err)
361 defer listener.Close()
362
363 return listener.Addr().(*net.TCPAddr).Port
364}
365
366func gitIndexOptionsForTest(args *indexArgs, repoDir string) gitindex.Options {
367 buildOptions := *args.BuildOptions()
368 buildOptions.RepositoryDescription.Branches = nil
369
370 branches := make([]string, 0, len(args.Branches))
371 for _, branch := range args.Branches {
372 branches = append(branches, branch.Name)
373 }
374
375 return gitindex.Options{
376 RepoDir: repoDir,
377 Submodules: false,
378 Incremental: args.Incremental,
379 BuildOptions: buildOptions,
380 BranchPrefix: "refs/heads/",
381 Branches: branches,
382 DeltaShardNumberFallbackThreshold: args.DeltaShardNumberFallbackThreshold,
383 }
384}
385
386func assertPartialBareFetch(t *testing.T, gitDir string, fixture *gitFetchFixture) {
387 t.Helper()
388 require := require.New(t)
389
390 require.Equal(fixture.mainCommit, strings.TrimSpace(runGitOutput(t, gitDir, "rev-parse", "HEAD")))
391 require.Equal(fixture.devCommit, strings.TrimSpace(runGitOutput(t, gitDir, "rev-parse", "refs/heads/dev")))
392
393 promisors, err := filepath.Glob(filepath.Join(gitDir, "objects", "pack", "*.promisor"))
394 require.NoError(err)
395 require.NotEmpty(promisors)
396
397 objects := runGitOutput(t, gitDir, "rev-list", "--objects", "--missing=print", "HEAD", "refs/heads/dev")
398 require.Contains(objects, fixture.mainCommit)
399 require.Contains(objects, fixture.devCommit)
400 require.Contains(objects, "?"+fixture.bigBlob)
401}
402
403func assertSearchContains(t *testing.T, searcher zoekt.Searcher, pattern string, wantFile string) {
404 t.Helper()
405 require := require.New(t)
406
407 result, err := searcher.Search(context.Background(), &query.Substring{Pattern: pattern}, &zoekt.SearchOptions{})
408 require.NoError(err)
409 require.Len(result.Files, 1)
410 require.Equal(wantFile, result.Files[0].FileName)
411}
412
413func assertSearchEmpty(t *testing.T, searcher zoekt.Searcher, pattern string) {
414 t.Helper()
415 require := require.New(t)
416
417 result, err := searcher.Search(context.Background(), &query.Substring{Pattern: pattern}, &zoekt.SearchOptions{})
418 require.NoError(err)
419 require.Empty(result.Files)
420}
421
422func runIntegrationCommand(cmd *exec.Cmd) error {
423 cmd.Env = gitTestEnv()
424 output, err := cmd.CombinedOutput()
425 if err != nil {
426 return fmt.Errorf("%s: %w\n%s", strings.Join(cmd.Args, " "), err, output)
427 }
428 return nil
429}
430
431func runGit(t *testing.T, dir string, args ...string) {
432 t.Helper()
433 _ = runGitOutput(t, dir, args...)
434}
435
436func runGitOutput(t *testing.T, dir string, args ...string) string {
437 t.Helper()
438
439 cmd := exec.Command("git", args...)
440 cmd.Dir = dir
441 cmd.Env = gitTestEnv()
442 output, err := cmd.CombinedOutput()
443 if err != nil {
444 t.Fatalf("git %s failed: %v\n%s", strings.Join(args, " "), err, output)
445 }
446
447 return string(output)
448}
449
// gitTestEnv returns the current process environment followed by empty
// GIT_CONFIG_GLOBAL and GIT_CONFIG_SYSTEM entries. The intent appears to be
// keeping user-level and system-level git configuration out of the tests.
func gitTestEnv() []string {
	env := os.Environ()
	env = append(env, "GIT_CONFIG_GLOBAL=")
	env = append(env, "GIT_CONFIG_SYSTEM=")
	return env
}
455}