fork of https://github.com/sourcegraph/zoekt
1package main
2
3import (
4 "bytes"
5 "context"
6 "flag"
7 "fmt"
8 "io"
9 "net/url"
10 "os"
11 "os/exec"
12 "path/filepath"
13 "strings"
14 "testing"
15 "time"
16
17 "github.com/google/go-cmp/cmp"
18 "github.com/sourcegraph/zoekt"
19 "github.com/sourcegraph/zoekt/build"
20 "github.com/sourcegraph/zoekt/query"
21 "github.com/sourcegraph/zoekt/shards"
22)
23
// Command-line flags understood by the tests in this file.
var (
	// update makes TestRanking overwrite each golden file with the freshly
	// produced output before comparing against it.
	update = flag.Bool("update", false, "update golden file")

	// debugScore can be set to include much more output. Do not commit the
	// updated golden files, this is purely used for debugging in a local
	// environment.
	debugScore = flag.Bool("debug_score", false, "include debug output in golden files.")
)
30
31func TestRanking(t *testing.T) {
32 if testing.Short() {
33 t.Skip("skipping due to short flag")
34 }
35
36 requireCTags(t)
37
38 archiveURLs := []string{
39 "https://github.com/sourcegraph/sourcegraph/tree/v5.2.2",
40 "https://github.com/golang/go/tree/go1.21.4",
41 "https://github.com/sourcegraph/cody/tree/vscode-v0.14.5",
42 }
43 queries := []string{
44 // golang/go
45 "test server",
46 "bytes buffer",
47 "bufio buffer",
48
49 // sourcegraph/sourcegraph
50 "graphql type User",
51 "Get database/user",
52 "InternalDoer",
53 "Repository metadata Write rbac",
54
55 // cody
56 "generate unit test",
57 "r:cody sourcegraph url",
58 }
59
60 indexDir := t.TempDir()
61
62 for _, u := range archiveURLs {
63 if err := indexURL(indexDir, u); err != nil {
64 t.Fatal(err)
65 }
66 }
67
68 ss, err := shards.NewDirectorySearcher(indexDir)
69 if err != nil {
70 t.Fatalf("NewDirectorySearcher(%s): %v", indexDir, err)
71 }
72 defer ss.Close()
73
74 for _, queryStr := range queries {
75 // normalise queryStr for writing to fs
76 name := strings.Map(func(r rune) rune {
77 if strings.ContainsRune(" :", r) {
78 return '_'
79 }
80 if '0' <= r && r <= '9' ||
81 'a' <= r && r <= 'z' ||
82 'A' <= r && r <= 'Z' {
83 return r
84 }
85 return -1
86 }, queryStr)
87
88 t.Run(name, func(t *testing.T) {
89 q, err := query.Parse(queryStr)
90 if err != nil {
91 t.Fatal(err)
92 }
93
94 sOpts := zoekt.SearchOptions{
95 // Use the same options sourcegraph has by default
96 ChunkMatches: true,
97 MaxWallTime: 20 * time.Second,
98 ShardMaxMatchCount: 10_000 * 10,
99 TotalMaxMatchCount: 100_000 * 10,
100 MaxDocDisplayCount: 500,
101
102 DebugScore: *debugScore,
103 }
104 result, err := ss.Search(context.Background(), q, &sOpts)
105 if err != nil {
106 t.Fatal(err)
107 }
108
109 var gotBuf bytes.Buffer
110 marshalMatches(&gotBuf, queryStr, q, result.Files)
111 got := gotBuf.Bytes()
112
113 wantPath := filepath.Join("testdata", name+".txt")
114 if *update {
115 if err := os.WriteFile(wantPath, got, 0600); err != nil {
116 t.Fatal(err)
117 }
118 }
119 want, err := os.ReadFile(wantPath)
120 if err != nil {
121 t.Fatal(err)
122 }
123
124 if d := cmp.Diff(string(want), string(got)); d != "" {
125 t.Fatalf("unexpected (-want, +got):\n%s", d)
126 }
127 })
128 }
129}
130
// tarballCachePrefix is the fixed part of the archive cache directory; the
// value of $USER is appended to it.
const tarballCachePrefix = "/tmp/zoekt-test-ranking-tarballs-"

// tarballCache is the directory in which indexURL keeps downloaded archives
// so that an archive already on disk is not downloaded again.
var tarballCache = tarballCachePrefix + os.Getenv("USER")
132
133func indexURL(indexDir, u string) error {
134 if err := os.MkdirAll(tarballCache, 0700); err != nil {
135 return err
136 }
137
138 opts := Options{
139 Archive: u,
140 }
141 opts.SetDefaults() // sets metadata like Name and the codeload URL
142 u = opts.Archive
143
144 // update Archive location to cached location
145 cacheBase := fmt.Sprintf("%s-%s%s.tar.gz", url.QueryEscape(opts.Name), opts.Branch, opts.Commit) // assume .tar.gz
146 path := filepath.Join(tarballCache, cacheBase)
147 opts.Archive = path
148
149 if _, err := os.Stat(path); os.IsNotExist(err) {
150 if err := download(u, path); err != nil {
151 return err
152 }
153 }
154
155 // TODO scip
156 // languageMap := make(ctags.LanguageMap)
157 // for _, lang := range []string{"kotlin", "rust", "ruby", "go", "python", "javascript", "c_sharp", "scala", "typescript", "zig"} {
158 // languageMap[lang] = ctags.ScipCTags
159 // }
160
161 err := do(opts, build.Options{
162 IndexDir: indexDir,
163 CTagsMustSucceed: true,
164 })
165 if err != nil {
166 return fmt.Errorf("failed to index %s: %w", opts.Archive, err)
167 }
168
169 return nil
170}
171
172func download(url, dst string) error {
173 tmpPath := dst + ".part"
174
175 rc, err := openReader(url)
176 if err != nil {
177 return err
178 }
179 defer rc.Close()
180
181 f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
182 if err != nil {
183 return err
184 }
185 defer f.Close()
186
187 _, err = io.Copy(f, rc)
188 if err != nil {
189 return err
190 }
191
192 err = f.Close()
193 if err != nil {
194 return err
195 }
196
197 return os.Rename(tmpPath, dst)
198}
199
// chunkMatchesPerFile is the number of chunk matches marshalMatches prints
// for a single file; any further ones are only counted.
const chunkMatchesPerFile = 3

// fileMatchesPerSearch is the number of file matches marshalMatches prints
// for a single search; any further ones are only counted.
const fileMatchesPerSearch = 6
204
205func marshalMatches(w io.Writer, queryStr string, q query.Q, files []zoekt.FileMatch) {
206 _, _ = fmt.Fprintf(w, "queryString: %s\n", queryStr)
207 _, _ = fmt.Fprintf(w, "query: %s\n\n", q)
208
209 files, hiddenFiles := splitAtIndex(files, fileMatchesPerSearch)
210 for _, f := range files {
211 _, _ = fmt.Fprintf(w, "%s/%s%s\n", f.Repository, f.FileName, addTabIfNonEmpty(f.Debug))
212
213 chunks, hidden := splitAtIndex(f.ChunkMatches, chunkMatchesPerFile)
214
215 for _, m := range chunks {
216 _, _ = fmt.Fprintf(w, "%d:%s%s\n", m.ContentStart.LineNumber, string(m.Content), addTabIfNonEmpty(m.DebugScore))
217 }
218
219 if len(hidden) > 0 {
220 _, _ = fmt.Fprintf(w, "hidden %d more line matches\n", len(hidden))
221 }
222 _, _ = fmt.Fprintln(w)
223 }
224
225 if len(hiddenFiles) > 0 {
226 fmt.Fprintf(w, "hidden %d more file matches\n", len(hiddenFiles))
227 }
228}
229
// splitAtIndex splits s into its first idx elements and the remainder.
//
// If s has idx or fewer elements, s is returned unchanged together with a nil
// remainder. Otherwise both results share s's backing array; the head's
// capacity is clamped to its length so that appending to it allocates a new
// array instead of overwriting the first elements of the remainder.
func splitAtIndex[E any](s []E, idx int) ([]E, []E) {
	if idx < len(s) {
		return s[:idx:idx], s[idx:]
	}
	return s, nil
}
236
// addTabIfNonEmpty returns s prefixed with a tab character, or the empty
// string when s is empty. It lets optional debug output be appended to a line
// without leaving a trailing tab when there is nothing to append.
func addTabIfNonEmpty(s string) string {
	if len(s) == 0 {
		return ""
	}
	return "\t" + s
}
243
// requireCTags makes sure ctags is usable before a test relies on it.
//
// ctags counts as available when the CTAGS_COMMAND environment variable is
// non-empty or when a "universal-ctags" binary is found on PATH. If neither
// holds, the test fails when the CI environment variable is set and is
// skipped otherwise.
func requireCTags(tb testing.TB) {
	tb.Helper()

	// An explicitly configured command takes precedence; PATH is only
	// consulted when it is absent.
	available := os.Getenv("CTAGS_COMMAND") != ""
	if !available {
		_, err := exec.LookPath("universal-ctags")
		available = err == nil
	}
	if available {
		return
	}

	const missing = "universal-ctags is missing"

	// On CI we require ctags to be available. Otherwise we skip
	if onCI := os.Getenv("CI") != ""; onCI {
		tb.Fatal(missing)
		return
	}
	tb.Skip(missing)
}