fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "flag"
22 "fmt"
23 "io/fs"
24 "os"
25 "path"
26 "path/filepath"
27 "reflect"
28 "strconv"
29 "strings"
30 "testing"
31 "testing/quick"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35
36 "github.com/sourcegraph/zoekt/query"
37)
38
// update is the -update test flag (default false). When set,
// TestReadSearch rewrites its golden files from the current search results
// and TestBackwardsCompat writes a fresh shard for the current
// IndexFormatVersion into testdata/backcompat before running its checks.
var update = flag.Bool("update", false, "update golden files")
40
41func TestReadWrite(t *testing.T) {
42 b, err := NewIndexBuilder(nil)
43 if err != nil {
44 t.Fatalf("NewIndexBuilder: %v", err)
45 }
46
47 if err := b.AddFile("filename", []byte("abcde")); err != nil {
48 t.Fatalf("AddFile: %v", err)
49 }
50
51 var buf bytes.Buffer
52 if err := b.Write(&buf); err != nil {
53 t.Fatal(err)
54 }
55 f := &memSeeker{buf.Bytes()}
56
57 r := reader{r: f}
58
59 var toc indexTOC
60 err = r.readTOC(&toc)
61
62 if err != nil {
63 t.Errorf("got read error %v", err)
64 }
65 if toc.fileContents.data.sz != 5 {
66 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
67 }
68
69 data, err := r.readIndexData(&toc)
70 if err != nil {
71 t.Fatalf("readIndexData: %v", err)
72 }
73 if got := data.fileName(0); string(got) != "filename" {
74 t.Errorf("got filename %q, want %q", got, "filename")
75 }
76
77 contentNgrams := data.contentNgrams.DumpMap()
78 if len(contentNgrams) != 3 {
79 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
80 }
81
82 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
83 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
84 }
85}
86
87func TestReadWriteNames(t *testing.T) {
88 b, err := NewIndexBuilder(nil)
89 if err != nil {
90 t.Fatalf("NewIndexBuilder: %v", err)
91 }
92
93 if err := b.AddFile("abCd", []byte("")); err != nil {
94 t.Fatalf("AddFile: %v", err)
95 }
96
97 var buf bytes.Buffer
98 if err := b.Write(&buf); err != nil {
99 t.Fatal(err)
100 }
101 f := &memSeeker{buf.Bytes()}
102
103 r := reader{r: f}
104
105 var toc indexTOC
106 if err := r.readTOC(&toc); err != nil {
107 t.Errorf("got read error %v", err)
108 }
109 if toc.fileNames.data.sz != 4 {
110 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
111 }
112
113 data, err := r.readIndexData(&toc)
114 if err != nil {
115 t.Fatalf("readIndexData: %v", err)
116 }
117 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
118 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
119 }
120
121 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
122 if err != nil {
123 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
124 }
125
126 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
127 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
128 }
129}
130
131func TestGet(t *testing.T) {
132 b, err := NewIndexBuilder(nil)
133 if err != nil {
134 t.Fatalf("NewIndexBuilder: %v", err)
135 }
136
137 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
138 t.Fatalf("AddFile: %v", err)
139 }
140
141 var buf bytes.Buffer
142 if err := b.Write(&buf); err != nil {
143 t.Fatal(err)
144 }
145 f := &memSeeker{buf.Bytes()}
146
147 r := reader{r: f}
148
149 var toc indexTOC
150 if err := r.readTOC(&toc); err != nil {
151 t.Errorf("got read error %v", err)
152 }
153
154 id, err := r.readIndexData(&toc)
155 if err != nil {
156 t.Fatalf("readIndexData: %v", err)
157 }
158
159 var off uint32 = 96
160
161 cases := []struct {
162 ng string
163 wantPostingList simpleSection
164 }{
165 {
166 ng: " bb",
167 wantPostingList: simpleSection{off: off, sz: 1},
168 },
169 {
170 ng: "a b",
171 wantPostingList: simpleSection{off: off + 1, sz: 1},
172 },
173 {
174 ng: "aa ",
175 wantPostingList: simpleSection{off: off + 2, sz: 1},
176 },
177 {
178 ng: "aaa",
179 wantPostingList: simpleSection{off: off + 3, sz: 2},
180 },
181 {
182 ng: "baa",
183 wantPostingList: simpleSection{off: off + 5, sz: 1},
184 },
185 {
186 ng: "bba",
187 wantPostingList: simpleSection{off: off + 6, sz: 1},
188 },
189 {
190 ng: "bbb",
191 wantPostingList: simpleSection{off: off + 7, sz: 1},
192 },
193 }
194
195 for _, tt := range cases {
196 t.Run(tt.ng, func(t *testing.T) {
197 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
198 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
199 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
200 }
201 })
202 }
203}
204
205func loadShard(fn string) (Searcher, error) {
206 f, err := os.Open(fn)
207 if err != nil {
208 return nil, err
209 }
210
211 iFile, err := NewIndexFile(f)
212 if err != nil {
213 return nil, err
214 }
215 s, err := NewSearcher(iFile)
216 if err != nil {
217 iFile.Close()
218 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
219 }
220
221 return s, nil
222}
223
224func TestReadSearch(t *testing.T) {
225 type out struct {
226 FormatVersion int
227 FeatureVersion int
228 FileMatches [][]FileMatch
229 }
230
231 qs := []query.Q{
232 &query.Substring{Pattern: "func main", Content: true},
233 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
234 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
235 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
236 }
237
238 shards, err := filepath.Glob("testdata/shards/*.zoekt")
239 if err != nil {
240 t.Fatal(err)
241 }
242
243 for _, path := range shards {
244 name := filepath.Base(path)
245 name = strings.TrimSuffix(name, ".zoekt")
246
247 shard, err := loadShard(path)
248 if err != nil {
249 t.Fatalf("error loading shard %s %v", name, err)
250 }
251
252 index, ok := shard.(*indexData)
253 if !ok {
254 t.Fatalf("expected *indexData for %s", name)
255 }
256
257 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
258
259 if *update {
260 got := out{
261 FormatVersion: index.metaData.IndexFormatVersion,
262 FeatureVersion: index.metaData.IndexFeatureVersion,
263 }
264 for _, q := range qs {
265 res, err := shard.Search(context.Background(), q, &SearchOptions{})
266 if err != nil {
267 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
268 }
269 got.FileMatches = append(got.FileMatches, res.Files)
270 }
271
272 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
273 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
274 continue
275 } else if err := os.WriteFile(golden, raw, 0644); err != nil {
276 t.Errorf("failed writing search results for %s during updating: %v", name, err)
277 continue
278 }
279 }
280
281 var want out
282 if buf, err := os.ReadFile(golden); err != nil {
283 t.Fatalf("failed reading search results for %s: %v", name, err)
284 } else if err := json.Unmarshal(buf, &want); err != nil {
285 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
286 }
287
288 if index.metaData.IndexFormatVersion != want.FormatVersion {
289 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
290 }
291
292 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
293 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
294 }
295
296 for j, q := range qs {
297 res, err := shard.Search(context.Background(), q, &SearchOptions{})
298 if err != nil {
299 t.Fatalf("failed search %s on %s: %v", q, name, err)
300 }
301
302 if len(res.Files) != len(want.FileMatches[j]) {
303 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
304 }
305
306 if len(want.FileMatches[j]) == 0 {
307 continue
308 }
309
310 if d := cmp.Diff(res.Files, want.FileMatches[j]); d != "" {
311 t.Errorf("matches for %s on %s\n%s", q, name, d)
312 }
313 }
314 }
315}
316
317func TestEncodeRawConfig(t *testing.T) {
318 mustParse := func(s string) uint8 {
319 i, err := strconv.ParseInt(s, 2, 8)
320 if err != nil {
321 t.Fatalf("failed to parse %s", s)
322 }
323 return uint8(i)
324 }
325
326 cases := []struct {
327 rawConfig map[string]string
328 want string
329 }{
330 {
331 rawConfig: map[string]string{"public": "1"},
332 want: "101001",
333 },
334 {
335 rawConfig: map[string]string{"fork": "1"},
336 want: "100110",
337 },
338 {
339 rawConfig: map[string]string{"public": "1", "fork": "1"},
340 want: "100101",
341 },
342 {
343 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
344 want: "010101",
345 },
346 {
347 rawConfig: map[string]string{},
348 want: "101010",
349 },
350 }
351 for _, c := range cases {
352 t.Run(c.want, func(t *testing.T) {
353 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
354 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
355 }
356 })
357 }
358}
359
360func TestBackwardsCompat(t *testing.T) {
361 if *update {
362 b, err := NewIndexBuilder(nil)
363 if err != nil {
364 t.Fatalf("NewIndexBuilder: %v", err)
365 }
366
367 if err := b.AddFile("filename", []byte("abcde")); err != nil {
368 t.Fatalf("AddFile: %v", err)
369 }
370
371 var buf bytes.Buffer
372 if err := b.Write(&buf); err != nil {
373 t.Fatal(err)
374 }
375
376 outname := fmt.Sprintf("testdata/backcompat/new_v%d.%05d.zoekt", IndexFormatVersion, 0)
377 t.Log("writing new file", outname)
378
379 err = os.WriteFile(outname, buf.Bytes(), 0644)
380 if err != nil {
381 t.Fatalf("Creating output file: %v", err)
382 }
383 }
384
385 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
386 if err != nil {
387 t.Fatalf("fs.Glob: %v", err)
388 }
389
390 for _, fname := range compatibleFiles {
391 t.Run(path.Base(fname),
392 func(t *testing.T) {
393 f, err := os.Open(fname)
394 if err != nil {
395 t.Fatal("os.Open", err)
396 }
397 idx, err := NewIndexFile(f)
398 if err != nil {
399 t.Fatal("NewIndexFile", err)
400 }
401 r := reader{r: idx}
402
403 var toc indexTOC
404 err = r.readTOC(&toc)
405
406 if err != nil {
407 t.Errorf("got read error %v", err)
408 }
409 if toc.fileContents.data.sz != 5 {
410 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
411 }
412
413 data, err := r.readIndexData(&toc)
414 if err != nil {
415 t.Fatalf("readIndexData: %v", err)
416 }
417 if got := data.fileName(0); string(got) != "filename" {
418 t.Errorf("got filename %q, want %q", got, "filename")
419 }
420
421 contentNgrams := data.contentNgrams.DumpMap()
422 if len(data.contentNgrams.DumpMap()) != 3 {
423 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
424 }
425
426 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
427 t.Errorf("found ngram bcd in %v", contentNgrams)
428 }
429 },
430 )
431 }
432}
433
434func TestBackfillIDIsDeterministic(t *testing.T) {
435 repo := "github.com/a/b"
436 have1 := backfillID(repo)
437 have2 := backfillID(repo)
438
439 if have1 != have2 {
440 t.Fatalf("%s != %s ", have1, have2)
441 }
442}
443
444func TestEncodeRanks(t *testing.T) {
445 quick.Check(func(ranks [][]float64) bool {
446 buf := bytes.Buffer{}
447 w := &writer{w: &buf}
448
449 if err := encodeRanks(w, ranks); err != nil {
450 return false
451 }
452
453 // In case all rank vectors are empty, IE {{}, {}, ...}, we won't write anything
454 // to w and gob decode will decode this as "nil", which will fail the
455 // comparison even with cmpopts.EquateEmpty().
456 if w.off == 0 {
457 return true
458 }
459
460 d := &indexData{}
461 if err := decodeRanks(buf.Bytes(), &d.ranks); err != nil {
462 t.Fatal(err)
463 }
464
465 if d := cmp.Diff(ranks, d.ranks, cmpopts.EquateEmpty()); d != "" {
466 t.Fatalf("-want, +got:\n%s\n", d)
467 }
468
469 return true
470 }, nil)
471}