fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "flag"
22 "fmt"
23 "io/fs"
24 "os"
25 "path"
26 "path/filepath"
27 "reflect"
28 "strconv"
29 "strings"
30 "testing"
31 "testing/quick"
32
33 "github.com/google/go-cmp/cmp"
34 "github.com/google/go-cmp/cmp/cmpopts"
35
36 "github.com/sourcegraph/zoekt/query"
37)
38
// update is the -update test flag. When set, tests in this file regenerate
// their golden/back-compat fixtures on disk before running their checks.
var update = flag.Bool("update", false, "update golden files")
40
41func TestReadWrite(t *testing.T) {
42 b, err := NewIndexBuilder(nil)
43 if err != nil {
44 t.Fatalf("NewIndexBuilder: %v", err)
45 }
46
47 if err := b.AddFile("filename", []byte("abcde")); err != nil {
48 t.Fatalf("AddFile: %v", err)
49 }
50
51 var buf bytes.Buffer
52 if err := b.Write(&buf); err != nil {
53 t.Fatal(err)
54 }
55 f := &memSeeker{buf.Bytes()}
56
57 r := reader{r: f}
58
59 var toc indexTOC
60 err = r.readTOC(&toc)
61 if err != nil {
62 t.Errorf("got read error %v", err)
63 }
64 if toc.fileContents.data.sz != 5 {
65 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
66 }
67
68 data, err := r.readIndexData(&toc)
69 if err != nil {
70 t.Fatalf("readIndexData: %v", err)
71 }
72 if got := data.fileName(0); string(got) != "filename" {
73 t.Errorf("got filename %q, want %q", got, "filename")
74 }
75
76 contentNgrams := data.contentNgrams.DumpMap()
77 if len(contentNgrams) != 3 {
78 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
79 }
80
81 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
82 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
83 }
84}
85
86func TestReadWriteNames(t *testing.T) {
87 b, err := NewIndexBuilder(nil)
88 if err != nil {
89 t.Fatalf("NewIndexBuilder: %v", err)
90 }
91
92 if err := b.AddFile("abCd", []byte("")); err != nil {
93 t.Fatalf("AddFile: %v", err)
94 }
95
96 var buf bytes.Buffer
97 if err := b.Write(&buf); err != nil {
98 t.Fatal(err)
99 }
100 f := &memSeeker{buf.Bytes()}
101
102 r := reader{r: f}
103
104 var toc indexTOC
105 if err := r.readTOC(&toc); err != nil {
106 t.Errorf("got read error %v", err)
107 }
108 if toc.fileNames.data.sz != 4 {
109 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
110 }
111
112 data, err := r.readIndexData(&toc)
113 if err != nil {
114 t.Fatalf("readIndexData: %v", err)
115 }
116 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
117 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
118 }
119
120 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
121 if err != nil {
122 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
123 }
124
125 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
126 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
127 }
128}
129
130func TestGet(t *testing.T) {
131 b, err := NewIndexBuilder(nil)
132 if err != nil {
133 t.Fatalf("NewIndexBuilder: %v", err)
134 }
135
136 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
137 t.Fatalf("AddFile: %v", err)
138 }
139
140 var buf bytes.Buffer
141 if err := b.Write(&buf); err != nil {
142 t.Fatal(err)
143 }
144 f := &memSeeker{buf.Bytes()}
145
146 r := reader{r: f}
147
148 var toc indexTOC
149 if err := r.readTOC(&toc); err != nil {
150 t.Errorf("got read error %v", err)
151 }
152
153 id, err := r.readIndexData(&toc)
154 if err != nil {
155 t.Fatalf("readIndexData: %v", err)
156 }
157
158 var off uint32 = 96
159
160 cases := []struct {
161 ng string
162 wantPostingList simpleSection
163 }{
164 {
165 ng: " bb",
166 wantPostingList: simpleSection{off: off, sz: 1},
167 },
168 {
169 ng: "a b",
170 wantPostingList: simpleSection{off: off + 1, sz: 1},
171 },
172 {
173 ng: "aa ",
174 wantPostingList: simpleSection{off: off + 2, sz: 1},
175 },
176 {
177 ng: "aaa",
178 wantPostingList: simpleSection{off: off + 3, sz: 2},
179 },
180 {
181 ng: "baa",
182 wantPostingList: simpleSection{off: off + 5, sz: 1},
183 },
184 {
185 ng: "bba",
186 wantPostingList: simpleSection{off: off + 6, sz: 1},
187 },
188 {
189 ng: "bbb",
190 wantPostingList: simpleSection{off: off + 7, sz: 1},
191 },
192 }
193
194 for _, tt := range cases {
195 t.Run(tt.ng, func(t *testing.T) {
196 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
197 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
198 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
199 }
200 })
201 }
202}
203
204func loadShard(fn string) (Searcher, error) {
205 f, err := os.Open(fn)
206 if err != nil {
207 return nil, err
208 }
209
210 iFile, err := NewIndexFile(f)
211 if err != nil {
212 return nil, err
213 }
214 s, err := NewSearcher(iFile)
215 if err != nil {
216 iFile.Close()
217 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
218 }
219
220 return s, nil
221}
222
223func TestReadSearch(t *testing.T) {
224 type out struct {
225 FormatVersion int
226 FeatureVersion int
227 FileMatches [][]FileMatch
228 }
229
230 qs := []query.Q{
231 &query.Substring{Pattern: "func main", Content: true},
232 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
233 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
234 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
235 }
236
237 shards, err := filepath.Glob("testdata/shards/*.zoekt")
238 if err != nil {
239 t.Fatal(err)
240 }
241
242 for _, path := range shards {
243 name := filepath.Base(path)
244 name = strings.TrimSuffix(name, ".zoekt")
245
246 shard, err := loadShard(path)
247 if err != nil {
248 t.Fatalf("error loading shard %s %v", name, err)
249 }
250
251 index, ok := shard.(*indexData)
252 if !ok {
253 t.Fatalf("expected *indexData for %s", name)
254 }
255
256 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
257
258 if *update {
259 got := out{
260 FormatVersion: index.metaData.IndexFormatVersion,
261 FeatureVersion: index.metaData.IndexFeatureVersion,
262 }
263 for _, q := range qs {
264 res, err := shard.Search(context.Background(), q, &SearchOptions{})
265 if err != nil {
266 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
267 }
268 got.FileMatches = append(got.FileMatches, res.Files)
269 }
270
271 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
272 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
273 continue
274 } else if err := os.WriteFile(golden, raw, 0o644); err != nil {
275 t.Errorf("failed writing search results for %s during updating: %v", name, err)
276 continue
277 }
278 }
279
280 var want out
281 if buf, err := os.ReadFile(golden); err != nil {
282 t.Fatalf("failed reading search results for %s: %v", name, err)
283 } else if err := json.Unmarshal(buf, &want); err != nil {
284 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
285 }
286
287 if index.metaData.IndexFormatVersion != want.FormatVersion {
288 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
289 }
290
291 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
292 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
293 }
294
295 for j, q := range qs {
296 res, err := shard.Search(context.Background(), q, &SearchOptions{})
297 if err != nil {
298 t.Fatalf("failed search %s on %s: %v", q, name, err)
299 }
300
301 if len(res.Files) != len(want.FileMatches[j]) {
302 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
303 }
304
305 if len(want.FileMatches[j]) == 0 {
306 continue
307 }
308
309 if d := cmp.Diff(res.Files, want.FileMatches[j]); d != "" {
310 t.Errorf("matches for %s on %s\n%s", q, name, d)
311 }
312 }
313 }
314}
315
316func TestEncodeRawConfig(t *testing.T) {
317 mustParse := func(s string) uint8 {
318 i, err := strconv.ParseInt(s, 2, 8)
319 if err != nil {
320 t.Fatalf("failed to parse %s", s)
321 }
322 return uint8(i)
323 }
324
325 cases := []struct {
326 rawConfig map[string]string
327 want string
328 }{
329 {
330 rawConfig: map[string]string{"public": "1"},
331 want: "101001",
332 },
333 {
334 rawConfig: map[string]string{"fork": "1"},
335 want: "100110",
336 },
337 {
338 rawConfig: map[string]string{"public": "1", "fork": "1"},
339 want: "100101",
340 },
341 {
342 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
343 want: "010101",
344 },
345 {
346 rawConfig: map[string]string{},
347 want: "101010",
348 },
349 }
350 for _, c := range cases {
351 t.Run(c.want, func(t *testing.T) {
352 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
353 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
354 }
355 })
356 }
357}
358
359func TestBackwardsCompat(t *testing.T) {
360 if *update {
361 b, err := NewIndexBuilder(nil)
362 if err != nil {
363 t.Fatalf("NewIndexBuilder: %v", err)
364 }
365
366 if err := b.AddFile("filename", []byte("abcde")); err != nil {
367 t.Fatalf("AddFile: %v", err)
368 }
369
370 var buf bytes.Buffer
371 if err := b.Write(&buf); err != nil {
372 t.Fatal(err)
373 }
374
375 outname := fmt.Sprintf("testdata/backcompat/new_v%d.%05d.zoekt", IndexFormatVersion, 0)
376 t.Log("writing new file", outname)
377
378 err = os.WriteFile(outname, buf.Bytes(), 0o644)
379 if err != nil {
380 t.Fatalf("Creating output file: %v", err)
381 }
382 }
383
384 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
385 if err != nil {
386 t.Fatalf("fs.Glob: %v", err)
387 }
388
389 for _, fname := range compatibleFiles {
390 t.Run(path.Base(fname),
391 func(t *testing.T) {
392 f, err := os.Open(fname)
393 if err != nil {
394 t.Fatal("os.Open", err)
395 }
396 idx, err := NewIndexFile(f)
397 if err != nil {
398 t.Fatal("NewIndexFile", err)
399 }
400 r := reader{r: idx}
401
402 var toc indexTOC
403 err = r.readTOC(&toc)
404 if err != nil {
405 t.Errorf("got read error %v", err)
406 }
407 if toc.fileContents.data.sz != 5 {
408 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
409 }
410
411 data, err := r.readIndexData(&toc)
412 if err != nil {
413 t.Fatalf("readIndexData: %v", err)
414 }
415 if got := data.fileName(0); string(got) != "filename" {
416 t.Errorf("got filename %q, want %q", got, "filename")
417 }
418
419 contentNgrams := data.contentNgrams.DumpMap()
420 if len(data.contentNgrams.DumpMap()) != 3 {
421 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
422 }
423
424 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
425 t.Errorf("found ngram bcd in %v", contentNgrams)
426 }
427 },
428 )
429 }
430}
431
432func TestBackfillIDIsDeterministic(t *testing.T) {
433 repo := "github.com/a/b"
434 have1 := backfillID(repo)
435 have2 := backfillID(repo)
436
437 if have1 != have2 {
438 t.Fatalf("%s != %s ", have1, have2)
439 }
440}
441
442func TestEncodeRanks(t *testing.T) {
443 quick.Check(func(ranks [][]float64) bool {
444 buf := bytes.Buffer{}
445 w := &writer{w: &buf}
446
447 if err := encodeRanks(w, ranks); err != nil {
448 return false
449 }
450
451 // In case all rank vectors are empty, IE {{}, {}, ...}, we won't write anything
452 // to w and gob decode will decode this as "nil", which will fail the
453 // comparison even with cmpopts.EquateEmpty().
454 if w.off == 0 {
455 return true
456 }
457
458 d := &indexData{}
459 if err := decodeRanks(buf.Bytes(), &d.ranks); err != nil {
460 t.Fatal(err)
461 }
462
463 if d := cmp.Diff(ranks, d.ranks, cmpopts.EquateEmpty()); d != "" {
464 t.Fatalf("-want, +got:\n%s\n", d)
465 }
466
467 return true
468 }, nil)
469}