fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "fmt"
22 "io/fs"
23 "os"
24 "path"
25 "path/filepath"
26 "reflect"
27 "strconv"
28 "strings"
29 "testing"
30
31 "github.com/google/go-cmp/cmp"
32
33 "github.com/sourcegraph/zoekt"
34 "github.com/sourcegraph/zoekt/query"
35)
36
37func TestReadWrite(t *testing.T) {
38 b, err := NewShardBuilder(nil)
39 if err != nil {
40 t.Fatalf("NewShardBuilder: %v", err)
41 }
42
43 if err := b.AddFile("filename", []byte("abcde")); err != nil {
44 t.Fatalf("AddFile: %v", err)
45 }
46
47 var buf bytes.Buffer
48 if err := b.Write(&buf); err != nil {
49 t.Fatal(err)
50 }
51 f := &memSeeker{buf.Bytes()}
52
53 r := reader{r: f}
54
55 var toc indexTOC
56 err = r.readTOC(&toc)
57 if err != nil {
58 t.Errorf("got read error %v", err)
59 }
60 if toc.fileContents.data.sz != 5 {
61 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
62 }
63
64 data, err := r.readIndexData(&toc)
65 if err != nil {
66 t.Fatalf("readIndexData: %v", err)
67 }
68 if got := data.fileName(0); string(got) != "filename" {
69 t.Errorf("got filename %q, want %q", got, "filename")
70 }
71
72 contentNgrams := data.contentNgrams.DumpMap()
73 if len(contentNgrams) != 3 {
74 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
75 }
76
77 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
78 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
79 }
80}
81
82func TestReadWriteNames(t *testing.T) {
83 b, err := NewShardBuilder(nil)
84 if err != nil {
85 t.Fatalf("NewShardBuilder: %v", err)
86 }
87
88 if err := b.AddFile("abCd", []byte("")); err != nil {
89 t.Fatalf("AddFile: %v", err)
90 }
91
92 var buf bytes.Buffer
93 if err := b.Write(&buf); err != nil {
94 t.Fatal(err)
95 }
96 f := &memSeeker{buf.Bytes()}
97
98 r := reader{r: f}
99
100 var toc indexTOC
101 if err := r.readTOC(&toc); err != nil {
102 t.Errorf("got read error %v", err)
103 }
104 if toc.fileNames.data.sz != 4 {
105 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
106 }
107
108 data, err := r.readIndexData(&toc)
109 if err != nil {
110 t.Fatalf("readIndexData: %v", err)
111 }
112 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
113 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
114 }
115
116 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
117 if err != nil {
118 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
119 }
120
121 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
122 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
123 }
124}
125
126func TestGet(t *testing.T) {
127 b, err := NewShardBuilder(nil)
128 if err != nil {
129 t.Fatalf("NewShardBuilder: %v", err)
130 }
131
132 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
133 t.Fatalf("AddFile: %v", err)
134 }
135
136 var buf bytes.Buffer
137 if err := b.Write(&buf); err != nil {
138 t.Fatal(err)
139 }
140 f := &memSeeker{buf.Bytes()}
141
142 r := reader{r: f}
143
144 var toc indexTOC
145 if err := r.readTOC(&toc); err != nil {
146 t.Errorf("got read error %v", err)
147 }
148
149 id, err := r.readIndexData(&toc)
150 if err != nil {
151 t.Fatalf("readIndexData: %v", err)
152 }
153
154 var off uint32 = 96
155
156 cases := []struct {
157 ng string
158 wantPostingList simpleSection
159 }{
160 {
161 ng: " bb",
162 wantPostingList: simpleSection{off: off, sz: 1},
163 },
164 {
165 ng: "a b",
166 wantPostingList: simpleSection{off: off + 1, sz: 1},
167 },
168 {
169 ng: "aa ",
170 wantPostingList: simpleSection{off: off + 2, sz: 1},
171 },
172 {
173 ng: "aaa",
174 wantPostingList: simpleSection{off: off + 3, sz: 2},
175 },
176 {
177 ng: "baa",
178 wantPostingList: simpleSection{off: off + 5, sz: 1},
179 },
180 {
181 ng: "bba",
182 wantPostingList: simpleSection{off: off + 6, sz: 1},
183 },
184 {
185 ng: "bbb",
186 wantPostingList: simpleSection{off: off + 7, sz: 1},
187 },
188 }
189
190 for _, tt := range cases {
191 t.Run(tt.ng, func(t *testing.T) {
192 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
193 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
194 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
195 }
196 })
197 }
198}
199
200func loadShard(fn string) (zoekt.Searcher, error) {
201 f, err := os.Open(fn)
202 if err != nil {
203 return nil, err
204 }
205
206 iFile, err := NewIndexFile(f)
207 if err != nil {
208 return nil, err
209 }
210 s, err := NewSearcher(iFile)
211 if err != nil {
212 iFile.Close()
213 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
214 }
215
216 return s, nil
217}
218
219func TestReadSearch(t *testing.T) {
220 type out struct {
221 FormatVersion int
222 FeatureVersion int
223 FileMatches [][]zoekt.FileMatch
224 }
225
226 qs := []query.Q{
227 &query.Substring{Pattern: "func main", Content: true},
228 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
229 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
230 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
231 }
232
233 shards, err := filepath.Glob("testdata/shards/*.zoekt")
234 if err != nil {
235 t.Fatal(err)
236 }
237
238 for _, path := range shards {
239 name := filepath.Base(path)
240 name = strings.TrimSuffix(name, ".zoekt")
241
242 shard, err := loadShard(path)
243 if err != nil {
244 t.Fatalf("error loading shard %s %v", name, err)
245 }
246
247 index, ok := shard.(*indexData)
248 if !ok {
249 t.Fatalf("expected *indexData for %s", name)
250 }
251
252 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
253
254 if *update {
255 got := out{
256 FormatVersion: index.metaData.IndexFormatVersion,
257 FeatureVersion: index.metaData.IndexFeatureVersion,
258 }
259 for _, q := range qs {
260 res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{})
261 if err != nil {
262 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
263 }
264 got.FileMatches = append(got.FileMatches, res.Files)
265 }
266
267 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
268 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
269 continue
270 } else if err := os.WriteFile(golden, raw, 0o644); err != nil {
271 t.Errorf("failed writing search results for %s during updating: %v", name, err)
272 continue
273 }
274 }
275
276 var want out
277 if buf, err := os.ReadFile(golden); err != nil {
278 t.Fatalf("failed reading search results for %s: %v", name, err)
279 } else if err := json.Unmarshal(buf, &want); err != nil {
280 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
281 }
282
283 if index.metaData.IndexFormatVersion != want.FormatVersion {
284 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
285 }
286
287 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
288 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
289 }
290
291 for j, q := range qs {
292 res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{})
293 if err != nil {
294 t.Fatalf("failed search %s on %s: %v", q, name, err)
295 }
296
297 if len(res.Files) != len(want.FileMatches[j]) {
298 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
299 }
300
301 if len(want.FileMatches[j]) == 0 {
302 continue
303 }
304
305 if d := cmp.Diff(want.FileMatches[j], res.Files); d != "" {
306 t.Errorf("matches for %s on %s (-want +got)\n%s", q, name, d)
307 }
308 }
309 }
310}
311
312func TestEncodeRawConfig(t *testing.T) {
313 mustParse := func(s string) uint8 {
314 i, err := strconv.ParseInt(s, 2, 8)
315 if err != nil {
316 t.Fatalf("failed to parse %s", s)
317 }
318 return uint8(i)
319 }
320
321 cases := []struct {
322 rawConfig map[string]string
323 want string
324 }{
325 {
326 rawConfig: map[string]string{"public": "1"},
327 want: "101001",
328 },
329 {
330 rawConfig: map[string]string{"fork": "1"},
331 want: "100110",
332 },
333 {
334 rawConfig: map[string]string{"public": "1", "fork": "1"},
335 want: "100101",
336 },
337 {
338 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
339 want: "010101",
340 },
341 {
342 rawConfig: map[string]string{},
343 want: "101010",
344 },
345 }
346 for _, c := range cases {
347 t.Run(c.want, func(t *testing.T) {
348 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
349 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
350 }
351 })
352 }
353}
354
355func TestBackwardsCompat(t *testing.T) {
356 if *update {
357 b, err := NewShardBuilder(nil)
358 if err != nil {
359 t.Fatalf("NewShardBuilder: %v", err)
360 }
361
362 if err := b.AddFile("filename", []byte("abcde")); err != nil {
363 t.Fatalf("AddFile: %v", err)
364 }
365
366 var buf bytes.Buffer
367 if err := b.Write(&buf); err != nil {
368 t.Fatal(err)
369 }
370
371 opts := Options{
372 IndexDir: "testdata/backcompat",
373 RepositoryDescription: zoekt.Repository{
374 Name: "new",
375 },
376 }
377 outName := opts.shardName(0)
378 t.Log("writing new file", outName)
379
380 err = os.WriteFile(outName, buf.Bytes(), 0o644)
381 if err != nil {
382 t.Fatalf("Creating output file: %v", err)
383 }
384 }
385
386 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
387 if err != nil {
388 t.Fatalf("fs.Glob: %v", err)
389 }
390
391 for _, fname := range compatibleFiles {
392 t.Run(path.Base(fname),
393 func(t *testing.T) {
394 f, err := os.Open(fname)
395 if err != nil {
396 t.Fatal("os.Open", err)
397 }
398 idx, err := NewIndexFile(f)
399 if err != nil {
400 t.Fatal("NewIndexFile", err)
401 }
402 r := reader{r: idx}
403
404 var toc indexTOC
405 err = r.readTOC(&toc)
406 if err != nil {
407 t.Errorf("got read error %v", err)
408 }
409 if toc.fileContents.data.sz != 5 {
410 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
411 }
412
413 data, err := r.readIndexData(&toc)
414 if err != nil {
415 t.Fatalf("readIndexData: %v", err)
416 }
417 if got := data.fileName(0); string(got) != "filename" {
418 t.Errorf("got filename %q, want %q", got, "filename")
419 }
420
421 contentNgrams := data.contentNgrams.DumpMap()
422 if len(data.contentNgrams.DumpMap()) != 3 {
423 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
424 }
425
426 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
427 t.Errorf("found ngram bcd in %v", contentNgrams)
428 }
429 },
430 )
431 }
432}
433
434func TestBackfillIDIsDeterministic(t *testing.T) {
435 repo := "github.com/a/b"
436 have1 := backfillID(repo)
437 have2 := backfillID(repo)
438
439 if have1 != have2 {
440 t.Fatalf("%s != %s ", have1, have2)
441 }
442}
443
444func BenchmarkReadMetadata(b *testing.B) {
445 file, err := os.Open("testdata/benchmark/zoekt_v16.00000.zoekt")
446 if err != nil {
447 b.Fatalf("Failed to open test file: %v", err)
448 }
449 defer file.Close()
450
451 indexFile, err := NewIndexFile(file)
452 if err != nil {
453 b.Fatalf("could not open index: %v", err)
454 }
455
456 b.ReportAllocs()
457 b.ResetTimer()
458
459 for i := 0; i < b.N; i++ {
460 repos, metadata, err := ReadMetadata(indexFile)
461 if err != nil {
462 b.Fatalf("ReadMetadata failed: %v", err)
463 }
464 if len(repos) != 1 {
465 b.Fatalf("expected 1 repository")
466 }
467 if metadata == nil {
468 b.Fatalf("expected non-nil metadata")
469 }
470 }
471}