fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "fmt"
22 "io/fs"
23 "os"
24 "path"
25 "path/filepath"
26 "reflect"
27 "strconv"
28 "strings"
29 "testing"
30
31 "github.com/google/go-cmp/cmp"
32 "github.com/sourcegraph/zoekt"
33 "github.com/sourcegraph/zoekt/query"
34)
35
36func TestReadWrite(t *testing.T) {
37 b, err := NewIndexBuilder(nil)
38 if err != nil {
39 t.Fatalf("NewIndexBuilder: %v", err)
40 }
41
42 if err := b.AddFile("filename", []byte("abcde")); err != nil {
43 t.Fatalf("AddFile: %v", err)
44 }
45
46 var buf bytes.Buffer
47 if err := b.Write(&buf); err != nil {
48 t.Fatal(err)
49 }
50 f := &memSeeker{buf.Bytes()}
51
52 r := reader{r: f}
53
54 var toc indexTOC
55 err = r.readTOC(&toc)
56 if err != nil {
57 t.Errorf("got read error %v", err)
58 }
59 if toc.fileContents.data.sz != 5 {
60 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
61 }
62
63 data, err := r.readIndexData(&toc)
64 if err != nil {
65 t.Fatalf("readIndexData: %v", err)
66 }
67 if got := data.fileName(0); string(got) != "filename" {
68 t.Errorf("got filename %q, want %q", got, "filename")
69 }
70
71 contentNgrams := data.contentNgrams.DumpMap()
72 if len(contentNgrams) != 3 {
73 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
74 }
75
76 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
77 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
78 }
79}
80
81func TestReadWriteNames(t *testing.T) {
82 b, err := NewIndexBuilder(nil)
83 if err != nil {
84 t.Fatalf("NewIndexBuilder: %v", err)
85 }
86
87 if err := b.AddFile("abCd", []byte("")); err != nil {
88 t.Fatalf("AddFile: %v", err)
89 }
90
91 var buf bytes.Buffer
92 if err := b.Write(&buf); err != nil {
93 t.Fatal(err)
94 }
95 f := &memSeeker{buf.Bytes()}
96
97 r := reader{r: f}
98
99 var toc indexTOC
100 if err := r.readTOC(&toc); err != nil {
101 t.Errorf("got read error %v", err)
102 }
103 if toc.fileNames.data.sz != 4 {
104 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
105 }
106
107 data, err := r.readIndexData(&toc)
108 if err != nil {
109 t.Fatalf("readIndexData: %v", err)
110 }
111 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
112 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
113 }
114
115 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
116 if err != nil {
117 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
118 }
119
120 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
121 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
122 }
123}
124
125func TestGet(t *testing.T) {
126 b, err := NewIndexBuilder(nil)
127 if err != nil {
128 t.Fatalf("NewIndexBuilder: %v", err)
129 }
130
131 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
132 t.Fatalf("AddFile: %v", err)
133 }
134
135 var buf bytes.Buffer
136 if err := b.Write(&buf); err != nil {
137 t.Fatal(err)
138 }
139 f := &memSeeker{buf.Bytes()}
140
141 r := reader{r: f}
142
143 var toc indexTOC
144 if err := r.readTOC(&toc); err != nil {
145 t.Errorf("got read error %v", err)
146 }
147
148 id, err := r.readIndexData(&toc)
149 if err != nil {
150 t.Fatalf("readIndexData: %v", err)
151 }
152
153 var off uint32 = 96
154
155 cases := []struct {
156 ng string
157 wantPostingList simpleSection
158 }{
159 {
160 ng: " bb",
161 wantPostingList: simpleSection{off: off, sz: 1},
162 },
163 {
164 ng: "a b",
165 wantPostingList: simpleSection{off: off + 1, sz: 1},
166 },
167 {
168 ng: "aa ",
169 wantPostingList: simpleSection{off: off + 2, sz: 1},
170 },
171 {
172 ng: "aaa",
173 wantPostingList: simpleSection{off: off + 3, sz: 2},
174 },
175 {
176 ng: "baa",
177 wantPostingList: simpleSection{off: off + 5, sz: 1},
178 },
179 {
180 ng: "bba",
181 wantPostingList: simpleSection{off: off + 6, sz: 1},
182 },
183 {
184 ng: "bbb",
185 wantPostingList: simpleSection{off: off + 7, sz: 1},
186 },
187 }
188
189 for _, tt := range cases {
190 t.Run(tt.ng, func(t *testing.T) {
191 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
192 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
193 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
194 }
195 })
196 }
197}
198
199func loadShard(fn string) (zoekt.Searcher, error) {
200 f, err := os.Open(fn)
201 if err != nil {
202 return nil, err
203 }
204
205 iFile, err := NewIndexFile(f)
206 if err != nil {
207 return nil, err
208 }
209 s, err := NewSearcher(iFile)
210 if err != nil {
211 iFile.Close()
212 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
213 }
214
215 return s, nil
216}
217
218func TestReadSearch(t *testing.T) {
219 type out struct {
220 FormatVersion int
221 FeatureVersion int
222 FileMatches [][]zoekt.FileMatch
223 }
224
225 qs := []query.Q{
226 &query.Substring{Pattern: "func main", Content: true},
227 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
228 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
229 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
230 }
231
232 shards, err := filepath.Glob("testdata/shards/*.zoekt")
233 if err != nil {
234 t.Fatal(err)
235 }
236
237 for _, path := range shards {
238 name := filepath.Base(path)
239 name = strings.TrimSuffix(name, ".zoekt")
240
241 shard, err := loadShard(path)
242 if err != nil {
243 t.Fatalf("error loading shard %s %v", name, err)
244 }
245
246 index, ok := shard.(*indexData)
247 if !ok {
248 t.Fatalf("expected *indexData for %s", name)
249 }
250
251 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
252
253 if *update {
254 got := out{
255 FormatVersion: index.metaData.IndexFormatVersion,
256 FeatureVersion: index.metaData.IndexFeatureVersion,
257 }
258 for _, q := range qs {
259 res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{})
260 if err != nil {
261 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
262 }
263 got.FileMatches = append(got.FileMatches, res.Files)
264 }
265
266 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
267 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
268 continue
269 } else if err := os.WriteFile(golden, raw, 0o644); err != nil {
270 t.Errorf("failed writing search results for %s during updating: %v", name, err)
271 continue
272 }
273 }
274
275 var want out
276 if buf, err := os.ReadFile(golden); err != nil {
277 t.Fatalf("failed reading search results for %s: %v", name, err)
278 } else if err := json.Unmarshal(buf, &want); err != nil {
279 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
280 }
281
282 if index.metaData.IndexFormatVersion != want.FormatVersion {
283 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
284 }
285
286 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
287 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
288 }
289
290 for j, q := range qs {
291 res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{})
292 if err != nil {
293 t.Fatalf("failed search %s on %s: %v", q, name, err)
294 }
295
296 if len(res.Files) != len(want.FileMatches[j]) {
297 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
298 }
299
300 if len(want.FileMatches[j]) == 0 {
301 continue
302 }
303
304 if d := cmp.Diff(want.FileMatches[j], res.Files); d != "" {
305 t.Errorf("matches for %s on %s (-want +got)\n%s", q, name, d)
306 }
307 }
308 }
309}
310
311func TestEncodeRawConfig(t *testing.T) {
312 mustParse := func(s string) uint8 {
313 i, err := strconv.ParseInt(s, 2, 8)
314 if err != nil {
315 t.Fatalf("failed to parse %s", s)
316 }
317 return uint8(i)
318 }
319
320 cases := []struct {
321 rawConfig map[string]string
322 want string
323 }{
324 {
325 rawConfig: map[string]string{"public": "1"},
326 want: "101001",
327 },
328 {
329 rawConfig: map[string]string{"fork": "1"},
330 want: "100110",
331 },
332 {
333 rawConfig: map[string]string{"public": "1", "fork": "1"},
334 want: "100101",
335 },
336 {
337 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
338 want: "010101",
339 },
340 {
341 rawConfig: map[string]string{},
342 want: "101010",
343 },
344 }
345 for _, c := range cases {
346 t.Run(c.want, func(t *testing.T) {
347 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
348 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
349 }
350 })
351 }
352}
353
354func TestBackwardsCompat(t *testing.T) {
355 if *update {
356 b, err := NewIndexBuilder(nil)
357 if err != nil {
358 t.Fatalf("NewIndexBuilder: %v", err)
359 }
360
361 if err := b.AddFile("filename", []byte("abcde")); err != nil {
362 t.Fatalf("AddFile: %v", err)
363 }
364
365 var buf bytes.Buffer
366 if err := b.Write(&buf); err != nil {
367 t.Fatal(err)
368 }
369
370 outName := ShardName("testdata/backcompat", "new", IndexFormatVersion, 0)
371 t.Log("writing new file", outName)
372
373 err = os.WriteFile(outName, buf.Bytes(), 0o644)
374 if err != nil {
375 t.Fatalf("Creating output file: %v", err)
376 }
377 }
378
379 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
380 if err != nil {
381 t.Fatalf("fs.Glob: %v", err)
382 }
383
384 for _, fname := range compatibleFiles {
385 t.Run(path.Base(fname),
386 func(t *testing.T) {
387 f, err := os.Open(fname)
388 if err != nil {
389 t.Fatal("os.Open", err)
390 }
391 idx, err := NewIndexFile(f)
392 if err != nil {
393 t.Fatal("NewIndexFile", err)
394 }
395 r := reader{r: idx}
396
397 var toc indexTOC
398 err = r.readTOC(&toc)
399 if err != nil {
400 t.Errorf("got read error %v", err)
401 }
402 if toc.fileContents.data.sz != 5 {
403 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
404 }
405
406 data, err := r.readIndexData(&toc)
407 if err != nil {
408 t.Fatalf("readIndexData: %v", err)
409 }
410 if got := data.fileName(0); string(got) != "filename" {
411 t.Errorf("got filename %q, want %q", got, "filename")
412 }
413
414 contentNgrams := data.contentNgrams.DumpMap()
415 if len(data.contentNgrams.DumpMap()) != 3 {
416 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
417 }
418
419 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
420 t.Errorf("found ngram bcd in %v", contentNgrams)
421 }
422 },
423 )
424 }
425}
426
427func TestBackfillIDIsDeterministic(t *testing.T) {
428 repo := "github.com/a/b"
429 have1 := backfillID(repo)
430 have2 := backfillID(repo)
431
432 if have1 != have2 {
433 t.Fatalf("%s != %s ", have1, have2)
434 }
435}
436
437func BenchmarkReadMetadata(b *testing.B) {
438 file, err := os.Open("testdata/benchmark/zoekt_v16.00000.zoekt")
439 if err != nil {
440 b.Fatalf("Failed to open test file: %v", err)
441 }
442 defer file.Close()
443
444 indexFile, err := NewIndexFile(file)
445 if err != nil {
446 b.Fatalf("could not open index: %v", err)
447 }
448
449 b.ReportAllocs()
450 b.ResetTimer()
451
452 for i := 0; i < b.N; i++ {
453 repos, metadata, err := ReadMetadata(indexFile)
454 if err != nil {
455 b.Fatalf("ReadMetadata failed: %v", err)
456 }
457 if len(repos) != 1 {
458 b.Fatalf("expected 1 repository")
459 }
460 if metadata == nil {
461 b.Fatalf("expected non-nil metadata")
462 }
463 }
464}