fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "encoding/json"
21 "flag"
22 "fmt"
23 "io/fs"
24 "os"
25 "path"
26 "path/filepath"
27 "reflect"
28 "strconv"
29 "strings"
30 "testing"
31
32 "github.com/google/go-cmp/cmp"
33
34 "github.com/sourcegraph/zoekt/query"
35)
36
// update is the -update test flag. When set, tests in this file rewrite their
// fixtures under testdata (golden search results, back-compat shards) before
// running their checks.
var update = flag.Bool("update", false, "update golden files")
38
39func TestReadWrite(t *testing.T) {
40 b, err := NewIndexBuilder(nil)
41 if err != nil {
42 t.Fatalf("NewIndexBuilder: %v", err)
43 }
44
45 if err := b.AddFile("filename", []byte("abcde")); err != nil {
46 t.Fatalf("AddFile: %v", err)
47 }
48
49 var buf bytes.Buffer
50 if err := b.Write(&buf); err != nil {
51 t.Fatal(err)
52 }
53 f := &memSeeker{buf.Bytes()}
54
55 r := reader{r: f}
56
57 var toc indexTOC
58 err = r.readTOC(&toc)
59 if err != nil {
60 t.Errorf("got read error %v", err)
61 }
62 if toc.fileContents.data.sz != 5 {
63 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
64 }
65
66 data, err := r.readIndexData(&toc)
67 if err != nil {
68 t.Fatalf("readIndexData: %v", err)
69 }
70 if got := data.fileName(0); string(got) != "filename" {
71 t.Errorf("got filename %q, want %q", got, "filename")
72 }
73
74 contentNgrams := data.contentNgrams.DumpMap()
75 if len(contentNgrams) != 3 {
76 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
77 }
78
79 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
80 t.Errorf("found ngram bcq (%v) in %v", uint64(stringToNGram("bcq")), contentNgrams)
81 }
82}
83
84func TestReadWriteNames(t *testing.T) {
85 b, err := NewIndexBuilder(nil)
86 if err != nil {
87 t.Fatalf("NewIndexBuilder: %v", err)
88 }
89
90 if err := b.AddFile("abCd", []byte("")); err != nil {
91 t.Fatalf("AddFile: %v", err)
92 }
93
94 var buf bytes.Buffer
95 if err := b.Write(&buf); err != nil {
96 t.Fatal(err)
97 }
98 f := &memSeeker{buf.Bytes()}
99
100 r := reader{r: f}
101
102 var toc indexTOC
103 if err := r.readTOC(&toc); err != nil {
104 t.Errorf("got read error %v", err)
105 }
106 if toc.fileNames.data.sz != 4 {
107 t.Errorf("got contents size %d, want 4", toc.fileNames.data.sz)
108 }
109
110 data, err := r.readIndexData(&toc)
111 if err != nil {
112 t.Fatalf("readIndexData: %v", err)
113 }
114 if !reflect.DeepEqual([]uint32{0, 4}, data.fileNameIndex) {
115 t.Errorf("got index %v, want {0,4}", data.fileNameIndex)
116 }
117
118 gotSec := data.fileNameNgrams.Get(stringToNGram("bCd"))
119 if err != nil {
120 t.Fatalf("fileNameNgrams.GetBlob: %v", err)
121 }
122
123 if !reflect.DeepEqual(buf.Bytes()[gotSec.off:gotSec.off+gotSec.sz], []byte{1}) {
124 t.Errorf("got trigram bcd at bits %v, want sz 2", data.fileNameNgrams)
125 }
126}
127
128func TestGet(t *testing.T) {
129 b, err := NewIndexBuilder(nil)
130 if err != nil {
131 t.Fatalf("NewIndexBuilder: %v", err)
132 }
133
134 if err := b.AddFile("file_name", []byte("aaa bbbaaa")); err != nil {
135 t.Fatalf("AddFile: %v", err)
136 }
137
138 var buf bytes.Buffer
139 if err := b.Write(&buf); err != nil {
140 t.Fatal(err)
141 }
142 f := &memSeeker{buf.Bytes()}
143
144 r := reader{r: f}
145
146 var toc indexTOC
147 if err := r.readTOC(&toc); err != nil {
148 t.Errorf("got read error %v", err)
149 }
150
151 id, err := r.readIndexData(&toc)
152 if err != nil {
153 t.Fatalf("readIndexData: %v", err)
154 }
155
156 var off uint32 = 96
157
158 cases := []struct {
159 ng string
160 wantPostingList simpleSection
161 }{
162 {
163 ng: " bb",
164 wantPostingList: simpleSection{off: off, sz: 1},
165 },
166 {
167 ng: "a b",
168 wantPostingList: simpleSection{off: off + 1, sz: 1},
169 },
170 {
171 ng: "aa ",
172 wantPostingList: simpleSection{off: off + 2, sz: 1},
173 },
174 {
175 ng: "aaa",
176 wantPostingList: simpleSection{off: off + 3, sz: 2},
177 },
178 {
179 ng: "baa",
180 wantPostingList: simpleSection{off: off + 5, sz: 1},
181 },
182 {
183 ng: "bba",
184 wantPostingList: simpleSection{off: off + 6, sz: 1},
185 },
186 {
187 ng: "bbb",
188 wantPostingList: simpleSection{off: off + 7, sz: 1},
189 },
190 }
191
192 for _, tt := range cases {
193 t.Run(tt.ng, func(t *testing.T) {
194 havePostingList := id.contentNgrams.Get(stringToNGram(tt.ng))
195 if !reflect.DeepEqual(tt.wantPostingList, havePostingList) {
196 t.Fatalf("\nwant:%+v\ngot: %+v", tt.wantPostingList, havePostingList)
197 }
198 })
199 }
200}
201
202func loadShard(fn string) (Searcher, error) {
203 f, err := os.Open(fn)
204 if err != nil {
205 return nil, err
206 }
207
208 iFile, err := NewIndexFile(f)
209 if err != nil {
210 return nil, err
211 }
212 s, err := NewSearcher(iFile)
213 if err != nil {
214 iFile.Close()
215 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)
216 }
217
218 return s, nil
219}
220
221func TestReadSearch(t *testing.T) {
222 type out struct {
223 FormatVersion int
224 FeatureVersion int
225 FileMatches [][]FileMatch
226 }
227
228 qs := []query.Q{
229 &query.Substring{Pattern: "func main", Content: true},
230 &query.Regexp{Regexp: mustParseRE("^package"), Content: true},
231 &query.Symbol{Expr: &query.Substring{Pattern: "num"}},
232 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}},
233 }
234
235 shards, err := filepath.Glob("testdata/shards/*.zoekt")
236 if err != nil {
237 t.Fatal(err)
238 }
239
240 for _, path := range shards {
241 name := filepath.Base(path)
242 name = strings.TrimSuffix(name, ".zoekt")
243
244 shard, err := loadShard(path)
245 if err != nil {
246 t.Fatalf("error loading shard %s %v", name, err)
247 }
248
249 index, ok := shard.(*indexData)
250 if !ok {
251 t.Fatalf("expected *indexData for %s", name)
252 }
253
254 golden := "testdata/golden/TestReadSearch/" + name + ".golden"
255
256 if *update {
257 got := out{
258 FormatVersion: index.metaData.IndexFormatVersion,
259 FeatureVersion: index.metaData.IndexFeatureVersion,
260 }
261 for _, q := range qs {
262 res, err := shard.Search(context.Background(), q, &SearchOptions{})
263 if err != nil {
264 t.Fatalf("failed search %s on %s during updating: %v", q, name, err)
265 }
266 got.FileMatches = append(got.FileMatches, res.Files)
267 }
268
269 if raw, err := json.MarshalIndent(got, "", " "); err != nil {
270 t.Errorf("failed marshalling search results for %s during updating: %v", name, err)
271 continue
272 } else if err := os.WriteFile(golden, raw, 0o644); err != nil {
273 t.Errorf("failed writing search results for %s during updating: %v", name, err)
274 continue
275 }
276 }
277
278 var want out
279 if buf, err := os.ReadFile(golden); err != nil {
280 t.Fatalf("failed reading search results for %s: %v", name, err)
281 } else if err := json.Unmarshal(buf, &want); err != nil {
282 t.Fatalf("failed unmarshalling search results for %s: %v", name, err)
283 }
284
285 if index.metaData.IndexFormatVersion != want.FormatVersion {
286 t.Errorf("got %d index format version, want %d for %s", index.metaData.IndexFormatVersion, want.FormatVersion, name)
287 }
288
289 if index.metaData.IndexFeatureVersion != want.FeatureVersion {
290 t.Errorf("got %d index feature version, want %d for %s", index.metaData.IndexFeatureVersion, want.FeatureVersion, name)
291 }
292
293 for j, q := range qs {
294 res, err := shard.Search(context.Background(), q, &SearchOptions{})
295 if err != nil {
296 t.Fatalf("failed search %s on %s: %v", q, name, err)
297 }
298
299 if len(res.Files) != len(want.FileMatches[j]) {
300 t.Fatalf("got %d file matches for %s on %s, want %d", len(res.Files), q, name, len(want.FileMatches[j]))
301 }
302
303 if len(want.FileMatches[j]) == 0 {
304 continue
305 }
306
307 if d := cmp.Diff(want.FileMatches[j], res.Files); d != "" {
308 t.Errorf("matches for %s on %s (-want +got)\n%s", q, name, d)
309 }
310 }
311 }
312}
313
314func TestEncodeRawConfig(t *testing.T) {
315 mustParse := func(s string) uint8 {
316 i, err := strconv.ParseInt(s, 2, 8)
317 if err != nil {
318 t.Fatalf("failed to parse %s", s)
319 }
320 return uint8(i)
321 }
322
323 cases := []struct {
324 rawConfig map[string]string
325 want string
326 }{
327 {
328 rawConfig: map[string]string{"public": "1"},
329 want: "101001",
330 },
331 {
332 rawConfig: map[string]string{"fork": "1"},
333 want: "100110",
334 },
335 {
336 rawConfig: map[string]string{"public": "1", "fork": "1"},
337 want: "100101",
338 },
339 {
340 rawConfig: map[string]string{"public": "1", "fork": "1", "archived": "1"},
341 want: "010101",
342 },
343 {
344 rawConfig: map[string]string{},
345 want: "101010",
346 },
347 }
348 for _, c := range cases {
349 t.Run(c.want, func(t *testing.T) {
350 if got := encodeRawConfig(c.rawConfig); got != mustParse(c.want) {
351 t.Fatalf("want %s, got %s", c.want, strconv.FormatInt(int64(got), 2))
352 }
353 })
354 }
355}
356
357func TestBackwardsCompat(t *testing.T) {
358 if *update {
359 b, err := NewIndexBuilder(nil)
360 if err != nil {
361 t.Fatalf("NewIndexBuilder: %v", err)
362 }
363
364 if err := b.AddFile("filename", []byte("abcde")); err != nil {
365 t.Fatalf("AddFile: %v", err)
366 }
367
368 var buf bytes.Buffer
369 if err := b.Write(&buf); err != nil {
370 t.Fatal(err)
371 }
372
373 outName := ShardName("testdata/backcompat", "new", IndexFormatVersion, 0)
374 t.Log("writing new file", outName)
375
376 err = os.WriteFile(outName, buf.Bytes(), 0o644)
377 if err != nil {
378 t.Fatalf("Creating output file: %v", err)
379 }
380 }
381
382 compatibleFiles, err := fs.Glob(os.DirFS("."), "testdata/backcompat/*.zoekt")
383 if err != nil {
384 t.Fatalf("fs.Glob: %v", err)
385 }
386
387 for _, fname := range compatibleFiles {
388 t.Run(path.Base(fname),
389 func(t *testing.T) {
390 f, err := os.Open(fname)
391 if err != nil {
392 t.Fatal("os.Open", err)
393 }
394 idx, err := NewIndexFile(f)
395 if err != nil {
396 t.Fatal("NewIndexFile", err)
397 }
398 r := reader{r: idx}
399
400 var toc indexTOC
401 err = r.readTOC(&toc)
402 if err != nil {
403 t.Errorf("got read error %v", err)
404 }
405 if toc.fileContents.data.sz != 5 {
406 t.Errorf("got contents size %d, want 5", toc.fileContents.data.sz)
407 }
408
409 data, err := r.readIndexData(&toc)
410 if err != nil {
411 t.Fatalf("readIndexData: %v", err)
412 }
413 if got := data.fileName(0); string(got) != "filename" {
414 t.Errorf("got filename %q, want %q", got, "filename")
415 }
416
417 contentNgrams := data.contentNgrams.DumpMap()
418 if len(data.contentNgrams.DumpMap()) != 3 {
419 t.Fatalf("got ngrams %v, want 3 ngrams", contentNgrams)
420 }
421
422 if sec := data.contentNgrams.Get(stringToNGram("bcq")); sec.sz > 0 {
423 t.Errorf("found ngram bcd in %v", contentNgrams)
424 }
425 },
426 )
427 }
428}
429
430func TestBackfillIDIsDeterministic(t *testing.T) {
431 repo := "github.com/a/b"
432 have1 := backfillID(repo)
433 have2 := backfillID(repo)
434
435 if have1 != have2 {
436 t.Fatalf("%s != %s ", have1, have2)
437 }
438}
439
440func BenchmarkReadMetadata(b *testing.B) {
441 file, err := os.Open("testdata/benchmark/zoekt_v16.00000.zoekt")
442 if err != nil {
443 b.Fatalf("Failed to open test file: %v", err)
444 }
445 defer file.Close()
446
447 indexFile, err := NewIndexFile(file)
448 if err != nil {
449 b.Fatalf("could not open index: %v", err)
450 }
451
452 b.ReportAllocs()
453 b.ResetTimer()
454
455 for i := 0; i < b.N; i++ {
456 repos, metadata, err := ReadMetadata(indexFile)
457 if err != nil {
458 b.Fatalf("ReadMetadata failed: %v", err)
459 }
460 if len(repos) != 1 {
461 b.Fatalf("expected 1 repository")
462 }
463 if metadata == nil {
464 b.Fatalf("expected non-nil metadata")
465 }
466 }
467}