fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt"
31 "github.com/sourcegraph/zoekt/query"
32)
33
34func clearScores(r *zoekt.SearchResult) {
35 for i := range r.Files {
36 r.Files[i].Score = 0.0
37 for j := range r.Files[i].LineMatches {
38 r.Files[i].LineMatches[j].Score = 0.0
39 }
40 for j := range r.Files[i].ChunkMatches {
41 r.Files[i].ChunkMatches[j].Score = 0.0
42 r.Files[i].ChunkMatches[j].BestLineMatch = 0
43 }
44 r.Files[i].Checksum = nil
45 r.Files[i].Debug = ""
46 }
47}
48
49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder {
50 tb.Helper()
51
52 b, err := NewShardBuilder(repo)
53 if err != nil {
54 tb.Fatalf("NewShardBuilder: %v", err)
55 }
56
57 for i, d := range docs {
58 if err := b.Add(d); err != nil {
59 tb.Fatalf("Add %d: %v", i, err)
60 }
61 }
62
63 return b
64}
65
66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder {
67 t.Helper()
68
69 b := newShardBuilder(0)
70 b.indexFormatVersion = NextIndexFormatVersion
71
72 if len(repos) != len(docs) {
73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
74 }
75
76 for i, repo := range repos {
77 if err := b.setRepository(repo); err != nil {
78 t.Fatal(err)
79 }
80 for j, d := range docs[i] {
81 if err := b.Add(d); err != nil {
82 t.Fatalf("Add %d %d: %v", i, j, err)
83 }
84 }
85 }
86
87 return b
88}
89
90func TestBoundary(t *testing.T) {
91 b := testShardBuilder(t, nil,
92 Document{Name: "f1", Content: []byte("x the")},
93 Document{Name: "f1", Content: []byte("reader")})
94 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
95 if len(res.Files) > 0 {
96 t.Fatalf("got %v, want no matches", res.Files)
97 }
98}
99
100func TestDocSectionInvalid(t *testing.T) {
101 b, err := NewShardBuilder(nil)
102 if err != nil {
103 t.Fatalf("NewShardBuilder: %v", err)
104 }
105 doc := Document{
106 Name: "f1",
107 Content: []byte("01234567890123"),
108 Symbols: []DocumentSection{{5, 8}, {7, 9}},
109 }
110
111 if err := b.Add(doc); err == nil {
112 t.Errorf("overlapping doc sections should fail")
113 }
114
115 doc = Document{
116 Name: "f1",
117 Content: []byte("01234567890123"),
118 Symbols: []DocumentSection{{0, 20}},
119 }
120
121 if err := b.Add(doc); err == nil {
122 t.Errorf("doc sections beyond EOF should fail")
123 }
124}
125
126func TestBasic(t *testing.T) {
127 b := testShardBuilder(t, nil,
128 Document{
129 Name: "f2",
130 Content: []byte("to carry water in the no later bla"),
131 // --------------0123456789012345678901234567890123
132 })
133
134 t.Run("LineMatch", func(t *testing.T) {
135 res := searchForTest(t, b, &query.Substring{
136 Pattern: "water",
137 CaseSensitive: true,
138 })
139 fmatches := res.Files
140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
141 t.Fatalf("got %v, want 1 matches", fmatches)
142 }
143
144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
145 want := "f2:9"
146 if got != want {
147 t.Errorf("1: got %s, want %s", got, want)
148 }
149 })
150
151 t.Run("ChunkMatch", func(t *testing.T) {
152 res := searchForTest(t, b, &query.Substring{
153 Pattern: "water",
154 CaseSensitive: true,
155 }, chunkOpts)
156 fmatches := res.Files
157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
158 t.Fatalf("got %v, want 1 matches", fmatches)
159 }
160
161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
162 want := "f2:9"
163 if got != want {
164 t.Errorf("1: got %s, want %s", got, want)
165 }
166 })
167}
168
169func TestEmptyIndex(t *testing.T) {
170 b := testShardBuilder(t, nil)
171 searcher := searcherForTest(t, b)
172
173 var opts zoekt.SearchOptions
174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
175 t.Fatalf("Search: %v", err)
176 }
177
178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
179 t.Fatalf("List: %v", err)
180 }
181
182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
183 t.Fatalf("Search: %v", err)
184 }
185}
186
// memSeeker serves reads from an in-memory byte slice. searcherForTest wraps a
// serialized shard in it and hands it to NewSearcher, so tests never touch
// the file system.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes starting at offset off. The returned slice aliases
// the underlying data rather than copying it. A request that reaches past the
// end of the data yields an error instead of a slice-bounds panic.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Do the addition in 64 bits: off+sz may not fit in a uint32, and a
	// wrapped end offset would otherwise slip through the range check.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d, %d) out of range, size is %d", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
203
204func TestNewlines(t *testing.T) {
205 b := testShardBuilder(t, nil,
206 // -----------------------------------------012345-678901-234
207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
208
209 t.Run("LineMatches", func(t *testing.T) {
210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
211
212 matches := sres.Files
213 want := []zoekt.FileMatch{{
214 FileName: "filename",
215 LineMatches: []zoekt.LineMatch{{
216 LineFragments: []zoekt.LineFragmentMatch{{
217 Offset: 8,
218 LineOffset: 2,
219 MatchLength: 3,
220 }},
221 Line: []byte("line2\n"),
222 LineStart: 6,
223 LineEnd: 12,
224 LineNumber: 2,
225 }},
226 }}
227
228 if diff := cmp.Diff(matches, want); diff != "" {
229 t.Fatal(diff)
230 }
231 })
232
233 t.Run("ChunkMatches", func(t *testing.T) {
234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
235
236 matches := sres.Files
237 want := []zoekt.FileMatch{{
238 FileName: "filename",
239 ChunkMatches: []zoekt.ChunkMatch{{
240 Content: []byte("line2\n"),
241 ContentStart: zoekt.Location{
242 ByteOffset: 6,
243 LineNumber: 2,
244 Column: 1,
245 },
246 Ranges: []zoekt.Range{{
247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3},
248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6},
249 }},
250 }},
251 }}
252
253 if diff := cmp.Diff(want, matches); diff != "" {
254 t.Fatal(diff)
255 }
256 })
257}
258
259// A result spanning multiple lines should have LineMatches that only cover
260// single lines.
261func TestQueryNewlines(t *testing.T) {
262 text := "line1\nline2\nbla"
263 b := testShardBuilder(t, nil,
264 Document{Name: "filename", Content: []byte(text)})
265
266 t.Run("LineMatches", func(t *testing.T) {
267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
268 matches := sres.Files
269 if len(matches) != 1 {
270 t.Fatalf("got %d file matches, want exactly one", len(matches))
271 }
272 m := matches[0]
273 if len(m.LineMatches) != 2 {
274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
275 }
276 })
277
278 t.Run("ChunkMatches", func(t *testing.T) {
279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
280 matches := sres.Files
281 if len(matches) != 1 {
282 t.Fatalf("got %d file matches, want exactly one", len(matches))
283 }
284 m := matches[0]
285 if len(m.ChunkMatches) != 1 {
286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
287 }
288 })
289}
290
// chunkOpts is the SearchOptions value the tests in this file pass to a
// search when they want results reported as ChunkMatches; searches made
// without it are checked through LineMatches instead.
var chunkOpts = zoekt.SearchOptions{ChunkMatches: true}
292
293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult {
294 searcher := searcherForTest(t, b)
295 var opts zoekt.SearchOptions
296 if len(o) > 0 {
297 opts = o[0]
298 }
299 res, err := searcher.Search(context.Background(), q, &opts)
300 if err != nil {
301 t.Fatalf("Search(%s): %v", q, err)
302 }
303 clearScores(res)
304 return res
305}
306
307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher {
308 var buf bytes.Buffer
309 if err := b.Write(&buf); err != nil {
310 t.Fatal(err)
311 }
312 f := &memSeeker{buf.Bytes()}
313
314 searcher, err := NewSearcher(f)
315 if err != nil {
316 t.Fatalf("NewSearcher: %v", err)
317 }
318
319 return searcher
320}
321
322func TestCaseFold(t *testing.T) {
323 b := testShardBuilder(t, nil,
324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
325 // -----------------------------------012345678901234
326 )
327 t.Run("LineMatches", func(t *testing.T) {
328 sres := searchForTest(t, b, &query.Substring{
329 Pattern: "bananas",
330 CaseSensitive: true,
331 })
332 matches := sres.Files
333 if len(matches) != 0 {
334 t.Errorf("foldcase: got %#v, want 0 matches", matches)
335 }
336
337 sres = searchForTest(t, b,
338 &query.Substring{
339 Pattern: "BaNaNAS",
340 CaseSensitive: true,
341 })
342 matches = sres.Files
343 if len(matches) != 1 {
344 t.Errorf("no foldcase: got %v, want 1 matches", matches)
345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
346 t.Errorf("foldcase: got %v, want offsets 7", matches)
347 }
348 })
349
350 t.Run("ChunkMatches", func(t *testing.T) {
351 sres := searchForTest(t, b, &query.Substring{
352 Pattern: "bananas",
353 CaseSensitive: true,
354 }, chunkOpts)
355 matches := sres.Files
356 if len(matches) != 0 {
357 t.Errorf("foldcase: got %#v, want 0 matches", matches)
358 }
359
360 sres = searchForTest(t, b,
361 &query.Substring{
362 Pattern: "BaNaNAS",
363 CaseSensitive: true,
364 })
365 matches = sres.Files
366 if len(matches) != 1 {
367 t.Errorf("no foldcase: got %v, want 1 matches", matches)
368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
369 t.Errorf("foldcase: got %v, want offsets 7", matches)
370 }
371 })
372}
373
374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
375// chars. Those are then set as symbols.
376func wordsAsSymbols(doc Document) Document {
377 re := regexp.MustCompile(`\b\w{2,}\b`)
378 var symbols []DocumentSection
379 var symbolsMetadata []*zoekt.Symbol
380 for _, match := range re.FindAllIndex(doc.Content, -1) {
381 symbols = append(symbols, DocumentSection{
382 Start: uint32(match[0]),
383 End: uint32(match[1]),
384 })
385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"})
386 }
387 doc.Symbols = symbols
388 doc.SymbolsMetaData = symbolsMetadata
389 return doc
390}
391
// TestSearchStats indexes three small documents (with their words of two or
// more characters registered as symbols via wordsAsSymbols) and checks two
// things: that an AND query returns the expected matches in both result
// formats, and that the zoekt.Stats counters reported for a range of
// substring, symbol and regexp queries have the exact expected values.
func TestSearchStats(t *testing.T) {
	ctx := context.Background()
	searcher := searcherForTest(t, testShardBuilder(t, nil,
		wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
		wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
		wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
		// --------------------------------------------------0123456789012345
	))

	// Only f3 contains both words, so it is the single expected file match.
	andQuery := query.NewAnd(
		&query.Substring{
			Pattern: "banana",
		},
		&query.Substring{
			Pattern: "apple",
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{})
		if err != nil {
			t.Fatal(err)
		}
		matches := sres.Files
		if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
			t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
		}

		if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
			t.Fatalf("got %#v, want offsets 2,9", matches)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
		if err != nil {
			t.Fatal(err)
		}
		matches := sres.Files
		if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
			t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
		}

		if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
			t.Fatalf("got %#v, want offsets 2,9", matches)
		}
	})
	t.Run("Stats", func(t *testing.T) {
		// Each case runs Q with chunk matches enabled and compares the
		// resulting Stats against Want field by field.
		cases := []struct {
			Name string
			Q    query.Q
			Want zoekt.Stats
		}{{
			Name: "and-query",
			Q:    andQuery,
			Want: zoekt.Stats{
				FilesLoaded:        1,
				ContentBytesLoaded: 22,
				IndexBytesLoaded:   10,
				NgramMatches:       3, // we look at doc 1, because it's max(0,1) due to AND
				NgramLookups:       104,
				MatchCount:         2,
				FileCount:          1,
				FilesConsidered:    2,
				ShardsScanned:      1,
			},
		}, {
			Name: "one-trigram",
			Q: &query.Substring{
				Pattern:       "a y",
				Content:       true,
				CaseSensitive: true,
			},
			Want: zoekt.Stats{
				ContentBytesLoaded: 14,
				IndexBytesLoaded:   1,
				FileCount:          1,
				FilesConsidered:    1,
				FilesLoaded:        1,
				ShardsScanned:      1,
				MatchCount:         1,
				NgramMatches:       1,
				NgramLookups:       2, // once to lookup frequency then again to access posting list.
			},
		}, {
			Name: "one-trigram-case-insensitive",
			Q: &query.Substring{
				Pattern: "a y",
				Content: true,
			},
			Want: zoekt.Stats{
				ContentBytesLoaded: 14,
				IndexBytesLoaded:   1,
				FileCount:          1,
				FilesConsidered:    1,
				FilesLoaded:        1,
				ShardsScanned:      1,
				MatchCount:         1,
				NgramMatches:       1,
				NgramLookups:       8, // "a y" has 2**2 casings which we lookup twice.
			},
		}, {
			Name: "one-trigram-pruned",
			Q: &query.Substring{
				Pattern:       "foo",
				Content:       true,
				CaseSensitive: true,
			},
			Want: zoekt.Stats{
				ShardsSkippedFilter: 1,
				NgramLookups:        1, // only had to lookup once
			},
		}, {
			Name: "one-trigram-branch-pruned",
			Q: query.NewAnd(
				&query.Substring{
					Pattern:       "foo",
					Content:       true,
					CaseSensitive: true,
				},
				&query.Substring{
					Pattern:       "a y",
					Content:       true,
					CaseSensitive: true,
				},
			),
			Want: zoekt.Stats{
				IndexBytesLoaded:    1, // we created an iterator for "a y" before pruning.
				ShardsSkippedFilter: 1,
				NgramLookups:        3, // we looked up "foo" once (1), but looked up and created "a y" (2).
			},
		}, {
			Name: "symbol-substr-nomatch",
			Q: &query.Symbol{Expr: &query.Substring{
				Pattern:       "banana apple",
				Content:       true,
				CaseSensitive: true,
			}},
			Want: zoekt.Stats{
				IndexBytesLoaded: 3,
				FilesConsidered:  1, // important that we only check 1 file to ensure we are using the index
				MatchCount:       0, // even though there is a match it doesn't align with a symbol
				ShardsScanned:    1,
				NgramMatches:     1,
				NgramLookups:     12,
			},
		}, {
			Name: "symbol-substr",
			Q: &query.Symbol{Expr: &query.Substring{
				Pattern:       "apple",
				Content:       true,
				CaseSensitive: true,
			}},
			Want: zoekt.Stats{
				ContentBytesLoaded: 35,
				IndexBytesLoaded:   4,
				FileCount:          2,
				FilesConsidered:    2, // must be 2 to ensure we used the index
				FilesLoaded:        2,
				MatchCount:         2, // apple symbols is in two files
				ShardsScanned:      1,
				NgramMatches:       2,
				NgramLookups:       5,
			},
		}, {
			Name: "symbol-regexp-nomatch",
			Q: &query.Symbol{Expr: &query.Regexp{
				Regexp:        mustParseRE("^apple.banana$"),
				Content:       true,
				CaseSensitive: true,
			}},
			Want: zoekt.Stats{
				ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
				IndexBytesLoaded:   10,
				FilesConsidered:    2, // important that we don't check 3 to ensure we are using the index
				FilesLoaded:        2,
				MatchCount:         0, // even though there is a match it doesn't align with a symbol
				ShardsScanned:      1,
				NgramMatches:       3,
				NgramLookups:       11,
			},
		}, {
			Name: "symbol-regexp",
			Q: &query.Symbol{Expr: &query.Regexp{
				Regexp:        mustParseRE("^app.e$"),
				Content:       true,
				CaseSensitive: true,
			}},
			Want: zoekt.Stats{
				ContentBytesLoaded: 35,
				IndexBytesLoaded:   2,
				FileCount:          2,
				FilesConsidered:    2, // must be 2 to ensure we used the index
				FilesLoaded:        2,
				MatchCount:         2, // apple symbols is in two files
				ShardsScanned:      1,
				NgramMatches:       2,
				NgramLookups:       2,
			},
		}}

		for _, tc := range cases {
			t.Run(tc.Name, func(t *testing.T) {
				sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
				if err != nil {
					t.Fatal(err)
				}
				// MatchTreeConstruction and MatchTreeSearch are left out of the
				// comparison. NOTE(review): presumably these are timing values
				// that vary between runs; confirm against zoekt.Stats.
				if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
					t.Errorf("unexpected Stats (-want +got):\n%s", diff)
				}
			})
		}
	})
}
605
606func TestAndNegateSearch(t *testing.T) {
607 b := testShardBuilder(t, nil,
608 Document{Name: "f1", Content: []byte("x banana y")},
609 // -----------------------------------0123456789
610 Document{Name: "f4", Content: []byte("x banana apple y")})
611
612 t.Run("LineMatches", func(t *testing.T) {
613 sres := searchForTest(t, b, query.NewAnd(
614 &query.Substring{
615 Pattern: "banana",
616 },
617 &query.Not{Child: &query.Substring{
618 Pattern: "apple",
619 }}))
620
621 matches := sres.Files
622
623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
624 t.Fatalf("got %v, want 1 match", matches)
625 }
626 if matches[0].FileName != "f1" {
627 t.Fatalf("got match %#v, want FileName: f1", matches[0])
628 }
629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
630 t.Fatalf("got %v, want offset 2", matches)
631 }
632 })
633
634 t.Run("ChunkMatches", func(t *testing.T) {
635 sres := searchForTest(t, b,
636 query.NewAnd(
637 &query.Substring{
638 Pattern: "banana",
639 },
640 &query.Not{Child: &query.Substring{
641 Pattern: "apple",
642 }},
643 ),
644 chunkOpts,
645 )
646
647 matches := sres.Files
648
649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
650 t.Fatalf("got %v, want 1 match", matches)
651 }
652 if matches[0].FileName != "f1" {
653 t.Fatalf("got match %#v, want FileName: f1", matches[0])
654 }
655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
656 t.Fatalf("got %v, want offset 2", matches)
657 }
658 })
659}
660
661func TestNegativeMatchesOnlyShortcut(t *testing.T) {
662 b := testShardBuilder(t, nil,
663 Document{Name: "f1", Content: []byte("x banana y")},
664 Document{Name: "f2", Content: []byte("x appelmoes y")},
665 Document{Name: "f3", Content: []byte("x appelmoes y")},
666 Document{Name: "f3", Content: []byte("x appelmoes y")})
667
668 t.Run("LineMatches", func(t *testing.T) {
669 sres := searchForTest(t, b, query.NewAnd(
670 &query.Substring{
671 Pattern: "banana",
672 },
673 &query.Not{Child: &query.Substring{
674 Pattern: "appel",
675 }}))
676
677 if sres.Stats.FilesConsidered != 1 {
678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
679 }
680 })
681
682 t.Run("ChunkMatches", func(t *testing.T) {
683 sres := searchForTest(t, b, query.NewAnd(
684 &query.Substring{
685 Pattern: "banana",
686 },
687 &query.Not{Child: &query.Substring{
688 Pattern: "appel",
689 }}), chunkOpts)
690
691 if sres.Stats.FilesConsidered != 1 {
692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
693 }
694 })
695}
696
697func TestFileSearch(t *testing.T) {
698 b := testShardBuilder(t, nil,
699 Document{Name: "banzana", Content: []byte("x orange y")},
700 // -------------0123456
701 Document{Name: "banana", Content: []byte("x apple y")},
702 // -------------012345
703 )
704
705 t.Run("LineMatches", func(t *testing.T) {
706 sres := searchForTest(t, b, &query.Substring{
707 Pattern: "anan",
708 FileName: true,
709 })
710
711 matches := sres.Files
712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
713 t.Fatalf("got %v, want 1 match", matches)
714 }
715
716 got := matches[0].LineMatches[0]
717 want := zoekt.LineMatch{
718 Line: []byte("banana"),
719 LineFragments: []zoekt.LineFragmentMatch{{
720 Offset: 1,
721 LineOffset: 1,
722 MatchLength: 4,
723 }},
724 FileName: true,
725 }
726
727 if !reflect.DeepEqual(got, want) {
728 t.Errorf("got %#v, want %#v", got, want)
729 }
730 })
731
732 t.Run("ChunkMatches", func(t *testing.T) {
733 sres := searchForTest(t, b, &query.Substring{
734 Pattern: "anan",
735 FileName: true,
736 }, chunkOpts)
737
738 matches := sres.Files
739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
740 t.Fatalf("got %v, want 1 match", matches)
741 }
742
743 got := matches[0].ChunkMatches[0]
744 want := zoekt.ChunkMatch{
745 Content: []byte("banana"),
746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
747 Ranges: []zoekt.Range{{
748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2},
749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6},
750 }},
751 FileName: true,
752 }
753
754 if diff := cmp.Diff(want, got); diff != "" {
755 t.Fatal(diff)
756 }
757 })
758
759 t.Run("FileNameSet", func(t *testing.T) {
760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
761
762 matches := sres.Files
763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
764 t.Fatalf("got %v, want 1 match", matches)
765 }
766
767 got := matches[0].ChunkMatches[0]
768 want := zoekt.ChunkMatch{
769 Content: []byte("banana"),
770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
771 Ranges: []zoekt.Range{{
772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7},
774 }},
775 FileName: true,
776 }
777
778 if diff := cmp.Diff(want, got); diff != "" {
779 t.Fatal(diff)
780 }
781 })
782}
783
784func TestFileCase(t *testing.T) {
785 b := testShardBuilder(t, nil,
786 Document{Name: "BANANA", Content: []byte("x orange y")})
787
788 t.Run("LineMatches", func(t *testing.T) {
789 sres := searchForTest(t, b, &query.Substring{
790 Pattern: "banana",
791 FileName: true,
792 })
793
794 matches := sres.Files
795 if len(matches) != 1 || matches[0].FileName != "BANANA" {
796 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
797 }
798 })
799
800 t.Run("ChunkMatches", func(t *testing.T) {
801 sres := searchForTest(t, b, &query.Substring{
802 Pattern: "banana",
803 FileName: true,
804 }, chunkOpts)
805
806 matches := sres.Files
807 if len(matches) != 1 || matches[0].FileName != "BANANA" {
808 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
809 }
810 })
811}
812
813func TestFileRegexpSearchBruteForce(t *testing.T) {
814 b := testShardBuilder(t, nil,
815 Document{Name: "banzana", Content: []byte("x orange y")},
816 Document{Name: "banana", Content: []byte("x apple y")},
817 )
818 t.Run("LineMatches", func(t *testing.T) {
819 sres := searchForTest(t, b, &query.Regexp{
820 Regexp: mustParseRE("[qn][zx]"),
821 FileName: true,
822 })
823
824 matches := sres.Files
825 if len(matches) != 1 || matches[0].FileName != "banzana" {
826 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
827 }
828 })
829 t.Run("LineMatches", func(t *testing.T) {
830 sres := searchForTest(t, b, &query.Regexp{
831 Regexp: mustParseRE("[qn][zx]"),
832 FileName: true,
833 }, chunkOpts)
834
835 matches := sres.Files
836 if len(matches) != 1 || matches[0].FileName != "banzana" {
837 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
838 }
839 })
840}
841
842func TestFileRegexpSearchShortString(t *testing.T) {
843 b := testShardBuilder(t, nil,
844 Document{Name: "banana.py", Content: []byte("x orange y")})
845
846 t.Run("LineMatches", func(t *testing.T) {
847 sres := searchForTest(t, b, &query.Regexp{
848 Regexp: mustParseRE("ana.py"),
849 FileName: true,
850 })
851
852 matches := sres.Files
853 if len(matches) != 1 || matches[0].FileName != "banana.py" {
854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
855 }
856 })
857
858 t.Run("ChunkMatches", func(t *testing.T) {
859 sres := searchForTest(t, b, &query.Regexp{
860 Regexp: mustParseRE("ana.py"),
861 FileName: true,
862 }, chunkOpts)
863
864 matches := sres.Files
865 if len(matches) != 1 || matches[0].FileName != "banana.py" {
866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
867 }
868 })
869}
870
871func TestFileSubstringSearchBruteForce(t *testing.T) {
872 b := testShardBuilder(t, nil,
873 Document{Name: "BANZANA", Content: []byte("x orange y")},
874 Document{Name: "banana", Content: []byte("x apple y")})
875
876 q := &query.Substring{
877 Pattern: "z",
878 FileName: true,
879 }
880
881 t.Run("LineMatches", func(t *testing.T) {
882 res := searchForTest(t, b, q)
883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
885 }
886 })
887
888 t.Run("ChunkMatches", func(t *testing.T) {
889 res := searchForTest(t, b, q, chunkOpts)
890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
892 }
893 })
894}
895
896func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
897 b := testShardBuilder(t, nil,
898 Document{Name: "BANZANA", Content: []byte("x orange y")},
899 Document{Name: "bananaq", Content: []byte("x apple y")})
900
901 q := &query.Substring{
902 Pattern: "q",
903 FileName: true,
904 }
905 t.Run("LineMatches", func(t *testing.T) {
906 res := searchForTest(t, b, q)
907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
908 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
909 }
910 })
911
912 t.Run("LineMatches", func(t *testing.T) {
913 res := searchForTest(t, b, q, chunkOpts)
914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
915 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
916 }
917 })
918}
919
920func TestSearchMatchAll(t *testing.T) {
921 b := testShardBuilder(t, nil,
922 Document{Name: "banzana", Content: []byte("x orange y")},
923 Document{Name: "banana", Content: []byte("x apple y")})
924
925 t.Run("LineMatches", func(t *testing.T) {
926 sres := searchForTest(t, b, &query.Const{Value: true})
927 matches := sres.Files
928 if len(matches) != 2 {
929 t.Fatalf("got %v, want 2 matches", matches)
930 }
931 })
932
933 t.Run("ChunkMatches", func(t *testing.T) {
934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
935 matches := sres.Files
936 if len(matches) != 2 {
937 t.Fatalf("got %v, want 2 matches", matches)
938 }
939 })
940}
941
942func TestSearchNewline(t *testing.T) {
943 b := testShardBuilder(t, nil,
944 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
945
946 t.Run("LineMatches", func(t *testing.T) {
947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
948
949 // Just check that we don't crash.
950
951 matches := sres.Files
952 if len(matches) != 1 {
953 t.Fatalf("got %v, want 1 matches", matches)
954 }
955 })
956
957 t.Run("ChunkMatches", func(t *testing.T) {
958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
959
960 // Just check that we don't crash.
961
962 matches := sres.Files
963 if len(matches) != 1 {
964 t.Fatalf("got %v, want 1 matches", matches)
965 }
966 })
967}
968
969func TestSearchMatchAllRegexp(t *testing.T) {
970 b := testShardBuilder(t, nil,
971 Document{Name: "banzana", Content: []byte("abcd")},
972 Document{Name: "banana", Content: []byte("pqrs")})
973
974 t.Run("LineMatches", func(t *testing.T) {
975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
976
977 matches := sres.Files
978 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
979 t.Fatalf("got %v, want 2 matches", matches)
980 }
981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
982 t.Fatalf("want 4 chars in every file, got %#v", matches)
983 }
984 })
985
986 t.Run("ChunkMatches", func(t *testing.T) {
987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
988
989 matches := sres.Files
990 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
991 t.Fatalf("got %v, want 2 matches", matches)
992 }
993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
994 t.Fatalf("want 4 chars in every file, got %#v", matches)
995 }
996 })
997}
998
999func TestSearchBM25MatchScores(t *testing.T) {
1000 ctx := context.Background()
1001 searcher := searcherForTest(t, testShardBuilder(t, nil,
1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")},
1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")},
1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}),
1005 ))
1006
1007 t.Run("LineMatches", func(t *testing.T) {
1008 q := &query.Substring{Pattern: "two"}
1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true})
1010 if err != nil {
1011 t.Fatal(err)
1012 }
1013 matches := sres.Files
1014 if len(matches) != 1 {
1015 t.Fatalf("want 1 file index, got %d", len(matches))
1016 }
1017
1018 if len(matches[0].LineMatches) != 2 {
1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1020 }
1021
1022 if matches[0].LineMatches[0].LineNumber != 4 {
1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber)
1024 }
1025 })
1026
1027 t.Run("ChunkMatches", func(t *testing.T) {
1028 q := &query.Substring{Pattern: "five"}
1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1030 if err != nil {
1031 t.Fatal(err)
1032 }
1033
1034 matches := sres.Files
1035 if len(matches) != 1 {
1036 t.Fatalf("want 1 file index, got %d", len(matches))
1037 }
1038
1039 if len(matches[0].ChunkMatches) != 2 {
1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1041 }
1042
1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 {
1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch)
1045 }
1046 })
1047
1048 t.Run("ChunkMatches with symbols", func(t *testing.T) {
1049 q := &query.Or{
1050 Children: []query.Q{
1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}},
1052 &query.Substring{Pattern: "five"},
1053 },
1054 }
1055
1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1057 if err != nil {
1058 t.Fatal(err)
1059 }
1060
1061 matches := sres.Files
1062 if len(matches) != 2 {
1063 t.Fatalf("want 2 file index, got %d", len(matches))
1064 }
1065
1066 foundSymbolInfo := false
1067 for _, m := range matches {
1068 for _, cm := range m.ChunkMatches {
1069 if len(cm.SymbolInfo) > 0 {
1070 foundSymbolInfo = true
1071 }
1072 }
1073 }
1074
1075 if !foundSymbolInfo {
1076 t.Fatalf("want symbol info, got none")
1077 }
1078 })
1079}
1080
1081func TestFileRestriction(t *testing.T) {
1082 b := testShardBuilder(t, nil,
1083 Document{Name: "banana1", Content: []byte("x orange y")},
1084 Document{Name: "banana2", Content: []byte("x apple y")},
1085 Document{Name: "orange", Content: []byte("x apple z")})
1086
1087 t.Run("LineMatches", func(t *testing.T) {
1088 sres := searchForTest(t, b, query.NewAnd(
1089 &query.Substring{
1090 Pattern: "banana",
1091 FileName: true,
1092 },
1093 &query.Substring{
1094 Pattern: "apple",
1095 }))
1096
1097 matches := sres.Files
1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1099 t.Fatalf("got %v, want 1 match", matches)
1100 }
1101
1102 match := matches[0].LineMatches[0]
1103 got := string(match.Line)
1104 want := "x apple y"
1105 if got != want {
1106 t.Errorf("got match %#v, want line %q", match, want)
1107 }
1108 })
1109
1110 t.Run("ChunkMatches", func(t *testing.T) {
1111 sres := searchForTest(t, b, query.NewAnd(
1112 &query.Substring{
1113 Pattern: "banana",
1114 FileName: true,
1115 },
1116 &query.Substring{
1117 Pattern: "apple",
1118 }), chunkOpts)
1119
1120 matches := sres.Files
1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1122 t.Fatalf("got %v, want 1 match", matches)
1123 }
1124
1125 match := matches[0].ChunkMatches[0]
1126 got := string(match.Content)
1127 want := "x apple y"
1128 if got != want {
1129 t.Errorf("got match %#v, want line %q", match, want)
1130 }
1131 })
1132}
1133
1134func TestFileNameBoundary(t *testing.T) {
1135 b := testShardBuilder(t, nil,
1136 Document{Name: "banana2", Content: []byte("x apple y")},
1137 Document{Name: "helpers.go", Content: []byte("x apple y")},
1138 Document{Name: "foo", Content: []byte("x apple y")})
1139
1140 t.Run("LineMatches", func(t *testing.T) {
1141 sres := searchForTest(t, b, &query.Substring{
1142 Pattern: "helpers.go",
1143 FileName: true,
1144 })
1145
1146 matches := sres.Files
1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1148 t.Fatalf("got %v, want 1 match", matches)
1149 }
1150 })
1151
1152 t.Run("ChunkMatches", func(t *testing.T) {
1153 sres := searchForTest(t, b, &query.Substring{
1154 Pattern: "helpers.go",
1155 FileName: true,
1156 }, chunkOpts)
1157
1158 matches := sres.Files
1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1160 t.Fatalf("got %v, want 1 match", matches)
1161 }
1162 })
1163}
1164
1165func TestDocumentOrder(t *testing.T) {
1166 var docs []Document
1167 for i := range 3 {
1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1169 }
1170
1171 b := testShardBuilder(t, nil, docs...)
1172
1173 t.Run("LineMatches", func(t *testing.T) {
1174 sres := searchForTest(t, b, query.NewAnd(
1175 &query.Substring{
1176 Pattern: "needle",
1177 }))
1178
1179 want := []string{"f0", "f1", "f2"}
1180 var got []string
1181 for _, f := range sres.Files {
1182 got = append(got, f.FileName)
1183 }
1184 if !reflect.DeepEqual(got, want) {
1185 t.Fatalf("got %v, want %v", got, want)
1186 }
1187 })
1188
1189 t.Run("ChunkMatches", func(t *testing.T) {
1190 sres := searchForTest(t, b,
1191 query.NewAnd(&query.Substring{
1192 Pattern: "needle",
1193 }),
1194 chunkOpts,
1195 )
1196
1197 want := []string{"f0", "f1", "f2"}
1198 var got []string
1199 for _, f := range sres.Files {
1200 got = append(got, f.FileName)
1201 }
1202 if !reflect.DeepEqual(got, want) {
1203 t.Fatalf("got %v, want %v", got, want)
1204 }
1205 })
1206}
1207
1208func TestBranchMask(t *testing.T) {
1209 b := testShardBuilder(t, &zoekt.Repository{
1210 Branches: []zoekt.RepositoryBranch{
1211 {"master", "v-master"},
1212 {"stable", "v-stable"},
1213 {"bonzai", "v-bonzai"},
1214 },
1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1219 )
1220
1221 t.Run("LineMatches", func(t *testing.T) {
1222 sres := searchForTest(t, b, query.NewAnd(
1223 &query.Substring{
1224 Pattern: "needle",
1225 },
1226 &query.Branch{
1227 Pattern: "table",
1228 }))
1229
1230 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1231 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1232 }
1233
1234 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1235 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1236 }
1237 })
1238
1239 t.Run("ChunkMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b, query.NewAnd(
1241 &query.Substring{
1242 Pattern: "needle",
1243 },
1244 &query.Branch{
1245 Pattern: "table",
1246 }),
1247 chunkOpts,
1248 )
1249
1250 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1251 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1252 }
1253
1254 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1255 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1256 }
1257 })
1258}
1259
1260func TestBranchLimit(t *testing.T) {
1261 for limit := 64; limit <= 65; limit++ {
1262 r := &zoekt.Repository{}
1263 for i := range limit {
1264 s := fmt.Sprintf("b%d", i)
1265 r.Branches = append(r.Branches, zoekt.RepositoryBranch{
1266 s, "v-" + s,
1267 })
1268 }
1269 _, err := NewShardBuilder(r)
1270 if limit == 64 && err != nil {
1271 t.Fatalf("NewShardBuilder: %v", err)
1272 } else if limit == 65 && err == nil {
1273 t.Fatalf("NewShardBuilder succeeded")
1274 }
1275 }
1276}
1277
1278func TestBranchReport(t *testing.T) {
1279 branches := []string{"stable", "master"}
1280 b := testShardBuilder(t, &zoekt.Repository{
1281 Branches: []zoekt.RepositoryBranch{
1282 {"stable", "vs"},
1283 {"master", "vm"},
1284 },
1285 },
1286 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1287
1288 t.Run("LineMatches", func(t *testing.T) {
1289 sres := searchForTest(t, b, &query.Substring{
1290 Pattern: "needle",
1291 })
1292 if len(sres.Files) != 1 {
1293 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1294 }
1295
1296 f := sres.Files[0]
1297 if !reflect.DeepEqual(f.Branches, branches) {
1298 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1299 }
1300 })
1301
1302 t.Run("ChunkMatches", func(t *testing.T) {
1303 sres := searchForTest(t, b, &query.Substring{
1304 Pattern: "needle",
1305 }, chunkOpts)
1306 if len(sres.Files) != 1 {
1307 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1308 }
1309
1310 f := sres.Files[0]
1311 if !reflect.DeepEqual(f.Branches, branches) {
1312 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1313 }
1314 })
1315}
1316
1317func TestBranchVersions(t *testing.T) {
1318 b := testShardBuilder(t, &zoekt.Repository{
1319 Branches: []zoekt.RepositoryBranch{
1320 {"stable", "v-stable"},
1321 {"master", "v-master"},
1322 },
1323 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1324
1325 t.Run("LineMatches", func(t *testing.T) {
1326 sres := searchForTest(t, b, &query.Substring{
1327 Pattern: "needle",
1328 })
1329 if len(sres.Files) != 1 {
1330 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1331 }
1332
1333 f := sres.Files[0]
1334 if f.Version != "v-master" {
1335 t.Fatalf("got file %#v, want version 'v-master'", f)
1336 }
1337 })
1338
1339 t.Run("ChunkMatches", func(t *testing.T) {
1340 sres := searchForTest(t, b, &query.Substring{
1341 Pattern: "needle",
1342 }, chunkOpts)
1343 if len(sres.Files) != 1 {
1344 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1345 }
1346
1347 f := sres.Files[0]
1348 if f.Version != "v-master" {
1349 t.Fatalf("got file %#v, want version 'v-master'", f)
1350 }
1351 })
1352}
1353
// mustParseRE parses s with Perl syntax flags and returns the parsed regexp.
// It panics with the parse error if s is not a valid pattern, so it is only
// suitable for patterns known to be valid, such as test fixtures.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1362
1363func TestRegexp(t *testing.T) {
1364 content := []byte("needle the bla")
1365 // ----------------01234567890123
1366
1367 b := testShardBuilder(t, nil,
1368 Document{
1369 Name: "f1",
1370 Content: content,
1371 })
1372
1373 t.Run("LineMatches", func(t *testing.T) {
1374 sres := searchForTest(t, b,
1375 &query.Regexp{
1376 Regexp: mustParseRE("dle.*bla"),
1377 })
1378
1379 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1380 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1381 }
1382
1383 got := sres.Files[0].LineMatches[0]
1384 want := zoekt.LineMatch{
1385 LineFragments: []zoekt.LineFragmentMatch{{
1386 LineOffset: 3,
1387 Offset: 3,
1388 MatchLength: 11,
1389 }},
1390 Line: content,
1391 FileName: false,
1392 LineNumber: 1,
1393 LineStart: 0,
1394 LineEnd: 14,
1395 }
1396
1397 if !reflect.DeepEqual(got, want) {
1398 t.Errorf("got %#v, want %#v", got, want)
1399 }
1400 })
1401
1402 t.Run("ChunkMatches", func(t *testing.T) {
1403 sres := searchForTest(t, b,
1404 &query.Regexp{
1405 Regexp: mustParseRE("dle.*bla"),
1406 }, chunkOpts)
1407
1408 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1409 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1410 }
1411
1412 got := sres.Files[0].ChunkMatches[0]
1413 want := zoekt.ChunkMatch{
1414 Content: content,
1415 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1416 Ranges: []zoekt.Range{{
1417 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1418 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1419 }},
1420 }
1421
1422 if diff := cmp.Diff(want, got); diff != "" {
1423 t.Fatal(diff)
1424 }
1425 })
1426}
1427
1428func TestRegexpFile(t *testing.T) {
1429 content := []byte("needle the bla")
1430
1431 name := "let's play: find the mussel"
1432 b := testShardBuilder(t, nil,
1433 Document{Name: name, Content: content},
1434 Document{Name: "play.txt", Content: content})
1435
1436 t.Run("LineMatches", func(t *testing.T) {
1437 sres := searchForTest(t, b,
1438 &query.Regexp{
1439 Regexp: mustParseRE("play.*mussel"),
1440 FileName: true,
1441 })
1442
1443 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1444 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1445 }
1446
1447 if sres.Files[0].FileName != name {
1448 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1449 }
1450 })
1451
1452 t.Run("ChunkMatches", func(t *testing.T) {
1453 sres := searchForTest(t, b,
1454 &query.Regexp{
1455 Regexp: mustParseRE("play.*mussel"),
1456 FileName: true,
1457 }, chunkOpts)
1458
1459 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1460 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1461 }
1462
1463 if sres.Files[0].FileName != name {
1464 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1465 }
1466 })
1467}
1468
1469func TestRegexpOrder(t *testing.T) {
1470 content := []byte("bla the needle")
1471 // ----------------01234567890123
1472
1473 b := testShardBuilder(t, nil,
1474 Document{Name: "f1", Content: content})
1475
1476 t.Run("LineMatches", func(t *testing.T) {
1477 sres := searchForTest(t, b,
1478 &query.Regexp{
1479 Regexp: mustParseRE("dle.*bla"),
1480 })
1481
1482 if len(sres.Files) != 0 {
1483 t.Fatalf("got %v, want 0 matches", sres.Files)
1484 }
1485 })
1486
1487 t.Run("ChunkMatches", func(t *testing.T) {
1488 sres := searchForTest(t, b,
1489 &query.Regexp{
1490 Regexp: mustParseRE("dle.*bla"),
1491 })
1492
1493 if len(sres.Files) != 0 {
1494 t.Fatalf("got %v, want 0 matches", sres.Files)
1495 }
1496 })
1497}
1498
1499func TestRepoName(t *testing.T) {
1500 content := []byte("bla the needle")
1501 // ----------------01234567890123
1502
1503 b := testShardBuilder(t, &zoekt.Repository{Name: "bla"},
1504 Document{Name: "f1", Content: content})
1505
1506 t.Run("LineMatches", func(t *testing.T) {
1507 sres := searchForTest(t, b,
1508 query.NewAnd(
1509 &query.Substring{Pattern: "needle"},
1510 &query.Repo{Regexp: regexp.MustCompile("foo")},
1511 ))
1512
1513 if len(sres.Files) != 0 {
1514 t.Fatalf("got %v, want 0 matches", sres.Files)
1515 }
1516
1517 if sres.Stats.FilesConsidered > 0 {
1518 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1519 }
1520
1521 sres = searchForTest(t, b,
1522 query.NewAnd(
1523 &query.Substring{Pattern: "needle"},
1524 &query.Repo{Regexp: regexp.MustCompile("bla")},
1525 ))
1526 if len(sres.Files) != 1 {
1527 t.Fatalf("got %v, want 1 match", sres.Files)
1528 }
1529 })
1530
1531 t.Run("ChunkMatches", func(t *testing.T) {
1532 sres := searchForTest(t, b,
1533 query.NewAnd(
1534 &query.Substring{Pattern: "needle"},
1535 &query.Repo{Regexp: regexp.MustCompile("foo")},
1536 ),
1537 chunkOpts,
1538 )
1539
1540 if len(sres.Files) != 0 {
1541 t.Fatalf("got %v, want 0 matches", sres.Files)
1542 }
1543
1544 if sres.Stats.FilesConsidered > 0 {
1545 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1546 }
1547
1548 sres = searchForTest(t, b,
1549 query.NewAnd(
1550 &query.Substring{Pattern: "needle"},
1551 &query.Repo{Regexp: regexp.MustCompile("bla")},
1552 ))
1553 if len(sres.Files) != 1 {
1554 t.Fatalf("got %v, want 1 match", sres.Files)
1555 }
1556 })
1557}
1558
1559func TestMergeMatches(t *testing.T) {
1560 t.Run("LineMatches, adjacent matches", func(t *testing.T) {
1561 b := testShardBuilder(t, nil,
1562 Document{Name: "f1", Content: []byte("blablabla")})
1563 sres := searchForTest(t, b,
1564 &query.Substring{Pattern: "bla"})
1565
1566 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1567 t.Fatalf("got %v, want 1 match", sres.Files)
1568 }
1569
1570 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
1571 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
1572 }
1573 })
1574
1575 t.Run("LineMatches, overlapping matches", func(t *testing.T) {
1576 b := testShardBuilder(t, nil,
1577 Document{Name: "f1", Content: []byte("hellogoodbye")})
1578 sres := searchForTest(t, b,
1579 &query.And{Children: []query.Q{
1580 &query.Substring{Pattern: "hello"},
1581 &query.Substring{Pattern: "logood"},
1582 }})
1583
1584 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1585 t.Fatalf("got %v, want 1 match", sres.Files)
1586 }
1587
1588 lineFragments := sres.Files[0].LineMatches[0].LineFragments
1589 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
1590 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
1591 }
1592 })
1593
1594 t.Run("ChunkMatches, no overlap", func(t *testing.T) {
1595 b := testShardBuilder(t, nil,
1596 Document{Name: "f1", Content: []byte("blablabla")})
1597
1598 sres := searchForTest(t, b,
1599 &query.Substring{Pattern: "bla"},
1600 chunkOpts,
1601 )
1602 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1603 t.Fatalf("got %v, want 1 match", sres.Files)
1604 }
1605
1606 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
1607 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
1608 }
1609 })
1610
1611 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
1612 b := testShardBuilder(t, nil,
1613 Document{Name: "f1", Content: []byte("hellogoodbye")})
1614 sres := searchForTest(t, b,
1615 &query.And{Children: []query.Q{
1616 &query.Substring{Pattern: "hello"},
1617 &query.Substring{Pattern: "logood"},
1618 }}, chunkOpts)
1619
1620 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1621 t.Fatalf("got %v, want 1 chunk match", sres.Files)
1622 }
1623
1624 ranges := sres.Files[0].ChunkMatches[0].Ranges
1625 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
1626 t.Fatalf("got %v, want single chunk range 'hello'", ranges)
1627 }
1628 })
1629}
1630
1631func TestRepoURL(t *testing.T) {
1632 content := []byte("blablabla")
1633 b := testShardBuilder(t, &zoekt.Repository{
1634 Name: "name",
1635 URL: "URL",
1636 CommitURLTemplate: "commit",
1637 FileURLTemplate: "file-url",
1638 LineFragmentTemplate: "fragment",
1639 }, Document{Name: "f1", Content: content})
1640
1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1642
1643 // RepoURLs/LineFragments are keyed by the repo's URL (its unique identity).
1644 if sres.RepoURLs["URL"] != "file-url" {
1645 t.Errorf("got RepoURLs %v, want {URL: file-url}", sres.RepoURLs)
1646 }
1647 if sres.LineFragments["URL"] != "fragment" {
1648 t.Errorf("got URLs %v, want {URL: fragment}", sres.LineFragments)
1649 }
1650}
1651
1652func TestRegexpCaseSensitive(t *testing.T) {
1653 content := []byte("bla\nfunc unmarshalGitiles\n")
1654 b := testShardBuilder(t, nil, Document{
1655 Name: "f1",
1656 Content: content,
1657 })
1658
1659 t.Run("LineMatches", func(t *testing.T) {
1660 res := searchForTest(t, b,
1661 &query.Regexp{
1662 Regexp: mustParseRE("func.*Gitiles"),
1663 CaseSensitive: true,
1664 })
1665
1666 if len(res.Files) != 1 {
1667 t.Fatalf("got %v, want one index", res.Files)
1668 }
1669 })
1670
1671 t.Run("ChunkMatches", func(t *testing.T) {
1672 res := searchForTest(t, b,
1673 &query.Regexp{
1674 Regexp: mustParseRE("func.*Gitiles"),
1675 CaseSensitive: true,
1676 },
1677 chunkOpts,
1678 )
1679
1680 if len(res.Files) != 1 {
1681 t.Fatalf("got %v, want one index", res.Files)
1682 }
1683 })
1684}
1685
1686func TestRegexpCaseFolding(t *testing.T) {
1687 content := []byte("bla\nfunc unmarshalGitiles\n")
1688
1689 b := testShardBuilder(t, nil,
1690 Document{Name: "f1", Content: content})
1691 res := searchForTest(t, b,
1692 &query.Regexp{
1693 Regexp: mustParseRE("func.*GITILES"),
1694 CaseSensitive: false,
1695 })
1696
1697 if len(res.Files) != 1 {
1698 t.Fatalf("got %v, want one index", res.Files)
1699 }
1700}
1701
1702func TestCaseRegexp(t *testing.T) {
1703 content := []byte("BLABLABLA")
1704 b := testShardBuilder(t, nil,
1705 Document{Name: "f1", Content: content})
1706
1707 t.Run("LineMatches", func(t *testing.T) {
1708 res := searchForTest(t, b,
1709 &query.Regexp{
1710 Regexp: mustParseRE("[xb][xl][xa]"),
1711 CaseSensitive: true,
1712 })
1713
1714 if len(res.Files) > 0 {
1715 t.Fatalf("got %v, want no matches", res.Files)
1716 }
1717 })
1718
1719 t.Run("ChunkMatches", func(t *testing.T) {
1720 res := searchForTest(t, b,
1721 &query.Regexp{
1722 Regexp: mustParseRE("[xb][xl][xa]"),
1723 CaseSensitive: true,
1724 },
1725 chunkOpts,
1726 )
1727
1728 if len(res.Files) > 0 {
1729 t.Fatalf("got %v, want no matches", res.Files)
1730 }
1731 })
1732}
1733
1734func TestNegativeRegexp(t *testing.T) {
1735 content := []byte("BLABLABLA needle bla")
1736 b := testShardBuilder(t, nil,
1737 Document{Name: "f1", Content: content})
1738
1739 t.Run("LineMatches", func(t *testing.T) {
1740 res := searchForTest(t, b,
1741 query.NewAnd(
1742 &query.Substring{
1743 Pattern: "needle",
1744 },
1745 &query.Not{
1746 Child: &query.Regexp{
1747 Regexp: mustParseRE(".cs"),
1748 },
1749 }))
1750
1751 if len(res.Files) != 1 {
1752 t.Fatalf("got %v, want 1 match", res.Files)
1753 }
1754 })
1755
1756 t.Run("ChunkMatches", func(t *testing.T) {
1757 res := searchForTest(t, b,
1758 query.NewAnd(
1759 &query.Substring{
1760 Pattern: "needle",
1761 },
1762 &query.Not{
1763 Child: &query.Regexp{
1764 Regexp: mustParseRE(".cs"),
1765 },
1766 },
1767 ),
1768 chunkOpts)
1769
1770 if len(res.Files) != 1 {
1771 t.Fatalf("got %v, want 1 match", res.Files)
1772 }
1773 })
1774}
1775
1776func TestSymbolRank(t *testing.T) {
1777 t.Skip()
1778
1779 content := []byte("func bla() blubxxxxx")
1780 // ----------------01234567890123456789
1781 b := testShardBuilder(t, nil,
1782 Document{
1783 Name: "f1",
1784 Content: content,
1785 }, Document{
1786 Name: "f2",
1787 Content: content,
1788 Symbols: []DocumentSection{{5, 8}},
1789 }, Document{
1790 Name: "f3",
1791 Content: content,
1792 })
1793
1794 t.Run("LineMatches", func(t *testing.T) {
1795 res := searchForTest(t, b,
1796 &query.Substring{
1797 CaseSensitive: false,
1798 Pattern: "bla",
1799 })
1800
1801 if len(res.Files) != 3 {
1802 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1803 }
1804 if res.Files[0].FileName != "f2" {
1805 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1806 }
1807 })
1808
1809 t.Run("ChunkMatches", func(t *testing.T) {
1810 res := searchForTest(t, b,
1811 &query.Substring{
1812 CaseSensitive: false,
1813 Pattern: "bla",
1814 }, chunkOpts)
1815
1816 if len(res.Files) != 3 {
1817 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1818 }
1819 if res.Files[0].FileName != "f2" {
1820 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1821 }
1822 })
1823}
1824
1825func TestSymbolRankRegexpUTF8(t *testing.T) {
1826 t.Skip()
1827
1828 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1829 content := []byte(prefix +
1830 "func bla() blub")
1831 // ------012345678901234
1832 b := testShardBuilder(t, nil,
1833 Document{
1834 Name: "f1",
1835 Content: content,
1836 }, Document{
1837 Name: "f2",
1838 Content: content,
1839 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1840 }, Document{
1841 Name: "f3",
1842 Content: content,
1843 })
1844
1845 t.Run("LineMatches", func(t *testing.T) {
1846 res := searchForTest(t, b,
1847 &query.Regexp{
1848 Regexp: mustParseRE("b.a"),
1849 })
1850
1851 if len(res.Files) != 3 {
1852 t.Fatalf("got %#v, want 3 files", res.Files)
1853 }
1854 if res.Files[0].FileName != "f2" {
1855 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1856 }
1857 })
1858
1859 t.Run("ChunjkMatches", func(t *testing.T) {
1860 res := searchForTest(t, b,
1861 &query.Regexp{
1862 Regexp: mustParseRE("b.a"),
1863 }, chunkOpts)
1864
1865 if len(res.Files) != 3 {
1866 t.Fatalf("got %#v, want 3 files", res.Files)
1867 }
1868 if res.Files[0].FileName != "f2" {
1869 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1870 }
1871 })
1872}
1873
1874func TestPartialSymbolRank(t *testing.T) {
1875 t.Skip()
1876
1877 content := []byte("func bla() blub")
1878 // ----------------012345678901234
1879
1880 b := testShardBuilder(t, nil,
1881 Document{
1882 Name: "f1",
1883 Content: content,
1884 Symbols: []DocumentSection{{4, 9}},
1885 }, Document{
1886 Name: "f2",
1887 Content: content,
1888 Symbols: []DocumentSection{{4, 8}},
1889 }, Document{
1890 Name: "f3",
1891 Content: content,
1892 Symbols: []DocumentSection{{4, 9}},
1893 })
1894
1895 t.Run("LineMatches", func(t *testing.T) {
1896 res := searchForTest(t, b,
1897 &query.Substring{
1898 Pattern: "bla",
1899 })
1900
1901 if len(res.Files) != 3 {
1902 t.Fatalf("got %#v, want 3 files", res.Files)
1903 }
1904 if res.Files[0].FileName != "f2" {
1905 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1906 }
1907 })
1908
1909 t.Run("ChunkMatches", func(t *testing.T) {
1910 res := searchForTest(t, b,
1911 &query.Substring{
1912 Pattern: "bla",
1913 }, chunkOpts)
1914
1915 if len(res.Files) != 3 {
1916 t.Fatalf("got %#v, want 3 files", res.Files)
1917 }
1918 if res.Files[0].FileName != "f2" {
1919 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1920 }
1921 })
1922}
1923
1924func TestNegativeRepo(t *testing.T) {
1925 content := []byte("bla the needle")
1926 // ----------------01234567890123
1927 b := testShardBuilder(t, &zoekt.Repository{
1928 Name: "bla",
1929 }, Document{Name: "f1", Content: content})
1930
1931 t.Run("LineMatches", func(t *testing.T) {
1932 sres := searchForTest(t, b,
1933 query.NewAnd(
1934 &query.Substring{Pattern: "needle"},
1935 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1936 ))
1937
1938 if len(sres.Files) != 0 {
1939 t.Fatalf("got %v, want 0 matches", sres.Files)
1940 }
1941 })
1942
1943 t.Run("ChunkMatches", func(t *testing.T) {
1944 sres := searchForTest(t, b,
1945 query.NewAnd(
1946 &query.Substring{Pattern: "needle"},
1947 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1948 ), chunkOpts)
1949
1950 if len(sres.Files) != 0 {
1951 t.Fatalf("got %v, want 0 matches", sres.Files)
1952 }
1953 })
1954}
1955
1956func TestListRepos(t *testing.T) {
1957 content := []byte("bla the needle\n")
1958 // ----------------012345678901234-
1959
1960 t.Run("default and minimal fallback", func(t *testing.T) {
1961 repo := &zoekt.Repository{
1962 Name: "reponame",
1963 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1964 }
1965 b := testShardBuilder(t, repo,
1966 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1967 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1968 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1969 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1970
1971 searcher := searcherForTest(t, b)
1972
1973 for _, opts := range []*zoekt.ListOptions{
1974 nil,
1975 {},
1976 {Field: zoekt.RepoListFieldRepos},
1977 {Field: zoekt.RepoListFieldReposMap},
1978 } {
1979 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1980 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1981
1982 res, err := searcher.List(context.Background(), q, opts)
1983 if err != nil {
1984 t.Fatalf("List(%v): %v", q, err)
1985 }
1986
1987 want := &zoekt.RepoList{
1988 Repos: []*zoekt.RepoListEntry{{
1989 Repository: *repo,
1990 Stats: zoekt.RepoStats{
1991 Documents: 4,
1992 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1993 Shards: 1,
1994
1995 NewLinesCount: 4,
1996 DefaultBranchNewLinesCount: 2,
1997 OtherBranchesNewLinesCount: 3,
1998 },
1999 }},
2000 Stats: zoekt.RepoStats{
2001 Repos: 1,
2002 Documents: 4,
2003 ContentBytes: 68,
2004 Shards: 1,
2005
2006 NewLinesCount: 4,
2007 DefaultBranchNewLinesCount: 2,
2008 OtherBranchesNewLinesCount: 3,
2009 },
2010 }
2011 ignored := []cmp.Option{
2012 cmpopts.EquateEmpty(),
2013 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
2014 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
2015 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
2016 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
2017 }
2018 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2019 t.Fatalf("mismatch (-want +got):\n%s", diff)
2020 }
2021
2022 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2023 res, err = searcher.List(context.Background(), q, nil)
2024 if err != nil {
2025 t.Fatalf("List(%v): %v", q, err)
2026 }
2027 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2028 t.Fatalf("got %v, want 0 matches", res)
2029 }
2030 })
2031 }
2032 })
2033
2034 t.Run("minimal", func(t *testing.T) {
2035 repo := &zoekt.Repository{
2036 ID: 1234,
2037 Name: "reponame",
2038 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
2039 RawConfig: map[string]string{"repoid": "1234"},
2040 }
2041 b := testShardBuilder(t, repo,
2042 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
2043 Document{Name: "f2", Content: content, Branches: []string{"main"}},
2044 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
2045 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
2046
2047 searcher := searcherForTest(t, b)
2048
2049 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
2050 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2051 if err != nil {
2052 t.Fatalf("List(%v): %v", q, err)
2053 }
2054
2055 want := &zoekt.RepoList{
2056 ReposMap: zoekt.ReposMap{
2057 repo.ID: {
2058 HasSymbols: repo.HasSymbols,
2059 Branches: repo.Branches,
2060 },
2061 },
2062 Stats: zoekt.RepoStats{
2063 Repos: 1,
2064 Shards: 1,
2065 Documents: 4,
2066 IndexBytes: 412,
2067 ContentBytes: 68,
2068 NewLinesCount: 4,
2069 DefaultBranchNewLinesCount: 2,
2070 OtherBranchesNewLinesCount: 3,
2071 },
2072 }
2073
2074 ignored := []cmp.Option{
2075 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
2076 }
2077 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2078 t.Fatalf("mismatch (-want +got):\n%s", diff)
2079 }
2080
2081 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2082 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2083 if err != nil {
2084 t.Fatalf("List(%v): %v", q, err)
2085 }
2086 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2087 t.Fatalf("got %v, want 0 matches", res)
2088 }
2089 })
2090}
2091
2092func TestListReposByContent(t *testing.T) {
2093 content := []byte("bla the needle")
2094
2095 b := testShardBuilder(t, &zoekt.Repository{
2096 Name: "reponame",
2097 },
2098 Document{Name: "f1", Content: content},
2099 Document{Name: "f2", Content: content})
2100
2101 searcher := searcherForTest(t, b)
2102 q := &query.Substring{Pattern: "needle"}
2103 res, err := searcher.List(context.Background(), q, nil)
2104 if err != nil {
2105 t.Fatalf("List(%v): %v", q, err)
2106 }
2107 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
2108 t.Fatalf("got %v, want 1 matches", res)
2109 }
2110 if got := res.Repos[0].Stats.Shards; got != 1 {
2111 t.Fatalf("got %d, want 1 shard", got)
2112 }
2113 q = &query.Substring{Pattern: "foo"}
2114 res, err = searcher.List(context.Background(), q, nil)
2115 if err != nil {
2116 t.Fatalf("List(%v): %v", q, err)
2117 }
2118 if len(res.Repos) != 0 {
2119 t.Fatalf("got %v, want 0 matches", res)
2120 }
2121}
2122
2123func TestMetadata(t *testing.T) {
2124 content := []byte("bla the needle")
2125
2126 b := testShardBuilder(t, &zoekt.Repository{
2127 Name: "reponame",
2128 }, Document{Name: "f1", Content: content},
2129 Document{Name: "f2", Content: content})
2130
2131 var buf bytes.Buffer
2132 if err := b.Write(&buf); err != nil {
2133 t.Fatal(err)
2134 }
2135 f := &memSeeker{buf.Bytes()}
2136
2137 rd, _, err := ReadMetadata(f)
2138 if err != nil {
2139 t.Fatalf("ReadMetadata: %v", err)
2140 }
2141
2142 if got, want := rd[0].Name, "reponame"; got != want {
2143 t.Fatalf("got %q want %q", got, want)
2144 }
2145}
2146
2147func TestRepoWithMetadata(t *testing.T) {
2148 sb := newShardBuilder(0)
2149 sb.repoList = []zoekt.Repository{
2150 {
2151 Name: "repo1",
2152 Metadata: map[string]string{"language": "go", "custom_key": "value"},
2153 },
2154 }
2155
2156 var buf bytes.Buffer
2157 if err := sb.Write(&buf); err != nil {
2158 t.Fatalf("failed to write shard: %v", err)
2159 }
2160
2161 // Simulate reading the shard back
2162 f := &memSeeker{buf.Bytes()}
2163 repoMetaData, _, err := ReadMetadata(f)
2164 if err != nil {
2165 t.Fatalf("failed to read metadata: %v", err)
2166 }
2167
2168 // Verify the metadata
2169 if len(repoMetaData) != 1 {
2170 t.Fatalf("expected 1 repository, got %d", len(repoMetaData))
2171 }
2172 if got, want := repoMetaData[0].Metadata["language"], "go"; got != want {
2173 t.Errorf("expected metadata 'language' to be %q, got %q", want, got)
2174 }
2175 if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want {
2176 t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got)
2177 }
2178}
2179
2180func TestOr(t *testing.T) {
2181 b := testShardBuilder(t, nil,
2182 Document{Name: "f1", Content: []byte("needle")},
2183 Document{Name: "f2", Content: []byte("banana")})
2184 t.Run("LineMatches", func(t *testing.T) {
2185 sres := searchForTest(t, b, query.NewOr(
2186 &query.Substring{Pattern: "needle"},
2187 &query.Substring{Pattern: "banana"}))
2188
2189 if len(sres.Files) != 2 {
2190 t.Fatalf("got %v, want 2 files", sres.Files)
2191 }
2192 })
2193
2194 t.Run("ChunkMatches", func(t *testing.T) {
2195 sres := searchForTest(t, b, query.NewOr(
2196 &query.Substring{Pattern: "needle"},
2197 &query.Substring{Pattern: "banana"}))
2198
2199 if len(sres.Files) != 2 {
2200 t.Fatalf("got %v, want 2 files", sres.Files)
2201 }
2202 })
2203}
2204
2205func TestFrequency(t *testing.T) {
2206 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2207
2208 b := testShardBuilder(t, nil,
2209 Document{
2210 Name: "f1",
2211 Content: content,
2212 })
2213
2214 t.Run("LineMatches", func(t *testing.T) {
2215 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2216 if len(sres.Files) != 0 {
2217 t.Errorf("got %v, wanted 0 matches", sres.Files)
2218 }
2219 })
2220
2221 t.Run("ChunkMatches", func(t *testing.T) {
2222 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2223 if len(sres.Files) != 0 {
2224 t.Errorf("got %v, wanted 0 matches", sres.Files)
2225 }
2226 })
2227}
2228
2229func TestMatchNewline(t *testing.T) {
2230 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2231 if err != nil {
2232 t.Fatalf("syntax.Parse: %v", err)
2233 }
2234
2235 content := []byte("pqr\nalex")
2236
2237 b := testShardBuilder(t, nil,
2238 Document{
2239 Name: "f1",
2240 Content: content,
2241 })
2242
2243 t.Run("LineMatches", func(t *testing.T) {
2244 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2245 if len(sres.Files) != 1 {
2246 t.Errorf("got %v, wanted 1 matches", sres.Files)
2247 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2248 t.Errorf("got match line %q, want %q", l, content)
2249 }
2250 })
2251
2252 t.Run("ChunkMatches", func(t *testing.T) {
2253 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2254 if len(sres.Files) != 1 {
2255 t.Errorf("got %v, wanted 1 matches", sres.Files)
2256 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2257 t.Errorf("got match line %q, want %q", c, content)
2258 }
2259 })
2260}
2261
2262func TestSubRepo(t *testing.T) {
2263 subRepos := map[string]*zoekt.Repository{
2264 "sub": {
2265 Name: "sub-name",
2266 LineFragmentTemplate: "sub-line",
2267 },
2268 }
2269
2270 content := []byte("pqr\nalex")
2271
2272 b := testShardBuilder(t, &zoekt.Repository{
2273 SubRepoMap: subRepos,
2274 }, Document{
2275 Name: "sub/f1",
2276 Content: content,
2277 SubRepositoryPath: "sub",
2278 })
2279
2280 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2281 if len(sres.Files) != 1 {
2282 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2283 }
2284
2285 f := sres.Files[0]
2286 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2287 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2288 }
2289
2290 if sres.LineFragments["sub-name"] != "sub-line" {
2291 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2292 }
2293}
2294
2295func TestSearchEither(t *testing.T) {
2296 b := testShardBuilder(t, nil,
2297 Document{Name: "f1", Content: []byte("bla needle bla")},
2298 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2299
2300 t.Run("LineMatches", func(t *testing.T) {
2301 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2302 if len(sres.Files) != 2 {
2303 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2304 }
2305
2306 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2307 if len(sres.Files) != 1 {
2308 t.Fatalf("got %v, wanted 1 index", sres.Files)
2309 }
2310
2311 if got, want := sres.Files[0].FileName, "f1"; got != want {
2312 t.Errorf("got %q, want %q", got, want)
2313 }
2314 })
2315
2316 t.Run("ChunkMatches", func(t *testing.T) {
2317 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2318 if len(sres.Files) != 2 {
2319 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2320 }
2321
2322 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2323 if len(sres.Files) != 1 {
2324 t.Fatalf("got %v, wanted 1 index", sres.Files)
2325 }
2326
2327 if got, want := sres.Files[0].FileName, "f1"; got != want {
2328 t.Errorf("got %q, want %q", got, want)
2329 }
2330 })
2331}
2332
2333func TestUnicodeExactMatch(t *testing.T) {
2334 needle := "néédlÉ"
2335 content := []byte("blá blá " + needle + " blâ")
2336
2337 b := testShardBuilder(t, nil,
2338 Document{Name: "f1", Content: content})
2339
2340 t.Run("LineMatches", func(t *testing.T) {
2341 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2342 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2343 }
2344 })
2345
2346 t.Run("ChunkMatches", func(t *testing.T) {
2347 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2348 if len(res.Files) != 1 {
2349 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2350 }
2351 })
2352}
2353
2354func TestUnicodeCoverContent(t *testing.T) {
2355 needle := "néédlÉ"
2356 content := []byte("blá blá " + needle + " blâ")
2357
2358 b := testShardBuilder(t, nil,
2359 Document{Name: "f1", Content: content})
2360
2361 t.Run("LineMatches", func(t *testing.T) {
2362 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2363 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2364 }
2365
2366 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2367 if len(res.Files) != 1 {
2368 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2369 }
2370
2371 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2372 t.Errorf("got %d want %d", got, want)
2373 }
2374 })
2375
2376 t.Run("ChunkMatches", func(t *testing.T) {
2377 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2378 if len(res.Files) != 0 {
2379 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2380 }
2381
2382 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2383 if len(res.Files) != 1 {
2384 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2385 }
2386
2387 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2388 want := uint32(strings.Index(string(content), needle))
2389 if got != want {
2390 t.Errorf("got %d want %d", got, want)
2391 }
2392 })
2393}
2394
2395func TestUnicodeNonCoverContent(t *testing.T) {
2396 needle := "nééáádlÉ"
2397 content := []byte("blá blá " + needle + " blâ")
2398
2399 b := testShardBuilder(t, nil,
2400 Document{Name: "f1", Content: content})
2401
2402 t.Run("LineMatches", func(t *testing.T) {
2403 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2404 if len(res.Files) != 1 {
2405 t.Fatalf("got %v, wanted 1 index", res.Files)
2406 }
2407
2408 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2409 t.Errorf("got %d want %d", got, want)
2410 }
2411 })
2412
2413 t.Run("ChunkMatches", func(t *testing.T) {
2414 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2415 if len(res.Files) != 1 {
2416 t.Fatalf("got %v, wanted 1 index", res.Files)
2417 }
2418
2419 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2420 want := uint32(strings.Index(string(content), needle))
2421 if got != want {
2422 t.Errorf("got %d want %d", got, want)
2423 }
2424 })
2425}
2426
// kelvinCodePoint is U+212A (KELVIN SIGN). It is encoded as three bytes in
// UTF-8, whereas ASCII 'k' is one byte; the tests use it to exercise
// variable-length encodings.
const kelvinCodePoint = 0x212A
2428
2429func TestUnicodeVariableLength(t *testing.T) {
2430 lower := 'k'
2431 upper := rune(kelvinCodePoint)
2432
2433 needle := "nee" + string([]rune{lower}) + "eed"
2434 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2435 " ee" + string([]rune{lower}) + "ee" +
2436 " ee" + string([]rune{upper}) + "ee")
2437
2438 t.Run("LineMatches", func(t *testing.T) {
2439 b := testShardBuilder(t, nil,
2440 Document{Name: "f1", Content: corpus})
2441
2442 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2443 if len(res.Files) != 1 {
2444 t.Fatalf("got %v, wanted 1 index", res.Files)
2445 }
2446 })
2447
2448 t.Run("ChunkMatches", func(t *testing.T) {
2449 b := testShardBuilder(t, nil,
2450 Document{Name: "f1", Content: corpus})
2451
2452 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2453 if len(res.Files) != 1 {
2454 t.Fatalf("got %v, wanted 1 index", res.Files)
2455 }
2456 })
2457}
2458
2459func TestUnicodeFileStartOffsets(t *testing.T) {
2460 unicode := "世界"
2461 wat := "waaaaaat"
2462 b := testShardBuilder(t, nil,
2463 Document{
2464 Name: "f1",
2465 Content: []byte(unicode),
2466 },
2467 Document{
2468 Name: "f2",
2469 Content: []byte(wat),
2470 },
2471 )
2472 q := &query.Substring{Pattern: wat, Content: true}
2473 res := searchForTest(t, b, q)
2474 if len(res.Files) != 1 {
2475 t.Fatalf("got %v, wanted 1 index", res.Files)
2476 }
2477}
2478
2479func TestLongFileUTF8(t *testing.T) {
2480 needle := "neeedle"
2481
2482 // 6 bytes.
2483 unicode := "世界"
2484 content := []byte(strings.Repeat(unicode, 100) + needle)
2485 b := testShardBuilder(t, nil,
2486 Document{
2487 Name: "f1",
2488 Content: []byte(strings.Repeat("a", 50)),
2489 },
2490 Document{
2491 Name: "f2",
2492 Content: content,
2493 })
2494
2495 t.Run("LineMatches", func(t *testing.T) {
2496 q := &query.Substring{Pattern: needle, Content: true}
2497 res := searchForTest(t, b, q)
2498 if len(res.Files) != 1 {
2499 t.Errorf("got %v, want 1 result", res)
2500 }
2501 })
2502
2503 t.Run("ChunkMatches", func(t *testing.T) {
2504 q := &query.Substring{Pattern: needle, Content: true}
2505 res := searchForTest(t, b, q, chunkOpts)
2506 if len(res.Files) != 1 {
2507 t.Errorf("got %v, want 1 result", res)
2508 }
2509 })
2510}
2511
2512func TestEstimateDocCount(t *testing.T) {
2513 content := []byte("bla needle bla")
2514 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2515 Document{Name: "f1", Content: content},
2516 Document{Name: "f2", Content: content},
2517 )
2518
2519 t.Run("LineMatches", func(t *testing.T) {
2520 if sres := searchForTest(t, b,
2521 query.NewAnd(
2522 &query.Substring{Pattern: "needle"},
2523 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2524 ), zoekt.SearchOptions{
2525 EstimateDocCount: true,
2526 }); sres.Stats.ShardFilesConsidered != 2 {
2527 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2528 }
2529 if sres := searchForTest(t, b,
2530 query.NewAnd(
2531 &query.Substring{Pattern: "needle"},
2532 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2533 ), zoekt.SearchOptions{
2534 EstimateDocCount: true,
2535 }); sres.Stats.ShardFilesConsidered != 0 {
2536 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2537 }
2538 })
2539
2540 t.Run("ChunkMatches", func(t *testing.T) {
2541 if sres := searchForTest(t, b,
2542 query.NewAnd(
2543 &query.Substring{Pattern: "needle"},
2544 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2545 ), zoekt.SearchOptions{
2546 EstimateDocCount: true,
2547 ChunkMatches: true,
2548 }); sres.Stats.ShardFilesConsidered != 2 {
2549 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2550 }
2551 if sres := searchForTest(t, b,
2552 query.NewAnd(
2553 &query.Substring{Pattern: "needle"},
2554 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2555 ), zoekt.SearchOptions{
2556 EstimateDocCount: true,
2557 ChunkMatches: true,
2558 }); sres.Stats.ShardFilesConsidered != 0 {
2559 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2560 }
2561 })
2562}
2563
2564func TestUTF8CorrectCorpus(t *testing.T) {
2565 needle := "neeedle"
2566
2567 // 6 bytes.
2568 unicode := "世界"
2569 b := testShardBuilder(t, nil,
2570 Document{
2571 Name: "f1",
2572 Content: []byte(strings.Repeat(unicode, 100)),
2573 },
2574 Document{
2575 Name: "xxxxxneeedle",
2576 Content: []byte("hello"),
2577 })
2578
2579 t.Run("LineMatches", func(t *testing.T) {
2580 q := &query.Substring{Pattern: needle, FileName: true}
2581 res := searchForTest(t, b, q)
2582 if len(res.Files) != 1 {
2583 t.Errorf("got %v, want 1 result", res)
2584 }
2585 })
2586
2587 t.Run("ChunkMatches", func(t *testing.T) {
2588 q := &query.Substring{Pattern: needle, FileName: true}
2589 res := searchForTest(t, b, q, chunkOpts)
2590 if len(res.Files) != 1 {
2591 t.Errorf("got %v, want 1 result", res)
2592 }
2593 })
2594}
2595
2596func TestBuilderStats(t *testing.T) {
2597 b := testShardBuilder(t, nil,
2598 Document{
2599 Name: "f1",
2600 Content: []byte(strings.Repeat("abcd", 1024)),
2601 })
2602 var buf bytes.Buffer
2603 if err := b.Write(&buf); err != nil {
2604 t.Fatal(err)
2605 }
2606
2607 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2608 t.Errorf("got %d, want %d", got, want)
2609 }
2610}
2611
2612func TestIOStats(t *testing.T) {
2613 b := testShardBuilder(t, nil,
2614 Document{
2615 Name: "f1",
2616 Content: []byte(strings.Repeat("abcd", 1024)),
2617 })
2618
2619 t.Run("LineMatches", func(t *testing.T) {
2620 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2621 res := searchForTest(t, b, q)
2622
2623 // 4096 (content) + 2 (overhead: newlines or doc sections)
2624 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2625 t.Errorf("got content I/O %d, want %d", got, want)
2626 }
2627
2628 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2629 // delta encoded.
2630 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2631 t.Errorf("got index I/O %d, want %d", got, want)
2632 }
2633 })
2634
2635 t.Run("ChunkMatches", func(t *testing.T) {
2636 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2637 res := searchForTest(t, b, q, chunkOpts)
2638
2639 // 4096 (content) + 2 (overhead: newlines or doc sections)
2640 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2641 t.Errorf("got content I/O %d, want %d", got, want)
2642 }
2643
2644 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2645 // delta encoded.
2646 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2647 t.Errorf("got index I/O %d, want %d", got, want)
2648 }
2649 })
2650
2651 t.Run("LineMatches with BM25", func(t *testing.T) {
2652 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2653 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true})
2654
2655 // 4096 (content) + 2 (overhead: newlines or doc sections)
2656 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2657 t.Errorf("got content I/O %d, want %d", got, want)
2658 }
2659
2660 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2661 // delta encoded.
2662 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2663 t.Errorf("got index I/O %d, want %d", got, want)
2664 }
2665 })
2666
2667 t.Run("ChunkMatches with BM25", func(t *testing.T) {
2668 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2669 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true})
2670
2671 // 4096 (content) + 2 (overhead: newlines or doc sections)
2672 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2673 t.Errorf("got content I/O %d, want %d", got, want)
2674 }
2675
2676 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2677 // delta encoded.
2678 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2679 t.Errorf("got index I/O %d, want %d", got, want)
2680 }
2681 })
2682}
2683
2684func TestStartLineAnchor(t *testing.T) {
2685 b := testShardBuilder(t, nil,
2686 Document{
2687 Name: "f1",
2688 Content: []byte(
2689 `hello
2690start of middle of line
2691`),
2692 })
2693
2694 t.Run("LineMatches", func(t *testing.T) {
2695 q, err := query.Parse("^start")
2696 if err != nil {
2697 t.Errorf("parse: %v", err)
2698 }
2699
2700 res := searchForTest(t, b, q)
2701 if len(res.Files) != 1 {
2702 t.Errorf("got %v, want 1 file", res.Files)
2703 }
2704
2705 q, err = query.Parse("^middle")
2706 if err != nil {
2707 t.Errorf("parse: %v", err)
2708 }
2709 res = searchForTest(t, b, q)
2710 if len(res.Files) != 0 {
2711 t.Errorf("got %v, want 0 files", res.Files)
2712 }
2713 })
2714
2715 t.Run("ChunkMatches", func(t *testing.T) {
2716 q, err := query.Parse("^start")
2717 if err != nil {
2718 t.Errorf("parse: %v", err)
2719 }
2720
2721 res := searchForTest(t, b, q, chunkOpts)
2722 if len(res.Files) != 1 {
2723 t.Errorf("got %v, want 1 file", res.Files)
2724 }
2725
2726 q, err = query.Parse("^middle")
2727 if err != nil {
2728 t.Errorf("parse: %v", err)
2729 }
2730 res = searchForTest(t, b, q, chunkOpts)
2731 if len(res.Files) != 0 {
2732 t.Errorf("got %v, want 0 files", res.Files)
2733 }
2734 })
2735}
2736
2737func TestAndOrUnicode(t *testing.T) {
2738 q, err := query.Parse("orange.*apple")
2739 if err != nil {
2740 t.Errorf("parse: %v", err)
2741 }
2742 finalQ := query.NewAnd(q,
2743 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2744 query.NewOr(&query.Branch{Pattern: "master"}))))
2745
2746 b := testShardBuilder(t, &zoekt.Repository{
2747 Name: "name",
2748 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}},
2749 }, Document{
2750 Name: "f2",
2751 Content: []byte("orange\u2318apple"),
2752 // --------------0123456 78901
2753 Branches: []string{"master"},
2754 })
2755
2756 t.Run("LineMatches", func(t *testing.T) {
2757 res := searchForTest(t, b, finalQ)
2758 if len(res.Files) != 1 {
2759 t.Errorf("got %v, want 1 result", res.Files)
2760 }
2761 })
2762
2763 t.Run("ChunkMatches", func(t *testing.T) {
2764 res := searchForTest(t, b, finalQ, chunkOpts)
2765 if len(res.Files) != 1 {
2766 t.Errorf("got %v, want 1 result", res.Files)
2767 }
2768 })
2769}
2770
2771func TestAndShort(t *testing.T) {
2772 content := []byte("bla needle at orange bla")
2773 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2774 Document{Name: "f1", Content: content},
2775 Document{Name: "f2", Content: []byte("xx at xx")},
2776 Document{Name: "f3", Content: []byte("yy orange xx")},
2777 )
2778
2779 q := query.NewAnd(&query.Substring{Pattern: "at"},
2780 &query.Substring{Pattern: "orange"})
2781
2782 t.Run("LineMatches", func(t *testing.T) {
2783 res := searchForTest(t, b, q)
2784 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2785 t.Errorf("got %v, want 1 result", res.Files)
2786 }
2787 })
2788
2789 t.Run("ChunkMatches", func(t *testing.T) {
2790 res := searchForTest(t, b, q, chunkOpts)
2791 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2792 t.Errorf("got %v, want 1 result", res.Files)
2793 }
2794 })
2795}
2796
2797func TestNoCollectRegexpSubstring(t *testing.T) {
2798 content := []byte("bla final bla\nfoo final, foo")
2799 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2800 Document{Name: "f1", Content: content},
2801 )
2802
2803 q := &query.Regexp{
2804 Regexp: mustParseRE("final[,.]"),
2805 }
2806
2807 t.Run("LineMatches", func(t *testing.T) {
2808 res := searchForTest(t, b, q)
2809 if len(res.Files) != 1 {
2810 t.Fatalf("got %v, want 1 result", res.Files)
2811 }
2812 if f := res.Files[0]; len(f.LineMatches) != 1 {
2813 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2814 }
2815 })
2816
2817 t.Run("ChunkMatches", func(t *testing.T) {
2818 res := searchForTest(t, b, q, chunkOpts)
2819 if len(res.Files) != 1 {
2820 t.Fatalf("got %v, want 1 result", res.Files)
2821 }
2822 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2823 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2824 }
2825 })
2826}
2827
2828func printLineMatches(ms []zoekt.LineMatch) string {
2829 var ss []string
2830 for _, m := range ms {
2831 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2832 }
2833
2834 return strings.Join(ss, ", ")
2835}
2836
2837func TestLang(t *testing.T) {
2838 content := []byte("bla needle bla")
2839 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2840 Document{Name: "f1", Content: content},
2841 Document{Name: "f2", Language: "java", Content: content},
2842 Document{Name: "f3", Language: "cpp", Content: content},
2843 )
2844
2845 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2846 &query.Language{Language: "cpp"})
2847
2848 t.Run("LineMatches", func(t *testing.T) {
2849 res := searchForTest(t, b, q)
2850 if len(res.Files) != 1 {
2851 t.Fatalf("got %v, want 1 result in f3", res.Files)
2852 }
2853 f := res.Files[0]
2854 if f.FileName != "f3" || f.Language != "cpp" {
2855 t.Fatalf("got %v, want 1 match with language cpp", f)
2856 }
2857 })
2858
2859 t.Run("ChunkMatches", func(t *testing.T) {
2860 res := searchForTest(t, b, q, chunkOpts)
2861 if len(res.Files) != 1 {
2862 t.Fatalf("got %v, want 1 result in f3", res.Files)
2863 }
2864 f := res.Files[0]
2865 if f.FileName != "f3" || f.Language != "cpp" {
2866 t.Fatalf("got %v, want 1 match with language cpp", f)
2867 }
2868 })
2869}
2870
2871func TestLangShortcut(t *testing.T) {
2872 content := []byte("bla needle bla")
2873 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2874 Document{Name: "f2", Language: "java", Content: content},
2875 Document{Name: "f3", Language: "cpp", Content: content},
2876 )
2877
2878 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2879 &query.Language{Language: "fortran"})
2880
2881 t.Run("LineMatches", func(t *testing.T) {
2882 res := searchForTest(t, b, q)
2883 if len(res.Files) != 0 {
2884 t.Fatalf("got %v, want 0 results", res.Files)
2885 }
2886 if res.Stats.IndexBytesLoaded > 0 {
2887 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2888 }
2889 })
2890
2891 t.Run("ChunkMatches", func(t *testing.T) {
2892 res := searchForTest(t, b, q, chunkOpts)
2893 if len(res.Files) != 0 {
2894 t.Fatalf("got %v, want 0 results", res.Files)
2895 }
2896 if res.Stats.IndexBytesLoaded > 0 {
2897 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2898 }
2899 })
2900}
2901
2902func TestNoTextMatchAtoms(t *testing.T) {
2903 content := []byte("bla needle bla")
2904 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2905 Document{Name: "f1", Content: content},
2906 Document{Name: "f2", Language: "java", Content: content},
2907 Document{Name: "f3", Language: "cpp", Content: content},
2908 )
2909 q := query.NewAnd(&query.Language{Language: "java"})
2910 t.Run("LineMatches", func(t *testing.T) {
2911 res := searchForTest(t, b, q)
2912 if len(res.Files) != 1 {
2913 t.Fatalf("got %v, want 1 result in f3", res.Files)
2914 }
2915 })
2916
2917 t.Run("ChunkMatches", func(t *testing.T) {
2918 res := searchForTest(t, b, q, chunkOpts)
2919 if len(res.Files) != 1 {
2920 t.Fatalf("got %v, want 1 result in f3", res.Files)
2921 }
2922 })
2923}
2924
2925func TestNoPositiveAtoms(t *testing.T) {
2926 content := []byte("bla needle bla")
2927 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2928 Document{Name: "f1", Content: content},
2929 Document{Name: "f2", Content: content},
2930 )
2931
2932 q := query.NewAnd(
2933 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2934 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2935 t.Run("LineMatches", func(t *testing.T) {
2936 res := searchForTest(t, b, q)
2937 if len(res.Files) != 2 {
2938 t.Fatalf("got %v, want 2 results in f3", res.Files)
2939 }
2940 })
2941 t.Run("ChunkMatches", func(t *testing.T) {
2942 res := searchForTest(t, b, q, chunkOpts)
2943 if len(res.Files) != 2 {
2944 t.Fatalf("got %v, want 2 results in f3", res.Files)
2945 }
2946 })
2947}
2948
2949func TestSymbolBoundaryStart(t *testing.T) {
2950 content := []byte("start\nbla bla\nend")
2951 // ----------------012345-67890123-456
2952
2953 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2954 Document{
2955 Name: "f1",
2956 Content: content,
2957 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2958 },
2959 )
2960 q := &query.Symbol{
2961 Expr: &query.Substring{Pattern: "start"},
2962 }
2963 t.Run("LineMatches", func(t *testing.T) {
2964 res := searchForTest(t, b, q)
2965 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2966 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2967 }
2968 m := res.Files[0].LineMatches[0].LineFragments[0]
2969 if m.Offset != 0 {
2970 t.Fatalf("got offset %d want 0", m.Offset)
2971 }
2972 })
2973
2974 t.Run("ChunkMatches", func(t *testing.T) {
2975 res := searchForTest(t, b, q, chunkOpts)
2976 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2977 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2978 }
2979 m := res.Files[0].ChunkMatches[0].Ranges[0]
2980 if m.Start.ByteOffset != 0 {
2981 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2982 }
2983 })
2984}
2985
2986func TestSymbolBoundaryEnd(t *testing.T) {
2987 content := []byte("start\nbla bla\nend")
2988 // ----------------012345-67890123-456
2989
2990 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2991 Document{
2992 Name: "f1",
2993 Content: content,
2994 Symbols: []DocumentSection{{14, 17}},
2995 },
2996 )
2997 q := &query.Symbol{
2998 Expr: &query.Substring{Pattern: "end"},
2999 }
3000 t.Run("LineMatches", func(t *testing.T) {
3001 res := searchForTest(t, b, q)
3002 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3003 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3004 }
3005 m := res.Files[0].LineMatches[0].LineFragments[0]
3006 if m.Offset != 14 {
3007 t.Fatalf("got offset %d want 0", m.Offset)
3008 }
3009 })
3010
3011 t.Run("ChunkMatches", func(t *testing.T) {
3012 res := searchForTest(t, b, q, chunkOpts)
3013 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3014 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3015 }
3016 m := res.Files[0].ChunkMatches[0].Ranges[0]
3017 if m.Start.ByteOffset != 14 {
3018 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
3019 }
3020 })
3021}
3022
3023func TestSymbolSubstring(t *testing.T) {
3024 content := []byte("bla\nsymblabla\nbla")
3025 // ----------------0123-4567890123-456
3026
3027 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3028 Document{
3029 Name: "f1",
3030 Content: content,
3031 Symbols: []DocumentSection{{4, 12}},
3032 },
3033 )
3034 q := &query.Symbol{
3035 Expr: &query.Substring{Pattern: "bla"},
3036 }
3037 t.Run("LineMatches", func(t *testing.T) {
3038 res := searchForTest(t, b, q)
3039 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3040 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3041 }
3042 m := res.Files[0].LineMatches[0].LineFragments[0]
3043 if m.Offset != 7 || m.MatchLength != 3 {
3044 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
3045 }
3046 })
3047
3048 t.Run("ChunkMatches", func(t *testing.T) {
3049 res := searchForTest(t, b, q, chunkOpts)
3050 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3051 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3052 }
3053 m := res.Files[0].ChunkMatches[0].Ranges[0]
3054 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
3055 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
3056 }
3057 })
3058}
3059
3060func TestSymbolSubstringExact(t *testing.T) {
3061 content := []byte("bla\nsym\nbla\nsym\nasymb")
3062 // ----------------0123-4567-890123456-78901
3063
3064 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3065 Document{
3066 Name: "f1",
3067 Content: content,
3068 Symbols: []DocumentSection{{4, 7}},
3069 },
3070 )
3071 q := &query.Symbol{
3072 Expr: &query.Substring{Pattern: "sym"},
3073 }
3074 t.Run("LineMatches", func(t *testing.T) {
3075 res := searchForTest(t, b, q)
3076 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3077 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3078 }
3079 m := res.Files[0].LineMatches[0].LineFragments[0]
3080 if m.Offset != 4 {
3081 t.Fatalf("got offset %d, want 7", m.Offset)
3082 }
3083 })
3084
3085 t.Run("ChunkMatches", func(t *testing.T) {
3086 res := searchForTest(t, b, q, chunkOpts)
3087 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3088 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3089 }
3090 m := res.Files[0].ChunkMatches[0].Ranges[0]
3091 if m.Start.ByteOffset != 4 {
3092 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
3093 }
3094 })
3095}
3096
3097func TestSymbolRegexpExact(t *testing.T) {
3098 content := []byte("blah\nbla\nbl")
3099 // ----------------01234-5678-90
3100
3101 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3102 Document{
3103 Name: "f1",
3104 Content: content,
3105 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
3106 },
3107 )
3108 q := &query.Symbol{
3109 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
3110 }
3111 t.Run("LineMatches", func(t *testing.T) {
3112 res := searchForTest(t, b, q)
3113 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3114 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3115 }
3116 m := res.Files[0].LineMatches[0].LineFragments[0]
3117 if m.Offset != 5 {
3118 t.Fatalf("got offset %d, want 5", m.Offset)
3119 }
3120 })
3121
3122 t.Run("ChunkMatches", func(t *testing.T) {
3123 res := searchForTest(t, b, q, chunkOpts)
3124 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3125 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3126 }
3127 m := res.Files[0].ChunkMatches[0].Ranges[0]
3128 if m.Start.ByteOffset != 5 {
3129 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
3130 }
3131 })
3132}
3133
3134func TestSymbolRegexpPartial(t *testing.T) {
3135 content := []byte("abcdef")
3136 // ----------------012345
3137
3138 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3139 Document{
3140 Name: "f1",
3141 Content: content,
3142 Symbols: []DocumentSection{{0, 6}},
3143 },
3144 )
3145 q := &query.Symbol{
3146 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
3147 }
3148 t.Run("LineMatches", func(t *testing.T) {
3149 res := searchForTest(t, b, q)
3150 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3151 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3152 }
3153 m := res.Files[0].LineMatches[0].LineFragments[0]
3154 if m.Offset != 1 {
3155 t.Fatalf("got offset %d, want 1", m.Offset)
3156 }
3157 if m.MatchLength != 3 {
3158 t.Fatalf("got match length %d, want 3", m.MatchLength)
3159 }
3160 })
3161
3162 t.Run("ChunkMatches", func(t *testing.T) {
3163 res := searchForTest(t, b, q, chunkOpts)
3164 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3165 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3166 }
3167 m := res.Files[0].ChunkMatches[0].Ranges[0]
3168 if m.Start.ByteOffset != 1 {
3169 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
3170 }
3171 if m.End.ByteOffset != 4 {
3172 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
3173 }
3174 })
3175}
3176
3177func TestSymbolRegexpAll(t *testing.T) {
3178 docs := []Document{
3179 {
3180 Name: "f1",
3181 Content: []byte("Hello Zoekt"),
3182 // --------------01234567890
3183 Symbols: []DocumentSection{{0, 5}, {6, 11}},
3184 },
3185 {
3186 Name: "f2",
3187 Content: []byte("Second Zoekt Third"),
3188 // --------------012345678901234567
3189 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
3190 },
3191 }
3192
3193 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...)
3194 q := &query.Symbol{
3195 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
3196 }
3197 t.Run("LineMatches", func(t *testing.T) {
3198 res := searchForTest(t, b, q)
3199 if len(res.Files) != len(docs) {
3200 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3201 }
3202 for i, want := range docs {
3203 got := res.Files[i].LineMatches[0].LineFragments
3204 if len(got) != len(want.Symbols) {
3205 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3206 }
3207
3208 for j, sec := range want.Symbols {
3209 if sec.Start != got[j].Offset {
3210 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3211 }
3212 }
3213 }
3214 })
3215
3216 t.Run("ChunkMatches", func(t *testing.T) {
3217 res := searchForTest(t, b, q, chunkOpts)
3218 if len(res.Files) != len(docs) {
3219 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3220 }
3221 for i, want := range docs {
3222 got := res.Files[i].ChunkMatches[0].Ranges
3223 if len(got) != len(want.Symbols) {
3224 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3225 }
3226
3227 for j, sec := range want.Symbols {
3228 if sec.Start != got[j].Start.ByteOffset {
3229 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3230 }
3231 }
3232 }
3233 })
3234}
3235
3236func TestHitIterTerminate(t *testing.T) {
3237 // contrived input: trigram frequencies forces selecting abc +
3238 // def for the distance iteration. There is no index, so this
3239 // will advance the compressedPostingIterator to beyond the
3240 // end.
3241 content := []byte("abc bcdbcd cdecde abcabc def efg")
3242 b := testShardBuilder(t, nil,
3243 Document{
3244 Name: "f1",
3245 Content: content,
3246 },
3247 )
3248
3249 t.Run("LineMatches", func(t *testing.T) {
3250 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3251 })
3252
3253 t.Run("ChunkMatches", func(t *testing.T) {
3254 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3255 })
3256}
3257
3258func TestDistanceHitIterBailLast(t *testing.T) {
3259 content := []byte("AST AST AST UASH")
3260 b := testShardBuilder(t, nil,
3261 Document{
3262 Name: "f1",
3263 Content: content,
3264 },
3265 )
3266 t.Run("LineMatches", func(t *testing.T) {
3267 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3268 if len(res.Files) != 0 {
3269 t.Fatalf("got %v, want no results", res.Files)
3270 }
3271 })
3272
3273 t.Run("LineMatches", func(t *testing.T) {
3274 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3275 if len(res.Files) != 0 {
3276 t.Fatalf("got %v, want no results", res.Files)
3277 }
3278 })
3279}
3280
3281func TestDocumentSectionRuneBoundary(t *testing.T) {
3282 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3283 b, err := NewShardBuilder(nil)
3284 if err != nil {
3285 t.Fatalf("NewShardBuilder: %v", err)
3286 }
3287
3288 for i, sec := range []DocumentSection{
3289 {2, 6},
3290 {3, 7},
3291 } {
3292 if err := b.Add(Document{
3293 Name: "f1",
3294 Content: []byte(content),
3295 Symbols: []DocumentSection{sec},
3296 }); err == nil {
3297 t.Errorf("%d: Add succeeded", i)
3298 }
3299 }
3300}
3301
3302func TestUnicodeQuery(t *testing.T) {
3303 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3304 b := testShardBuilder(t, nil,
3305 Document{
3306 Name: "f1",
3307 Content: []byte(content),
3308 },
3309 )
3310
3311 q := &query.Substring{Pattern: content}
3312
3313 t.Run("LineMatches", func(t *testing.T) {
3314 res := searchForTest(t, b, q)
3315 if len(res.Files) != 1 {
3316 t.Fatalf("want 1 match, got %v", res.Files)
3317 }
3318
3319 f := res.Files[0]
3320 if len(f.LineMatches) != 1 {
3321 t.Fatalf("want 1 line, got %v", f.LineMatches)
3322 }
3323 l := f.LineMatches[0]
3324
3325 if len(l.LineFragments) != 1 {
3326 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3327 }
3328 fr := l.LineFragments[0]
3329 if fr.MatchLength != len(content) {
3330 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3331 }
3332 })
3333
3334 t.Run("ChunkMatches", func(t *testing.T) {
3335 res := searchForTest(t, b, q, chunkOpts)
3336 if len(res.Files) != 1 {
3337 t.Fatalf("want 1 match, got %v", res.Files)
3338 }
3339
3340 f := res.Files[0]
3341 if len(f.ChunkMatches) != 1 {
3342 t.Fatalf("want 1 line, got %v", f.LineMatches)
3343 }
3344 cm := f.ChunkMatches[0]
3345
3346 if len(cm.Ranges) != 1 {
3347 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3348 }
3349 rr := cm.Ranges[0]
3350 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3351 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3352 }
3353 })
3354}
3355
3356func TestSkipInvalidContent(t *testing.T) {
3357 for _, content := range []string{
3358 // Binary
3359 "abc def \x00 abc",
3360 } {
3361
3362 b, err := NewShardBuilder(nil)
3363 if err != nil {
3364 t.Fatalf("NewShardBuilder: %v", err)
3365 }
3366
3367 if err := b.Add(Document{
3368 Name: "f1",
3369 Content: []byte(content),
3370 }); err != nil {
3371 t.Fatal(err)
3372 }
3373
3374 t.Run("LineMatches", func(t *testing.T) {
3375 q := &query.Substring{Pattern: "abc def"}
3376 res := searchForTest(t, b, q)
3377 if len(res.Files) != 0 {
3378 t.Fatalf("got %v, want no results", res.Files)
3379 }
3380
3381 q = &query.Substring{Pattern: "NOT-INDEXED"}
3382 res = searchForTest(t, b, q)
3383 if len(res.Files) != 1 {
3384 t.Fatalf("got %v, want 1 result", res.Files)
3385 }
3386 })
3387
3388 t.Run("ChunkMatches", func(t *testing.T) {
3389 q := &query.Substring{Pattern: "abc def"}
3390 res := searchForTest(t, b, q, chunkOpts)
3391 if len(res.Files) != 0 {
3392 t.Fatalf("got %v, want no results", res.Files)
3393 }
3394
3395 q = &query.Substring{Pattern: "NOT-INDEXED"}
3396 res = searchForTest(t, b, q, chunkOpts)
3397 if len(res.Files) != 1 {
3398 t.Fatalf("got %v, want 1 result", res.Files)
3399 }
3400 })
3401 }
3402}
3403
3404func TestDocChecker(t *testing.T) {
3405 docChecker := DocChecker{}
3406
3407 // Test valid and invalid text
3408 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3409 if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone {
3410 t.Errorf("Check(%q): %v", text, skip)
3411 }
3412 }
3413 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx", "0123456789abcdefghi"} {
3414 if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone {
3415 t.Errorf("Check(%q) succeeded", text)
3416 }
3417 }
3418
3419 // Test valid and invalid text with an allowed large file
3420 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3421 if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone {
3422 t.Errorf("Check(%q): %v", text, skip)
3423 }
3424 }
3425 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx"} {
3426 if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone {
3427 t.Errorf("Check(%q) succeeded", text)
3428 }
3429 }
3430}
3431
3432func TestLineAnd(t *testing.T) {
3433 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3434 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3435 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3436 Document{Name: "f3", Content: []byte("banana grape")},
3437 )
3438 pattern := "(apple)(?-s:.)*?(banana)"
3439 r, _ := syntax.Parse(pattern, syntax.Perl)
3440
3441 q := query.Regexp{
3442 Regexp: r,
3443 Content: true,
3444 }
3445 t.Run("LineMatches", func(t *testing.T) {
3446 res := searchForTest(t, b, &q)
3447 wantRegexpCount := 1
3448 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3449 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3450 }
3451 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3452 t.Errorf("got %v, want 1 result", res.Files)
3453 }
3454 })
3455
3456 t.Run("ChunkMatches", func(t *testing.T) {
3457 res := searchForTest(t, b, &q, chunkOpts)
3458 wantRegexpCount := 1
3459 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3460 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3461 }
3462 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3463 t.Errorf("got %v, want 1 result", res.Files)
3464 }
3465 })
3466}
3467
3468func TestLineAndFileName(t *testing.T) {
3469 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3470 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3471 Document{Name: "f2", Content: []byte("apple banana\norange")},
3472 Document{Name: "apple banana", Content: []byte("banana grape")},
3473 )
3474 pattern := "(apple)(?-s:.)*?(banana)"
3475 r, _ := syntax.Parse(pattern, syntax.Perl)
3476
3477 q := query.Regexp{
3478 Regexp: r,
3479 FileName: true,
3480 }
3481 t.Run("LineMatches", func(t *testing.T) {
3482 res := searchForTest(t, b, &q)
3483 wantRegexpCount := 1
3484 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3485 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3486 }
3487 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3488 t.Errorf("got %v, want 1 result", res.Files)
3489 }
3490 })
3491
3492 t.Run("ChunkMatches", func(t *testing.T) {
3493 res := searchForTest(t, b, &q, chunkOpts)
3494 wantRegexpCount := 1
3495 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3496 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3497 }
3498 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3499 t.Errorf("got %v, want 1 result", res.Files)
3500 }
3501 })
3502}
3503
3504func TestMultiLineRegex(t *testing.T) {
3505 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3506 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3507 Document{Name: "f2", Content: []byte("apple orange")},
3508 Document{Name: "f3", Content: []byte("grape apple")},
3509 )
3510 pattern := "(apple).*?[[:space:]].*?(grape)"
3511 r, _ := syntax.Parse(pattern, syntax.Perl)
3512
3513 q := query.Regexp{
3514 Regexp: r,
3515 }
3516 t.Run("LineMatches", func(t *testing.T) {
3517 res := searchForTest(t, b, &q)
3518 wantRegexpCount := 2
3519 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3520 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3521 }
3522 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3523 t.Errorf("got %v, want 1 result", res.Files)
3524 }
3525 if l := len(res.Files[0].LineMatches); l != 2 {
3526 t.Errorf("got %v, want 2 line matches", l)
3527 }
3528 })
3529
3530 t.Run("ChunkMatches", func(t *testing.T) {
3531 res := searchForTest(t, b, &q, chunkOpts)
3532 wantRegexpCount := 2
3533 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3534 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3535 }
3536 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3537 t.Errorf("got %v, want 1 result", res.Files)
3538 }
3539 if l := len(res.Files[0].ChunkMatches); l != 1 {
3540 t.Errorf("got %v, want 1 chunk matches", l)
3541 }
3542 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3543 t.Errorf("got %v, want 1 chunk ranges", l)
3544 }
3545 })
3546}
3547
3548func TestSearchTypeFileName(t *testing.T) {
3549 b := testShardBuilder(t, &zoekt.Repository{
3550 Name: "reponame",
3551 },
3552 Document{Name: "f1", Content: []byte("bla the needle")},
3553 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3554 // -----------------------------------012345678901234567890-123456
3555 )
3556
3557 t.Run("LineMatches", func(t *testing.T) {
3558 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3559 t.Helper()
3560 fmatches := res.Files
3561 if len(fmatches) != 1 {
3562 t.Errorf("got %v, want 1 matches", len(fmatches))
3563 return
3564 }
3565 if len(fmatches[0].LineMatches) != 1 {
3566 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3567 return
3568 }
3569 var got string
3570 if fmatches[0].LineMatches[0].FileName {
3571 got = fmatches[0].FileName
3572 } else {
3573 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3574 }
3575
3576 if got != want {
3577 t.Errorf("got %s, want %s", got, want)
3578 }
3579 }
3580
3581 // Only return the later match in the second file
3582 res := searchForTest(t, b, query.NewAnd(
3583 &query.Type{
3584 Type: query.TypeFileName,
3585 Child: &query.Substring{Pattern: "needle"},
3586 },
3587 &query.Substring{Pattern: "file"}))
3588 wantSingleMatch(res, "f2:8")
3589
3590 // Only return a filename result
3591 res = searchForTest(t, b,
3592 &query.Type{
3593 Type: query.TypeFileName,
3594 Child: &query.Substring{Pattern: "file"},
3595 })
3596 wantSingleMatch(res, "f2")
3597 })
3598
3599 t.Run("ChunkMatches", func(t *testing.T) {
3600 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3601 t.Helper()
3602 fmatches := res.Files
3603 if len(fmatches) != 1 {
3604 t.Errorf("got %v, want 1 matches", len(fmatches))
3605 return
3606 }
3607 if len(fmatches[0].ChunkMatches) != 1 {
3608 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3609 return
3610 }
3611 var got string
3612 if fmatches[0].ChunkMatches[0].FileName {
3613 got = fmatches[0].FileName
3614 } else {
3615 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3616 }
3617
3618 if got != want {
3619 t.Errorf("got %s, want %s", got, want)
3620 }
3621 }
3622
3623 // Only return the later match in the second file
3624 res := searchForTest(t, b, query.NewAnd(
3625 &query.Type{
3626 Type: query.TypeFileName,
3627 Child: &query.Substring{Pattern: "needle"},
3628 },
3629 &query.Substring{Pattern: "file"}),
3630 chunkOpts,
3631 )
3632 wantSingleMatch(res, "f2:8")
3633
3634 // Only return a filename result
3635 res = searchForTest(t, b,
3636 &query.Type{
3637 Type: query.TypeFileName,
3638 Child: &query.Substring{Pattern: "file"},
3639 },
3640 chunkOpts,
3641 )
3642 wantSingleMatch(res, "f2")
3643 })
3644
3645 // type:filematch is the default result granularity. The wrapper must be
3646 // treated as a passthrough to the child instead of crashing the shard.
3647 t.Run("TypeFileMatch", func(t *testing.T) {
3648 res := searchForTest(t, b,
3649 &query.Type{
3650 Type: query.TypeFileMatch,
3651 Child: &query.Substring{Pattern: "needle"},
3652 })
3653 if len(res.Files) != 2 {
3654 t.Fatalf("got %d file matches, want 2", len(res.Files))
3655 }
3656 })
3657}
3658
3659func TestSearchTypeLanguage(t *testing.T) {
3660 b := testShardBuilder(t, &zoekt.Repository{
3661 Name: "reponame",
3662 },
3663 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3664 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3665 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3666 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3667 )
3668
3669 t.Log(b.languageMap)
3670
3671 t.Run("LineMatches", func(t *testing.T) {
3672 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3673 t.Helper()
3674 fmatches := res.Files
3675 if len(fmatches) != 1 {
3676 t.Errorf("got %v, want 1 matches", len(fmatches))
3677 return
3678 }
3679 if len(fmatches[0].LineMatches) != 1 {
3680 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3681 return
3682 }
3683 var got string
3684 if fmatches[0].LineMatches[0].FileName {
3685 got = fmatches[0].FileName
3686 } else {
3687 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3688 }
3689
3690 if got != want {
3691 t.Errorf("got %s, want %s", got, want)
3692 }
3693 }
3694
3695 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3696 wantSingleMatch(res, "apex.cls")
3697
3698 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3699 wantSingleMatch(res, "tex.cls")
3700
3701 res = searchForTest(t, b, &query.Language{Language: "C"})
3702 wantSingleMatch(res, "hello.h")
3703
3704 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3705 wantSingleMatch(res, "be.magik")
3706
3707 // test fallback language search by pretending it's an older index version
3708 res = searchForTest(t, b, &query.Language{Language: "C++"})
3709 if len(res.Files) != 0 {
3710 t.Errorf("got %d results for C++, want 0", len(res.Files))
3711 }
3712
3713 b.featureVersion = 11 // force fallback
3714 res = searchForTest(t, b, &query.Language{Language: "C++"})
3715 wantSingleMatch(res, "hello.h")
3716 })
3717
3718 t.Run("ChunkMatches", func(t *testing.T) {
3719 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3720 t.Helper()
3721 fmatches := res.Files
3722 if len(fmatches) != 1 {
3723 t.Errorf("got %v, want 1 matches", len(fmatches))
3724 return
3725 }
3726 if len(fmatches[0].ChunkMatches) != 1 {
3727 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3728 return
3729 }
3730 var got string
3731 if fmatches[0].ChunkMatches[0].FileName {
3732 got = fmatches[0].FileName
3733 } else {
3734 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3735 }
3736
3737 if got != want {
3738 t.Errorf("got %s, want %s", got, want)
3739 }
3740 }
3741
3742 b.featureVersion = FeatureVersion // reset feature version
3743 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3744 wantSingleMatch(res, "apex.cls")
3745
3746 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3747 wantSingleMatch(res, "tex.cls")
3748
3749 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3750 wantSingleMatch(res, "hello.h")
3751
3752 // test fallback language search by pretending it's an older index version
3753 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3754 if len(res.Files) != 0 {
3755 t.Errorf("got %d results for C++, want 0", len(res.Files))
3756 }
3757
3758 b.featureVersion = 11 // force fallback
3759 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3760 wantSingleMatch(res, "hello.h")
3761 })
3762}
3763
3764func TestStats(t *testing.T) {
3765 ignored := []cmp.Option{
3766 cmpopts.EquateEmpty(),
3767 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"),
3768 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
3769 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
3770 }
3771
3772 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry {
3773 searcher := searcherForTest(t, b)
3774 indexdata := searcher.(*indexData)
3775 return indexdata.repoListEntry
3776 }
3777
3778 t.Run("one empty repo", func(t *testing.T) {
3779 b := testShardBuilder(t, nil)
3780 got := repoListEntries(b)
3781 want := []zoekt.RepoListEntry{
3782 {
3783 Stats: zoekt.RepoStats{
3784 Repos: 0,
3785 Shards: 1,
3786 Documents: 0,
3787 IndexBytes: 20,
3788 ContentBytes: 0,
3789 NewLinesCount: 0,
3790 DefaultBranchNewLinesCount: 0,
3791 OtherBranchesNewLinesCount: 0,
3792 },
3793 },
3794 }
3795
3796 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3797 t.Fatalf("mismatch (-want +got):\n%s", diff)
3798 }
3799 })
3800
3801 t.Run("one simple shard", func(t *testing.T) {
3802 b := testShardBuilder(t, nil,
3803 Document{Name: "doc 0", Content: []byte("content 0")},
3804 Document{Name: "doc 1", Content: []byte("content 1")},
3805 )
3806 got := repoListEntries(b)
3807 want := []zoekt.RepoListEntry{
3808 {
3809 Stats: zoekt.RepoStats{
3810 Repos: 0,
3811 Shards: 1,
3812 Documents: 2,
3813 IndexBytes: 224,
3814 ContentBytes: 28,
3815 NewLinesCount: 0,
3816 DefaultBranchNewLinesCount: 0,
3817 OtherBranchesNewLinesCount: 0,
3818 },
3819 },
3820 }
3821
3822 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3823 t.Fatalf("mismatch (-want +got):\n%s", diff)
3824 }
3825 })
3826
3827 t.Run("one compound shard", func(t *testing.T) {
3828 b := testShardBuilderCompound(t,
3829 []*zoekt.Repository{
3830 {Name: "repo 0"},
3831 {Name: "repo 1"},
3832 },
3833 [][]Document{
3834 {
3835 {Name: "doc 0", Content: []byte("content 0")},
3836 {Name: "doc 1", Content: []byte("content 1")},
3837 },
3838 {
3839 {Name: "doc 2", Content: []byte("content 2")},
3840 {Name: "doc 3", Content: []byte("content 3")},
3841 },
3842 },
3843 )
3844 got := repoListEntries(b)
3845 want := []zoekt.RepoListEntry{
3846 {
3847 Stats: zoekt.RepoStats{
3848 Repos: 0,
3849 Shards: 1,
3850 Documents: 2,
3851 IndexBytes: 180,
3852 ContentBytes: 28,
3853 NewLinesCount: 0,
3854 DefaultBranchNewLinesCount: 0,
3855 OtherBranchesNewLinesCount: 0,
3856 },
3857 },
3858 {
3859 Stats: zoekt.RepoStats{
3860 Repos: 0,
3861 Shards: 1,
3862 Documents: 2,
3863 IndexBytes: 180,
3864 ContentBytes: 28,
3865 NewLinesCount: 0,
3866 DefaultBranchNewLinesCount: 0,
3867 OtherBranchesNewLinesCount: 0,
3868 },
3869 },
3870 }
3871
3872 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3873 t.Fatalf("mismatch (-want +got):\n%s", diff)
3874 }
3875 })
3876
3877 t.Run("compound shard with empty repos", func(t *testing.T) {
3878 b := testShardBuilderCompound(t,
3879 []*zoekt.Repository{
3880 {Name: "repo 0"},
3881 {Name: "repo 1"},
3882 {Name: "repo 2"},
3883 {Name: "repo 3"},
3884 {Name: "repo 4"},
3885 },
3886 [][]Document{
3887 {{Name: "doc 0", Content: []byte("content 0")}},
3888 nil,
3889 {{Name: "doc 1", Content: []byte("content 1")}},
3890 nil,
3891 nil,
3892 },
3893 )
3894 got := repoListEntries(b)
3895
3896 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3897 Shards: 1,
3898 Documents: 0,
3899 ContentBytes: 0,
3900 }}
3901 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3902 Shards: 1,
3903 Documents: 1,
3904 ContentBytes: 14,
3905 }}
3906
3907 want := []zoekt.RepoListEntry{
3908 entryNonEmpty,
3909 entryEmpty,
3910 entryNonEmpty,
3911 entryEmpty,
3912 entryEmpty,
3913 }
3914
3915 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3916 t.Fatalf("mismatch (-want +got):\n%s", diff)
3917 }
3918 })
3919}
3920
3921// This tests the frequent pattern "\bLITERAL\b".
3922func TestWordSearch(t *testing.T) {
3923 content := []byte("needle the bla")
3924 // ----------------01234567890123
3925
3926 b := testShardBuilder(t, nil,
3927 Document{
3928 Name: "f1",
3929 Content: content,
3930 })
3931
3932 t.Run("LineMatches", func(t *testing.T) {
3933 sres := searchForTest(t, b,
3934 &query.Regexp{
3935 Regexp: mustParseRE("\\bthe\\b"),
3936 CaseSensitive: true,
3937 Content: true,
3938 })
3939
3940 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3941 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3942 }
3943
3944 if sres.Stats.RegexpsConsidered != 0 {
3945 t.Fatal("expected regexp to be skipped")
3946 }
3947
3948 got := sres.Files[0].LineMatches[0]
3949 want := zoekt.LineMatch{
3950 LineFragments: []zoekt.LineFragmentMatch{{
3951 LineOffset: 7,
3952 Offset: 7,
3953 MatchLength: 3,
3954 }},
3955 Line: content,
3956 FileName: false,
3957 LineNumber: 1,
3958 LineStart: 0,
3959 LineEnd: 14,
3960 }
3961
3962 if !reflect.DeepEqual(got, want) {
3963 t.Errorf("got %#v, want %#v", got, want)
3964 }
3965 })
3966
3967 t.Run("ChunkMatches", func(t *testing.T) {
3968 sres := searchForTest(t, b,
3969 &query.Regexp{
3970 Regexp: mustParseRE("\\bthe\\b"),
3971 CaseSensitive: true,
3972 }, chunkOpts)
3973
3974 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3975 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3976 }
3977
3978 if sres.Stats.RegexpsConsidered != 0 {
3979 t.Fatal("expected regexp to be skipped")
3980 }
3981
3982 got := sres.Files[0].ChunkMatches[0]
3983 want := zoekt.ChunkMatch{
3984 Content: content,
3985 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3986 Ranges: []zoekt.Range{{
3987 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3988 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3989 }},
3990 }
3991
3992 if diff := cmp.Diff(want, got); diff != "" {
3993 t.Fatal(diff)
3994 }
3995 })
3996}
3997
3998// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
3999// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
4000func BenchmarkScoreChunkMatches(b *testing.B) {
4001 ctx := context.Background()
4002 var builder strings.Builder
4003 for i := range 1000 {
4004 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i))
4005 }
4006
4007 searcher := searcherForTest(b, testShardBuilder(b, nil,
4008 Document{Name: "f1", Content: []byte(builder.String())},
4009 ))
4010
4011 q := &query.Or{
4012 Children: []query.Q{
4013 &query.Substring{Pattern: "f"},
4014 &query.Substring{Pattern: "t"},
4015 }}
4016
4017 b.Run("score large ChunkMatch", func(b *testing.B) {
4018 b.ReportAllocs()
4019 b.ResetTimer()
4020
4021 for i := 0; i < b.N; i++ {
4022 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1})
4023 if err != nil {
4024 b.Fatal(err)
4025 }
4026
4027 matches := sres.Files
4028 if len(matches) == 0 {
4029 b.Fatalf("want file index, got none")
4030 }
4031 }
4032 })
4033}