fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt"
31 "github.com/sourcegraph/zoekt/query"
32)
33
34func clearScores(r *zoekt.SearchResult) {
35 for i := range r.Files {
36 r.Files[i].Score = 0.0
37 for j := range r.Files[i].LineMatches {
38 r.Files[i].LineMatches[j].Score = 0.0
39 }
40 for j := range r.Files[i].ChunkMatches {
41 r.Files[i].ChunkMatches[j].Score = 0.0
42 r.Files[i].ChunkMatches[j].BestLineMatch = 0
43 }
44 r.Files[i].Checksum = nil
45 r.Files[i].Debug = ""
46 }
47}
48
49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder {
50 tb.Helper()
51
52 b, err := NewShardBuilder(repo)
53 if err != nil {
54 tb.Fatalf("NewShardBuilder: %v", err)
55 }
56
57 for i, d := range docs {
58 if err := b.Add(d); err != nil {
59 tb.Fatalf("Add %d: %v", i, err)
60 }
61 }
62
63 return b
64}
65
66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder {
67 t.Helper()
68
69 b := newShardBuilder()
70 b.indexFormatVersion = NextIndexFormatVersion
71
72 if len(repos) != len(docs) {
73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
74 }
75
76 for i, repo := range repos {
77 if err := b.setRepository(repo); err != nil {
78 t.Fatal(err)
79 }
80 for j, d := range docs[i] {
81 if err := b.Add(d); err != nil {
82 t.Fatalf("Add %d %d: %v", i, j, err)
83 }
84 }
85 }
86
87 return b
88}
89
90func TestBoundary(t *testing.T) {
91 b := testShardBuilder(t, nil,
92 Document{Name: "f1", Content: []byte("x the")},
93 Document{Name: "f1", Content: []byte("reader")})
94 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
95 if len(res.Files) > 0 {
96 t.Fatalf("got %v, want no matches", res.Files)
97 }
98}
99
100func TestDocSectionInvalid(t *testing.T) {
101 b, err := NewShardBuilder(nil)
102 if err != nil {
103 t.Fatalf("NewShardBuilder: %v", err)
104 }
105 doc := Document{
106 Name: "f1",
107 Content: []byte("01234567890123"),
108 Symbols: []DocumentSection{{5, 8}, {7, 9}},
109 }
110
111 if err := b.Add(doc); err == nil {
112 t.Errorf("overlapping doc sections should fail")
113 }
114
115 doc = Document{
116 Name: "f1",
117 Content: []byte("01234567890123"),
118 Symbols: []DocumentSection{{0, 20}},
119 }
120
121 if err := b.Add(doc); err == nil {
122 t.Errorf("doc sections beyond EOF should fail")
123 }
124}
125
126func TestBasic(t *testing.T) {
127 b := testShardBuilder(t, nil,
128 Document{
129 Name: "f2",
130 Content: []byte("to carry water in the no later bla"),
131 // --------------0123456789012345678901234567890123
132 })
133
134 t.Run("LineMatch", func(t *testing.T) {
135 res := searchForTest(t, b, &query.Substring{
136 Pattern: "water",
137 CaseSensitive: true,
138 })
139 fmatches := res.Files
140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
141 t.Fatalf("got %v, want 1 matches", fmatches)
142 }
143
144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
145 want := "f2:9"
146 if got != want {
147 t.Errorf("1: got %s, want %s", got, want)
148 }
149 })
150
151 t.Run("ChunkMatch", func(t *testing.T) {
152 res := searchForTest(t, b, &query.Substring{
153 Pattern: "water",
154 CaseSensitive: true,
155 }, chunkOpts)
156 fmatches := res.Files
157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
158 t.Fatalf("got %v, want 1 matches", fmatches)
159 }
160
161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
162 want := "f2:9"
163 if got != want {
164 t.Errorf("1: got %s, want %s", got, want)
165 }
166 })
167}
168
169func TestEmptyIndex(t *testing.T) {
170 b := testShardBuilder(t, nil)
171 searcher := searcherForTest(t, b)
172
173 var opts zoekt.SearchOptions
174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
175 t.Fatalf("Search: %v", err)
176 }
177
178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
179 t.Fatalf("List: %v", err)
180 }
181
182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
183 t.Fatalf("Search: %v", err)
184 }
185}
186
// memSeeker is an in-memory stand-in for an index file, backed by a byte
// slice. searcherForTest wraps a serialized shard in it and hands it to
// NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed placeholder name.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing slice that start at offset off.
// The returned slice aliases the backing slice; it is not a copy.
//
// An error is returned when the requested range extends past the end of the
// data. The end offset is computed in 64 bits because off+sz can wrap around
// in uint32, and it is compared against len rather than relying on the slice
// expression, which is bounded by capacity and would otherwise hand out bytes
// beyond the logical end of the data.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read of %d bytes at offset %d is out of range (size %d)", sz, off, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the backing slice. It returns an error when the
// length cannot be represented as a uint32 instead of silently truncating it.
func (s *memSeeker) Size() (uint32, error) {
	const maxSize = 1<<32 - 1
	if uint64(len(s.data)) > maxSize {
		return 0, fmt.Errorf("memseeker: size %d does not fit in uint32", len(s.data))
	}
	return uint32(len(s.data)), nil
}
203
204func TestNewlines(t *testing.T) {
205 b := testShardBuilder(t, nil,
206 // -----------------------------------------012345-678901-234
207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
208
209 t.Run("LineMatches", func(t *testing.T) {
210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
211
212 matches := sres.Files
213 want := []zoekt.FileMatch{{
214 FileName: "filename",
215 LineMatches: []zoekt.LineMatch{{
216 LineFragments: []zoekt.LineFragmentMatch{{
217 Offset: 8,
218 LineOffset: 2,
219 MatchLength: 3,
220 }},
221 Line: []byte("line2\n"),
222 LineStart: 6,
223 LineEnd: 12,
224 LineNumber: 2,
225 }},
226 }}
227
228 if diff := cmp.Diff(matches, want); diff != "" {
229 t.Fatal(diff)
230 }
231 })
232
233 t.Run("ChunkMatches", func(t *testing.T) {
234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
235
236 matches := sres.Files
237 want := []zoekt.FileMatch{{
238 FileName: "filename",
239 ChunkMatches: []zoekt.ChunkMatch{{
240 Content: []byte("line2\n"),
241 ContentStart: zoekt.Location{
242 ByteOffset: 6,
243 LineNumber: 2,
244 Column: 1,
245 },
246 Ranges: []zoekt.Range{{
247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3},
248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6},
249 }},
250 }},
251 }}
252
253 if diff := cmp.Diff(want, matches); diff != "" {
254 t.Fatal(diff)
255 }
256 })
257}
258
259// A result spanning multiple lines should have LineMatches that only cover
260// single lines.
261func TestQueryNewlines(t *testing.T) {
262 text := "line1\nline2\nbla"
263 b := testShardBuilder(t, nil,
264 Document{Name: "filename", Content: []byte(text)})
265
266 t.Run("LineMatches", func(t *testing.T) {
267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
268 matches := sres.Files
269 if len(matches) != 1 {
270 t.Fatalf("got %d file matches, want exactly one", len(matches))
271 }
272 m := matches[0]
273 if len(m.LineMatches) != 2 {
274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
275 }
276 })
277
278 t.Run("ChunkMatches", func(t *testing.T) {
279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
280 matches := sres.Files
281 if len(matches) != 1 {
282 t.Fatalf("got %d file matches, want exactly one", len(matches))
283 }
284 m := matches[0]
285 if len(m.ChunkMatches) != 1 {
286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
287 }
288 })
289}
290
291var chunkOpts = zoekt.SearchOptions{ChunkMatches: true}
292
293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult {
294 searcher := searcherForTest(t, b)
295 var opts zoekt.SearchOptions
296 if len(o) > 0 {
297 opts = o[0]
298 }
299 res, err := searcher.Search(context.Background(), q, &opts)
300 if err != nil {
301 t.Fatalf("Search(%s): %v", q, err)
302 }
303 clearScores(res)
304 return res
305}
306
307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher {
308 var buf bytes.Buffer
309 if err := b.Write(&buf); err != nil {
310 t.Fatal(err)
311 }
312 f := &memSeeker{buf.Bytes()}
313
314 searcher, err := NewSearcher(f)
315 if err != nil {
316 t.Fatalf("NewSearcher: %v", err)
317 }
318
319 return searcher
320}
321
322func TestCaseFold(t *testing.T) {
323 b := testShardBuilder(t, nil,
324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
325 // -----------------------------------012345678901234
326 )
327 t.Run("LineMatches", func(t *testing.T) {
328 sres := searchForTest(t, b, &query.Substring{
329 Pattern: "bananas",
330 CaseSensitive: true,
331 })
332 matches := sres.Files
333 if len(matches) != 0 {
334 t.Errorf("foldcase: got %#v, want 0 matches", matches)
335 }
336
337 sres = searchForTest(t, b,
338 &query.Substring{
339 Pattern: "BaNaNAS",
340 CaseSensitive: true,
341 })
342 matches = sres.Files
343 if len(matches) != 1 {
344 t.Errorf("no foldcase: got %v, want 1 matches", matches)
345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
346 t.Errorf("foldcase: got %v, want offsets 7", matches)
347 }
348 })
349
350 t.Run("ChunkMatches", func(t *testing.T) {
351 sres := searchForTest(t, b, &query.Substring{
352 Pattern: "bananas",
353 CaseSensitive: true,
354 }, chunkOpts)
355 matches := sres.Files
356 if len(matches) != 0 {
357 t.Errorf("foldcase: got %#v, want 0 matches", matches)
358 }
359
360 sres = searchForTest(t, b,
361 &query.Substring{
362 Pattern: "BaNaNAS",
363 CaseSensitive: true,
364 })
365 matches = sres.Files
366 if len(matches) != 1 {
367 t.Errorf("no foldcase: got %v, want 1 matches", matches)
368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
369 t.Errorf("foldcase: got %v, want offsets 7", matches)
370 }
371 })
372}
373
374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
375// chars. Those are then set as symbols.
376func wordsAsSymbols(doc Document) Document {
377 re := regexp.MustCompile(`\b\w{2,}\b`)
378 var symbols []DocumentSection
379 var symbolsMetadata []*zoekt.Symbol
380 for _, match := range re.FindAllIndex(doc.Content, -1) {
381 symbols = append(symbols, DocumentSection{
382 Start: uint32(match[0]),
383 End: uint32(match[1]),
384 })
385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"})
386 }
387 doc.Symbols = symbols
388 doc.SymbolsMetaData = symbolsMetadata
389 return doc
390}
391
392func TestSearchStats(t *testing.T) {
393 ctx := context.Background()
394 searcher := searcherForTest(t, testShardBuilder(t, nil,
395 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
396 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
397 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
398 // --------------------------------------------------0123456789012345
399 ))
400
401 andQuery := query.NewAnd(
402 &query.Substring{
403 Pattern: "banana",
404 },
405 &query.Substring{
406 Pattern: "apple",
407 },
408 )
409
410 t.Run("LineMatches", func(t *testing.T) {
411 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{})
412 if err != nil {
413 t.Fatal(err)
414 }
415 matches := sres.Files
416 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
417 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
418 }
419
420 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
421 t.Fatalf("got %#v, want offsets 2,9", matches)
422 }
423 })
424 t.Run("ChunkMatches", func(t *testing.T) {
425 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
426 if err != nil {
427 t.Fatal(err)
428 }
429 matches := sres.Files
430 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
431 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
432 }
433
434 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
435 t.Fatalf("got %#v, want offsets 2,9", matches)
436 }
437 })
438 t.Run("Stats", func(t *testing.T) {
439 cases := []struct {
440 Name string
441 Q query.Q
442 Want zoekt.Stats
443 }{{
444 Name: "and-query",
445 Q: andQuery,
446 Want: zoekt.Stats{
447 FilesLoaded: 1,
448 ContentBytesLoaded: 22,
449 IndexBytesLoaded: 10,
450 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
451 NgramLookups: 104,
452 MatchCount: 2,
453 FileCount: 1,
454 FilesConsidered: 2,
455 ShardsScanned: 1,
456 },
457 }, {
458 Name: "one-trigram",
459 Q: &query.Substring{
460 Pattern: "a y",
461 Content: true,
462 CaseSensitive: true,
463 },
464 Want: zoekt.Stats{
465 ContentBytesLoaded: 14,
466 IndexBytesLoaded: 1,
467 FileCount: 1,
468 FilesConsidered: 1,
469 FilesLoaded: 1,
470 ShardsScanned: 1,
471 MatchCount: 1,
472 NgramMatches: 1,
473 NgramLookups: 2, // once to lookup frequency then again to access posting list.
474 },
475 }, {
476 Name: "one-trigram-case-insensitive",
477 Q: &query.Substring{
478 Pattern: "a y",
479 Content: true,
480 },
481 Want: zoekt.Stats{
482 ContentBytesLoaded: 14,
483 IndexBytesLoaded: 1,
484 FileCount: 1,
485 FilesConsidered: 1,
486 FilesLoaded: 1,
487 ShardsScanned: 1,
488 MatchCount: 1,
489 NgramMatches: 1,
490 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
491 },
492 }, {
493 Name: "one-trigram-pruned",
494 Q: &query.Substring{
495 Pattern: "foo",
496 Content: true,
497 CaseSensitive: true,
498 },
499 Want: zoekt.Stats{
500 ShardsSkippedFilter: 1,
501 NgramLookups: 1, // only had to lookup once
502 },
503 }, {
504 Name: "one-trigram-branch-pruned",
505 Q: query.NewAnd(
506 &query.Substring{
507 Pattern: "foo",
508 Content: true,
509 CaseSensitive: true,
510 },
511 &query.Substring{
512 Pattern: "a y",
513 Content: true,
514 CaseSensitive: true,
515 },
516 ),
517 Want: zoekt.Stats{
518 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
519 ShardsSkippedFilter: 1,
520 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
521 },
522 }, {
523 Name: "symbol-substr-nomatch",
524 Q: &query.Symbol{Expr: &query.Substring{
525 Pattern: "banana apple",
526 Content: true,
527 CaseSensitive: true,
528 }},
529 Want: zoekt.Stats{
530 IndexBytesLoaded: 3,
531 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
532 MatchCount: 0, // even though there is a match it doesn't align with a symbol
533 ShardsScanned: 1,
534 NgramMatches: 1,
535 NgramLookups: 12,
536 },
537 }, {
538 Name: "symbol-substr",
539 Q: &query.Symbol{Expr: &query.Substring{
540 Pattern: "apple",
541 Content: true,
542 CaseSensitive: true,
543 }},
544 Want: zoekt.Stats{
545 ContentBytesLoaded: 35,
546 IndexBytesLoaded: 4,
547 FileCount: 2,
548 FilesConsidered: 2, // must be 2 to ensure we used the index
549 FilesLoaded: 2,
550 MatchCount: 2, // apple symbols is in two files
551 ShardsScanned: 1,
552 NgramMatches: 2,
553 NgramLookups: 5,
554 },
555 }, {
556 Name: "symbol-regexp-nomatch",
557 Q: &query.Symbol{Expr: &query.Regexp{
558 Regexp: mustParseRE("^apple.banana$"),
559 Content: true,
560 CaseSensitive: true,
561 }},
562 Want: zoekt.Stats{
563 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
564 IndexBytesLoaded: 10,
565 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
566 FilesLoaded: 2,
567 MatchCount: 0, // even though there is a match it doesn't align with a symbol
568 ShardsScanned: 1,
569 NgramMatches: 3,
570 NgramLookups: 11,
571 },
572 }, {
573 Name: "symbol-regexp",
574 Q: &query.Symbol{Expr: &query.Regexp{
575 Regexp: mustParseRE("^app.e$"),
576 Content: true,
577 CaseSensitive: true,
578 }},
579 Want: zoekt.Stats{
580 ContentBytesLoaded: 35,
581 IndexBytesLoaded: 2,
582 FileCount: 2,
583 FilesConsidered: 2, // must be 2 to ensure we used the index
584 FilesLoaded: 2,
585 MatchCount: 2, // apple symbols is in two files
586 ShardsScanned: 1,
587 NgramMatches: 2,
588 NgramLookups: 2,
589 },
590 }}
591
592 for _, tc := range cases {
593 t.Run(tc.Name, func(t *testing.T) {
594 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
595 if err != nil {
596 t.Fatal(err)
597 }
598 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
599 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
600 }
601 })
602 }
603 })
604}
605
606func TestAndNegateSearch(t *testing.T) {
607 b := testShardBuilder(t, nil,
608 Document{Name: "f1", Content: []byte("x banana y")},
609 // -----------------------------------0123456789
610 Document{Name: "f4", Content: []byte("x banana apple y")})
611
612 t.Run("LineMatches", func(t *testing.T) {
613 sres := searchForTest(t, b, query.NewAnd(
614 &query.Substring{
615 Pattern: "banana",
616 },
617 &query.Not{Child: &query.Substring{
618 Pattern: "apple",
619 }}))
620
621 matches := sres.Files
622
623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
624 t.Fatalf("got %v, want 1 match", matches)
625 }
626 if matches[0].FileName != "f1" {
627 t.Fatalf("got match %#v, want FileName: f1", matches[0])
628 }
629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
630 t.Fatalf("got %v, want offset 2", matches)
631 }
632 })
633
634 t.Run("ChunkMatches", func(t *testing.T) {
635 sres := searchForTest(t, b,
636 query.NewAnd(
637 &query.Substring{
638 Pattern: "banana",
639 },
640 &query.Not{Child: &query.Substring{
641 Pattern: "apple",
642 }},
643 ),
644 chunkOpts,
645 )
646
647 matches := sres.Files
648
649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
650 t.Fatalf("got %v, want 1 match", matches)
651 }
652 if matches[0].FileName != "f1" {
653 t.Fatalf("got match %#v, want FileName: f1", matches[0])
654 }
655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
656 t.Fatalf("got %v, want offset 2", matches)
657 }
658 })
659}
660
661func TestNegativeMatchesOnlyShortcut(t *testing.T) {
662 b := testShardBuilder(t, nil,
663 Document{Name: "f1", Content: []byte("x banana y")},
664 Document{Name: "f2", Content: []byte("x appelmoes y")},
665 Document{Name: "f3", Content: []byte("x appelmoes y")},
666 Document{Name: "f3", Content: []byte("x appelmoes y")})
667
668 t.Run("LineMatches", func(t *testing.T) {
669 sres := searchForTest(t, b, query.NewAnd(
670 &query.Substring{
671 Pattern: "banana",
672 },
673 &query.Not{Child: &query.Substring{
674 Pattern: "appel",
675 }}))
676
677 if sres.Stats.FilesConsidered != 1 {
678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
679 }
680 })
681
682 t.Run("ChunkMatches", func(t *testing.T) {
683 sres := searchForTest(t, b, query.NewAnd(
684 &query.Substring{
685 Pattern: "banana",
686 },
687 &query.Not{Child: &query.Substring{
688 Pattern: "appel",
689 }}), chunkOpts)
690
691 if sres.Stats.FilesConsidered != 1 {
692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
693 }
694 })
695}
696
697func TestFileSearch(t *testing.T) {
698 b := testShardBuilder(t, nil,
699 Document{Name: "banzana", Content: []byte("x orange y")},
700 // -------------0123456
701 Document{Name: "banana", Content: []byte("x apple y")},
702 // -------------012345
703 )
704
705 t.Run("LineMatches", func(t *testing.T) {
706 sres := searchForTest(t, b, &query.Substring{
707 Pattern: "anan",
708 FileName: true,
709 })
710
711 matches := sres.Files
712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
713 t.Fatalf("got %v, want 1 match", matches)
714 }
715
716 got := matches[0].LineMatches[0]
717 want := zoekt.LineMatch{
718 Line: []byte("banana"),
719 LineFragments: []zoekt.LineFragmentMatch{{
720 Offset: 1,
721 LineOffset: 1,
722 MatchLength: 4,
723 }},
724 FileName: true,
725 }
726
727 if !reflect.DeepEqual(got, want) {
728 t.Errorf("got %#v, want %#v", got, want)
729 }
730 })
731
732 t.Run("ChunkMatches", func(t *testing.T) {
733 sres := searchForTest(t, b, &query.Substring{
734 Pattern: "anan",
735 FileName: true,
736 }, chunkOpts)
737
738 matches := sres.Files
739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
740 t.Fatalf("got %v, want 1 match", matches)
741 }
742
743 got := matches[0].ChunkMatches[0]
744 want := zoekt.ChunkMatch{
745 Content: []byte("banana"),
746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
747 Ranges: []zoekt.Range{{
748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2},
749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6},
750 }},
751 FileName: true,
752 }
753
754 if diff := cmp.Diff(want, got); diff != "" {
755 t.Fatal(diff)
756 }
757 })
758
759 t.Run("FileNameSet", func(t *testing.T) {
760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
761
762 matches := sres.Files
763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
764 t.Fatalf("got %v, want 1 match", matches)
765 }
766
767 got := matches[0].ChunkMatches[0]
768 want := zoekt.ChunkMatch{
769 Content: []byte("banana"),
770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
771 Ranges: []zoekt.Range{{
772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7},
774 }},
775 FileName: true,
776 }
777
778 if diff := cmp.Diff(want, got); diff != "" {
779 t.Fatal(diff)
780 }
781 })
782}
783
784func TestFileCase(t *testing.T) {
785 b := testShardBuilder(t, nil,
786 Document{Name: "BANANA", Content: []byte("x orange y")})
787
788 t.Run("LineMatches", func(t *testing.T) {
789 sres := searchForTest(t, b, &query.Substring{
790 Pattern: "banana",
791 FileName: true,
792 })
793
794 matches := sres.Files
795 if len(matches) != 1 || matches[0].FileName != "BANANA" {
796 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
797 }
798 })
799
800 t.Run("ChunkMatches", func(t *testing.T) {
801 sres := searchForTest(t, b, &query.Substring{
802 Pattern: "banana",
803 FileName: true,
804 }, chunkOpts)
805
806 matches := sres.Files
807 if len(matches) != 1 || matches[0].FileName != "BANANA" {
808 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
809 }
810 })
811}
812
813func TestFileRegexpSearchBruteForce(t *testing.T) {
814 b := testShardBuilder(t, nil,
815 Document{Name: "banzana", Content: []byte("x orange y")},
816 Document{Name: "banana", Content: []byte("x apple y")},
817 )
818 t.Run("LineMatches", func(t *testing.T) {
819 sres := searchForTest(t, b, &query.Regexp{
820 Regexp: mustParseRE("[qn][zx]"),
821 FileName: true,
822 })
823
824 matches := sres.Files
825 if len(matches) != 1 || matches[0].FileName != "banzana" {
826 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
827 }
828 })
829 t.Run("LineMatches", func(t *testing.T) {
830 sres := searchForTest(t, b, &query.Regexp{
831 Regexp: mustParseRE("[qn][zx]"),
832 FileName: true,
833 }, chunkOpts)
834
835 matches := sres.Files
836 if len(matches) != 1 || matches[0].FileName != "banzana" {
837 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
838 }
839 })
840}
841
842func TestFileRegexpSearchShortString(t *testing.T) {
843 b := testShardBuilder(t, nil,
844 Document{Name: "banana.py", Content: []byte("x orange y")})
845
846 t.Run("LineMatches", func(t *testing.T) {
847 sres := searchForTest(t, b, &query.Regexp{
848 Regexp: mustParseRE("ana.py"),
849 FileName: true,
850 })
851
852 matches := sres.Files
853 if len(matches) != 1 || matches[0].FileName != "banana.py" {
854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
855 }
856 })
857
858 t.Run("ChunkMatches", func(t *testing.T) {
859 sres := searchForTest(t, b, &query.Regexp{
860 Regexp: mustParseRE("ana.py"),
861 FileName: true,
862 }, chunkOpts)
863
864 matches := sres.Files
865 if len(matches) != 1 || matches[0].FileName != "banana.py" {
866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
867 }
868 })
869}
870
871func TestFileSubstringSearchBruteForce(t *testing.T) {
872 b := testShardBuilder(t, nil,
873 Document{Name: "BANZANA", Content: []byte("x orange y")},
874 Document{Name: "banana", Content: []byte("x apple y")})
875
876 q := &query.Substring{
877 Pattern: "z",
878 FileName: true,
879 }
880
881 t.Run("LineMatches", func(t *testing.T) {
882 res := searchForTest(t, b, q)
883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
885 }
886 })
887
888 t.Run("ChunkMatches", func(t *testing.T) {
889 res := searchForTest(t, b, q, chunkOpts)
890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
892 }
893 })
894}
895
896func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
897 b := testShardBuilder(t, nil,
898 Document{Name: "BANZANA", Content: []byte("x orange y")},
899 Document{Name: "bananaq", Content: []byte("x apple y")})
900
901 q := &query.Substring{
902 Pattern: "q",
903 FileName: true,
904 }
905 t.Run("LineMatches", func(t *testing.T) {
906 res := searchForTest(t, b, q)
907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
908 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
909 }
910 })
911
912 t.Run("LineMatches", func(t *testing.T) {
913 res := searchForTest(t, b, q, chunkOpts)
914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
915 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
916 }
917 })
918}
919
920func TestSearchMatchAll(t *testing.T) {
921 b := testShardBuilder(t, nil,
922 Document{Name: "banzana", Content: []byte("x orange y")},
923 Document{Name: "banana", Content: []byte("x apple y")})
924
925 t.Run("LineMatches", func(t *testing.T) {
926 sres := searchForTest(t, b, &query.Const{Value: true})
927 matches := sres.Files
928 if len(matches) != 2 {
929 t.Fatalf("got %v, want 2 matches", matches)
930 }
931 })
932
933 t.Run("ChunkMatches", func(t *testing.T) {
934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
935 matches := sres.Files
936 if len(matches) != 2 {
937 t.Fatalf("got %v, want 2 matches", matches)
938 }
939 })
940}
941
942func TestSearchNewline(t *testing.T) {
943 b := testShardBuilder(t, nil,
944 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
945
946 t.Run("LineMatches", func(t *testing.T) {
947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
948
949 // Just check that we don't crash.
950
951 matches := sres.Files
952 if len(matches) != 1 {
953 t.Fatalf("got %v, want 1 matches", matches)
954 }
955 })
956
957 t.Run("ChunkMatches", func(t *testing.T) {
958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
959
960 // Just check that we don't crash.
961
962 matches := sres.Files
963 if len(matches) != 1 {
964 t.Fatalf("got %v, want 1 matches", matches)
965 }
966 })
967}
968
969func TestSearchMatchAllRegexp(t *testing.T) {
970 b := testShardBuilder(t, nil,
971 Document{Name: "banzana", Content: []byte("abcd")},
972 Document{Name: "banana", Content: []byte("pqrs")})
973
974 t.Run("LineMatches", func(t *testing.T) {
975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
976
977 matches := sres.Files
978 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
979 t.Fatalf("got %v, want 2 matches", matches)
980 }
981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
982 t.Fatalf("want 4 chars in every file, got %#v", matches)
983 }
984 })
985
986 t.Run("ChunkMatches", func(t *testing.T) {
987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
988
989 matches := sres.Files
990 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
991 t.Fatalf("got %v, want 2 matches", matches)
992 }
993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
994 t.Fatalf("want 4 chars in every file, got %#v", matches)
995 }
996 })
997}
998
999func TestSearchBM25MatchScores(t *testing.T) {
1000 ctx := context.Background()
1001 searcher := searcherForTest(t, testShardBuilder(t, nil,
1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")},
1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")},
1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}),
1005 ))
1006
1007 t.Run("LineMatches", func(t *testing.T) {
1008 q := &query.Substring{Pattern: "two"}
1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true})
1010 if err != nil {
1011 t.Fatal(err)
1012 }
1013 matches := sres.Files
1014 if len(matches) != 1 {
1015 t.Fatalf("want 1 file index, got %d", len(matches))
1016 }
1017
1018 if len(matches[0].LineMatches) != 2 {
1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1020 }
1021
1022 if matches[0].LineMatches[0].LineNumber != 4 {
1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber)
1024 }
1025 })
1026
1027 t.Run("ChunkMatches", func(t *testing.T) {
1028 q := &query.Substring{Pattern: "five"}
1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1030 if err != nil {
1031 t.Fatal(err)
1032 }
1033
1034 matches := sres.Files
1035 if len(matches) != 1 {
1036 t.Fatalf("want 1 file index, got %d", len(matches))
1037 }
1038
1039 if len(matches[0].ChunkMatches) != 2 {
1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1041 }
1042
1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 {
1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch)
1045 }
1046 })
1047
1048 t.Run("ChunkMatches with symbols", func(t *testing.T) {
1049 q := &query.Or{
1050 Children: []query.Q{
1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}},
1052 &query.Substring{Pattern: "five"},
1053 },
1054 }
1055
1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1057 if err != nil {
1058 t.Fatal(err)
1059 }
1060
1061 matches := sres.Files
1062 if len(matches) != 2 {
1063 t.Fatalf("want 2 file index, got %d", len(matches))
1064 }
1065
1066 foundSymbolInfo := false
1067 for _, m := range matches {
1068 for _, cm := range m.ChunkMatches {
1069 if len(cm.SymbolInfo) > 0 {
1070 foundSymbolInfo = true
1071 }
1072 }
1073 }
1074
1075 if !foundSymbolInfo {
1076 t.Fatalf("want symbol info, got none")
1077 }
1078 })
1079}
1080
1081func TestFileRestriction(t *testing.T) {
1082 b := testShardBuilder(t, nil,
1083 Document{Name: "banana1", Content: []byte("x orange y")},
1084 Document{Name: "banana2", Content: []byte("x apple y")},
1085 Document{Name: "orange", Content: []byte("x apple z")})
1086
1087 t.Run("LineMatches", func(t *testing.T) {
1088 sres := searchForTest(t, b, query.NewAnd(
1089 &query.Substring{
1090 Pattern: "banana",
1091 FileName: true,
1092 },
1093 &query.Substring{
1094 Pattern: "apple",
1095 }))
1096
1097 matches := sres.Files
1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1099 t.Fatalf("got %v, want 1 match", matches)
1100 }
1101
1102 match := matches[0].LineMatches[0]
1103 got := string(match.Line)
1104 want := "x apple y"
1105 if got != want {
1106 t.Errorf("got match %#v, want line %q", match, want)
1107 }
1108 })
1109
1110 t.Run("ChunkMatches", func(t *testing.T) {
1111 sres := searchForTest(t, b, query.NewAnd(
1112 &query.Substring{
1113 Pattern: "banana",
1114 FileName: true,
1115 },
1116 &query.Substring{
1117 Pattern: "apple",
1118 }), chunkOpts)
1119
1120 matches := sres.Files
1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1122 t.Fatalf("got %v, want 1 match", matches)
1123 }
1124
1125 match := matches[0].ChunkMatches[0]
1126 got := string(match.Content)
1127 want := "x apple y"
1128 if got != want {
1129 t.Errorf("got match %#v, want line %q", match, want)
1130 }
1131 })
1132}
1133
1134func TestFileNameBoundary(t *testing.T) {
1135 b := testShardBuilder(t, nil,
1136 Document{Name: "banana2", Content: []byte("x apple y")},
1137 Document{Name: "helpers.go", Content: []byte("x apple y")},
1138 Document{Name: "foo", Content: []byte("x apple y")})
1139
1140 t.Run("LineMatches", func(t *testing.T) {
1141 sres := searchForTest(t, b, &query.Substring{
1142 Pattern: "helpers.go",
1143 FileName: true,
1144 })
1145
1146 matches := sres.Files
1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1148 t.Fatalf("got %v, want 1 match", matches)
1149 }
1150 })
1151
1152 t.Run("ChunkMatches", func(t *testing.T) {
1153 sres := searchForTest(t, b, &query.Substring{
1154 Pattern: "helpers.go",
1155 FileName: true,
1156 }, chunkOpts)
1157
1158 matches := sres.Files
1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1160 t.Fatalf("got %v, want 1 match", matches)
1161 }
1162 })
1163}
1164
1165func TestDocumentOrder(t *testing.T) {
1166 var docs []Document
1167 for i := 0; i < 3; i++ {
1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1169 }
1170
1171 b := testShardBuilder(t, nil, docs...)
1172
1173 t.Run("LineMatches", func(t *testing.T) {
1174 sres := searchForTest(t, b, query.NewAnd(
1175 &query.Substring{
1176 Pattern: "needle",
1177 }))
1178
1179 want := []string{"f0", "f1", "f2"}
1180 var got []string
1181 for _, f := range sres.Files {
1182 got = append(got, f.FileName)
1183 }
1184 if !reflect.DeepEqual(got, want) {
1185 t.Fatalf("got %v, want %v", got, want)
1186 }
1187 })
1188
1189 t.Run("ChunkMatches", func(t *testing.T) {
1190 sres := searchForTest(t, b,
1191 query.NewAnd(&query.Substring{
1192 Pattern: "needle",
1193 }),
1194 chunkOpts,
1195 )
1196
1197 want := []string{"f0", "f1", "f2"}
1198 var got []string
1199 for _, f := range sres.Files {
1200 got = append(got, f.FileName)
1201 }
1202 if !reflect.DeepEqual(got, want) {
1203 t.Fatalf("got %v, want %v", got, want)
1204 }
1205 })
1206}
1207
1208func TestBranchMask(t *testing.T) {
1209 b := testShardBuilder(t, &zoekt.Repository{
1210 Branches: []zoekt.RepositoryBranch{
1211 {"master", "v-master"},
1212 {"stable", "v-stable"},
1213 {"bonzai", "v-bonzai"},
1214 },
1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1219 )
1220
1221 t.Run("LineMatches", func(t *testing.T) {
1222 sres := searchForTest(t, b, query.NewAnd(
1223 &query.Substring{
1224 Pattern: "needle",
1225 },
1226 &query.Branch{
1227 Pattern: "table",
1228 }))
1229
1230 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1231 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1232 }
1233
1234 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1235 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1236 }
1237 })
1238
1239 t.Run("ChunkMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b, query.NewAnd(
1241 &query.Substring{
1242 Pattern: "needle",
1243 },
1244 &query.Branch{
1245 Pattern: "table",
1246 }),
1247 chunkOpts,
1248 )
1249
1250 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1251 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1252 }
1253
1254 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1255 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1256 }
1257 })
1258}
1259
1260func TestBranchLimit(t *testing.T) {
1261 for limit := 64; limit <= 65; limit++ {
1262 r := &zoekt.Repository{}
1263 for i := 0; i < limit; i++ {
1264 s := fmt.Sprintf("b%d", i)
1265 r.Branches = append(r.Branches, zoekt.RepositoryBranch{
1266 s, "v-" + s,
1267 })
1268 }
1269 _, err := NewShardBuilder(r)
1270 if limit == 64 && err != nil {
1271 t.Fatalf("NewShardBuilder: %v", err)
1272 } else if limit == 65 && err == nil {
1273 t.Fatalf("NewShardBuilder succeeded")
1274 }
1275 }
1276}
1277
1278func TestBranchReport(t *testing.T) {
1279 branches := []string{"stable", "master"}
1280 b := testShardBuilder(t, &zoekt.Repository{
1281 Branches: []zoekt.RepositoryBranch{
1282 {"stable", "vs"},
1283 {"master", "vm"},
1284 },
1285 },
1286 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1287
1288 t.Run("LineMatches", func(t *testing.T) {
1289 sres := searchForTest(t, b, &query.Substring{
1290 Pattern: "needle",
1291 })
1292 if len(sres.Files) != 1 {
1293 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1294 }
1295
1296 f := sres.Files[0]
1297 if !reflect.DeepEqual(f.Branches, branches) {
1298 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1299 }
1300 })
1301
1302 t.Run("ChunkMatches", func(t *testing.T) {
1303 sres := searchForTest(t, b, &query.Substring{
1304 Pattern: "needle",
1305 }, chunkOpts)
1306 if len(sres.Files) != 1 {
1307 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1308 }
1309
1310 f := sres.Files[0]
1311 if !reflect.DeepEqual(f.Branches, branches) {
1312 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1313 }
1314 })
1315}
1316
1317func TestBranchVersions(t *testing.T) {
1318 b := testShardBuilder(t, &zoekt.Repository{
1319 Branches: []zoekt.RepositoryBranch{
1320 {"stable", "v-stable"},
1321 {"master", "v-master"},
1322 },
1323 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1324
1325 t.Run("LineMatches", func(t *testing.T) {
1326 sres := searchForTest(t, b, &query.Substring{
1327 Pattern: "needle",
1328 })
1329 if len(sres.Files) != 1 {
1330 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1331 }
1332
1333 f := sres.Files[0]
1334 if f.Version != "v-master" {
1335 t.Fatalf("got file %#v, want version 'v-master'", f)
1336 }
1337 })
1338
1339 t.Run("ChunkMatches", func(t *testing.T) {
1340 sres := searchForTest(t, b, &query.Substring{
1341 Pattern: "needle",
1342 }, chunkOpts)
1343 if len(sres.Files) != 1 {
1344 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1345 }
1346
1347 f := sres.Files[0]
1348 if f.Version != "v-master" {
1349 t.Fatalf("got file %#v, want version 'v-master'", f)
1350 }
1351 })
1352}
1353
// mustParseRE parses s with Perl syntax flags and returns the resulting
// regexp tree. It panics if s is not a valid pattern, so it is only suitable
// for patterns hard-coded in tests.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}

	panic(err)
}
1362
1363func TestRegexp(t *testing.T) {
1364 content := []byte("needle the bla")
1365 // ----------------01234567890123
1366
1367 b := testShardBuilder(t, nil,
1368 Document{
1369 Name: "f1",
1370 Content: content,
1371 })
1372
1373 t.Run("LineMatches", func(t *testing.T) {
1374 sres := searchForTest(t, b,
1375 &query.Regexp{
1376 Regexp: mustParseRE("dle.*bla"),
1377 })
1378
1379 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1380 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1381 }
1382
1383 got := sres.Files[0].LineMatches[0]
1384 want := zoekt.LineMatch{
1385 LineFragments: []zoekt.LineFragmentMatch{{
1386 LineOffset: 3,
1387 Offset: 3,
1388 MatchLength: 11,
1389 }},
1390 Line: content,
1391 FileName: false,
1392 LineNumber: 1,
1393 LineStart: 0,
1394 LineEnd: 14,
1395 }
1396
1397 if !reflect.DeepEqual(got, want) {
1398 t.Errorf("got %#v, want %#v", got, want)
1399 }
1400 })
1401
1402 t.Run("ChunkMatches", func(t *testing.T) {
1403 sres := searchForTest(t, b,
1404 &query.Regexp{
1405 Regexp: mustParseRE("dle.*bla"),
1406 }, chunkOpts)
1407
1408 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1409 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1410 }
1411
1412 got := sres.Files[0].ChunkMatches[0]
1413 want := zoekt.ChunkMatch{
1414 Content: content,
1415 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1416 Ranges: []zoekt.Range{{
1417 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1418 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1419 }},
1420 }
1421
1422 if diff := cmp.Diff(want, got); diff != "" {
1423 t.Fatal(diff)
1424 }
1425 })
1426}
1427
1428func TestRegexpFile(t *testing.T) {
1429 content := []byte("needle the bla")
1430
1431 name := "let's play: find the mussel"
1432 b := testShardBuilder(t, nil,
1433 Document{Name: name, Content: content},
1434 Document{Name: "play.txt", Content: content})
1435
1436 t.Run("LineMatches", func(t *testing.T) {
1437 sres := searchForTest(t, b,
1438 &query.Regexp{
1439 Regexp: mustParseRE("play.*mussel"),
1440 FileName: true,
1441 })
1442
1443 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1444 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1445 }
1446
1447 if sres.Files[0].FileName != name {
1448 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1449 }
1450 })
1451
1452 t.Run("ChunkMatches", func(t *testing.T) {
1453 sres := searchForTest(t, b,
1454 &query.Regexp{
1455 Regexp: mustParseRE("play.*mussel"),
1456 FileName: true,
1457 }, chunkOpts)
1458
1459 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1460 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1461 }
1462
1463 if sres.Files[0].FileName != name {
1464 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1465 }
1466 })
1467}
1468
1469func TestRegexpOrder(t *testing.T) {
1470 content := []byte("bla the needle")
1471 // ----------------01234567890123
1472
1473 b := testShardBuilder(t, nil,
1474 Document{Name: "f1", Content: content})
1475
1476 t.Run("LineMatches", func(t *testing.T) {
1477 sres := searchForTest(t, b,
1478 &query.Regexp{
1479 Regexp: mustParseRE("dle.*bla"),
1480 })
1481
1482 if len(sres.Files) != 0 {
1483 t.Fatalf("got %v, want 0 matches", sres.Files)
1484 }
1485 })
1486
1487 t.Run("ChunkMatches", func(t *testing.T) {
1488 sres := searchForTest(t, b,
1489 &query.Regexp{
1490 Regexp: mustParseRE("dle.*bla"),
1491 })
1492
1493 if len(sres.Files) != 0 {
1494 t.Fatalf("got %v, want 0 matches", sres.Files)
1495 }
1496 })
1497}
1498
1499func TestRepoName(t *testing.T) {
1500 content := []byte("bla the needle")
1501 // ----------------01234567890123
1502
1503 b := testShardBuilder(t, &zoekt.Repository{Name: "bla"},
1504 Document{Name: "f1", Content: content})
1505
1506 t.Run("LineMatches", func(t *testing.T) {
1507 sres := searchForTest(t, b,
1508 query.NewAnd(
1509 &query.Substring{Pattern: "needle"},
1510 &query.Repo{Regexp: regexp.MustCompile("foo")},
1511 ))
1512
1513 if len(sres.Files) != 0 {
1514 t.Fatalf("got %v, want 0 matches", sres.Files)
1515 }
1516
1517 if sres.Stats.FilesConsidered > 0 {
1518 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1519 }
1520
1521 sres = searchForTest(t, b,
1522 query.NewAnd(
1523 &query.Substring{Pattern: "needle"},
1524 &query.Repo{Regexp: regexp.MustCompile("bla")},
1525 ))
1526 if len(sres.Files) != 1 {
1527 t.Fatalf("got %v, want 1 match", sres.Files)
1528 }
1529 })
1530
1531 t.Run("ChunkMatches", func(t *testing.T) {
1532 sres := searchForTest(t, b,
1533 query.NewAnd(
1534 &query.Substring{Pattern: "needle"},
1535 &query.Repo{Regexp: regexp.MustCompile("foo")},
1536 ),
1537 chunkOpts,
1538 )
1539
1540 if len(sres.Files) != 0 {
1541 t.Fatalf("got %v, want 0 matches", sres.Files)
1542 }
1543
1544 if sres.Stats.FilesConsidered > 0 {
1545 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1546 }
1547
1548 sres = searchForTest(t, b,
1549 query.NewAnd(
1550 &query.Substring{Pattern: "needle"},
1551 &query.Repo{Regexp: regexp.MustCompile("bla")},
1552 ))
1553 if len(sres.Files) != 1 {
1554 t.Fatalf("got %v, want 1 match", sres.Files)
1555 }
1556 })
1557}
1558
1559func TestMergeMatches(t *testing.T) {
1560 t.Run("LineMatches, adjacent matches", func(t *testing.T) {
1561 b := testShardBuilder(t, nil,
1562 Document{Name: "f1", Content: []byte("blablabla")})
1563 sres := searchForTest(t, b,
1564 &query.Substring{Pattern: "bla"})
1565
1566 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1567 t.Fatalf("got %v, want 1 match", sres.Files)
1568 }
1569
1570 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
1571 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
1572 }
1573 })
1574
1575 t.Run("LineMatches, overlapping matches", func(t *testing.T) {
1576 b := testShardBuilder(t, nil,
1577 Document{Name: "f1", Content: []byte("hellogoodbye")})
1578 sres := searchForTest(t, b,
1579 &query.And{Children: []query.Q{
1580 &query.Substring{Pattern: "hello"},
1581 &query.Substring{Pattern: "logood"},
1582 }})
1583
1584 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1585 t.Fatalf("got %v, want 1 match", sres.Files)
1586 }
1587
1588 lineFragments := sres.Files[0].LineMatches[0].LineFragments
1589 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
1590 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
1591 }
1592 })
1593
1594 t.Run("ChunkMatches, no overlap", func(t *testing.T) {
1595 b := testShardBuilder(t, nil,
1596 Document{Name: "f1", Content: []byte("blablabla")})
1597
1598 sres := searchForTest(t, b,
1599 &query.Substring{Pattern: "bla"},
1600 chunkOpts,
1601 )
1602 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1603 t.Fatalf("got %v, want 1 match", sres.Files)
1604 }
1605
1606 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
1607 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
1608 }
1609 })
1610
1611 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
1612 b := testShardBuilder(t, nil,
1613 Document{Name: "f1", Content: []byte("hellogoodbye")})
1614 sres := searchForTest(t, b,
1615 &query.And{Children: []query.Q{
1616 &query.Substring{Pattern: "hello"},
1617 &query.Substring{Pattern: "logood"},
1618 }}, chunkOpts)
1619
1620 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1621 t.Fatalf("got %v, want 1 chunk match", sres.Files)
1622 }
1623
1624 ranges := sres.Files[0].ChunkMatches[0].Ranges
1625 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
1626 t.Fatalf("got %v, want single chunk range 'hello'", ranges)
1627 }
1628 })
1629}
1630
1631func TestRepoURL(t *testing.T) {
1632 content := []byte("blablabla")
1633 b := testShardBuilder(t, &zoekt.Repository{
1634 Name: "name",
1635 URL: "URL",
1636 CommitURLTemplate: "commit",
1637 FileURLTemplate: "file-url",
1638 LineFragmentTemplate: "fragment",
1639 }, Document{Name: "f1", Content: content})
1640
1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1642
1643 if sres.RepoURLs["name"] != "file-url" {
1644 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1645 }
1646 if sres.LineFragments["name"] != "fragment" {
1647 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1648 }
1649}
1650
1651func TestRegexpCaseSensitive(t *testing.T) {
1652 content := []byte("bla\nfunc unmarshalGitiles\n")
1653 b := testShardBuilder(t, nil, Document{
1654 Name: "f1",
1655 Content: content,
1656 })
1657
1658 t.Run("LineMatches", func(t *testing.T) {
1659 res := searchForTest(t, b,
1660 &query.Regexp{
1661 Regexp: mustParseRE("func.*Gitiles"),
1662 CaseSensitive: true,
1663 })
1664
1665 if len(res.Files) != 1 {
1666 t.Fatalf("got %v, want one index", res.Files)
1667 }
1668 })
1669
1670 t.Run("ChunkMatches", func(t *testing.T) {
1671 res := searchForTest(t, b,
1672 &query.Regexp{
1673 Regexp: mustParseRE("func.*Gitiles"),
1674 CaseSensitive: true,
1675 },
1676 chunkOpts,
1677 )
1678
1679 if len(res.Files) != 1 {
1680 t.Fatalf("got %v, want one index", res.Files)
1681 }
1682 })
1683}
1684
1685func TestRegexpCaseFolding(t *testing.T) {
1686 content := []byte("bla\nfunc unmarshalGitiles\n")
1687
1688 b := testShardBuilder(t, nil,
1689 Document{Name: "f1", Content: content})
1690 res := searchForTest(t, b,
1691 &query.Regexp{
1692 Regexp: mustParseRE("func.*GITILES"),
1693 CaseSensitive: false,
1694 })
1695
1696 if len(res.Files) != 1 {
1697 t.Fatalf("got %v, want one index", res.Files)
1698 }
1699}
1700
1701func TestCaseRegexp(t *testing.T) {
1702 content := []byte("BLABLABLA")
1703 b := testShardBuilder(t, nil,
1704 Document{Name: "f1", Content: content})
1705
1706 t.Run("LineMatches", func(t *testing.T) {
1707 res := searchForTest(t, b,
1708 &query.Regexp{
1709 Regexp: mustParseRE("[xb][xl][xa]"),
1710 CaseSensitive: true,
1711 })
1712
1713 if len(res.Files) > 0 {
1714 t.Fatalf("got %v, want no matches", res.Files)
1715 }
1716 })
1717
1718 t.Run("ChunkMatches", func(t *testing.T) {
1719 res := searchForTest(t, b,
1720 &query.Regexp{
1721 Regexp: mustParseRE("[xb][xl][xa]"),
1722 CaseSensitive: true,
1723 },
1724 chunkOpts,
1725 )
1726
1727 if len(res.Files) > 0 {
1728 t.Fatalf("got %v, want no matches", res.Files)
1729 }
1730 })
1731}
1732
1733func TestNegativeRegexp(t *testing.T) {
1734 content := []byte("BLABLABLA needle bla")
1735 b := testShardBuilder(t, nil,
1736 Document{Name: "f1", Content: content})
1737
1738 t.Run("LineMatches", func(t *testing.T) {
1739 res := searchForTest(t, b,
1740 query.NewAnd(
1741 &query.Substring{
1742 Pattern: "needle",
1743 },
1744 &query.Not{
1745 Child: &query.Regexp{
1746 Regexp: mustParseRE(".cs"),
1747 },
1748 }))
1749
1750 if len(res.Files) != 1 {
1751 t.Fatalf("got %v, want 1 match", res.Files)
1752 }
1753 })
1754
1755 t.Run("ChunkMatches", func(t *testing.T) {
1756 res := searchForTest(t, b,
1757 query.NewAnd(
1758 &query.Substring{
1759 Pattern: "needle",
1760 },
1761 &query.Not{
1762 Child: &query.Regexp{
1763 Regexp: mustParseRE(".cs"),
1764 },
1765 },
1766 ),
1767 chunkOpts)
1768
1769 if len(res.Files) != 1 {
1770 t.Fatalf("got %v, want 1 match", res.Files)
1771 }
1772 })
1773}
1774
1775func TestSymbolRank(t *testing.T) {
1776 t.Skip()
1777
1778 content := []byte("func bla() blubxxxxx")
1779 // ----------------01234567890123456789
1780 b := testShardBuilder(t, nil,
1781 Document{
1782 Name: "f1",
1783 Content: content,
1784 }, Document{
1785 Name: "f2",
1786 Content: content,
1787 Symbols: []DocumentSection{{5, 8}},
1788 }, Document{
1789 Name: "f3",
1790 Content: content,
1791 })
1792
1793 t.Run("LineMatches", func(t *testing.T) {
1794 res := searchForTest(t, b,
1795 &query.Substring{
1796 CaseSensitive: false,
1797 Pattern: "bla",
1798 })
1799
1800 if len(res.Files) != 3 {
1801 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1802 }
1803 if res.Files[0].FileName != "f2" {
1804 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1805 }
1806 })
1807
1808 t.Run("ChunkMatches", func(t *testing.T) {
1809 res := searchForTest(t, b,
1810 &query.Substring{
1811 CaseSensitive: false,
1812 Pattern: "bla",
1813 }, chunkOpts)
1814
1815 if len(res.Files) != 3 {
1816 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1817 }
1818 if res.Files[0].FileName != "f2" {
1819 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1820 }
1821 })
1822}
1823
1824func TestSymbolRankRegexpUTF8(t *testing.T) {
1825 t.Skip()
1826
1827 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1828 content := []byte(prefix +
1829 "func bla() blub")
1830 // ------012345678901234
1831 b := testShardBuilder(t, nil,
1832 Document{
1833 Name: "f1",
1834 Content: content,
1835 }, Document{
1836 Name: "f2",
1837 Content: content,
1838 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1839 }, Document{
1840 Name: "f3",
1841 Content: content,
1842 })
1843
1844 t.Run("LineMatches", func(t *testing.T) {
1845 res := searchForTest(t, b,
1846 &query.Regexp{
1847 Regexp: mustParseRE("b.a"),
1848 })
1849
1850 if len(res.Files) != 3 {
1851 t.Fatalf("got %#v, want 3 files", res.Files)
1852 }
1853 if res.Files[0].FileName != "f2" {
1854 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1855 }
1856 })
1857
1858 t.Run("ChunjkMatches", func(t *testing.T) {
1859 res := searchForTest(t, b,
1860 &query.Regexp{
1861 Regexp: mustParseRE("b.a"),
1862 }, chunkOpts)
1863
1864 if len(res.Files) != 3 {
1865 t.Fatalf("got %#v, want 3 files", res.Files)
1866 }
1867 if res.Files[0].FileName != "f2" {
1868 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1869 }
1870 })
1871}
1872
1873func TestPartialSymbolRank(t *testing.T) {
1874 t.Skip()
1875
1876 content := []byte("func bla() blub")
1877 // ----------------012345678901234
1878
1879 b := testShardBuilder(t, nil,
1880 Document{
1881 Name: "f1",
1882 Content: content,
1883 Symbols: []DocumentSection{{4, 9}},
1884 }, Document{
1885 Name: "f2",
1886 Content: content,
1887 Symbols: []DocumentSection{{4, 8}},
1888 }, Document{
1889 Name: "f3",
1890 Content: content,
1891 Symbols: []DocumentSection{{4, 9}},
1892 })
1893
1894 t.Run("LineMatches", func(t *testing.T) {
1895 res := searchForTest(t, b,
1896 &query.Substring{
1897 Pattern: "bla",
1898 })
1899
1900 if len(res.Files) != 3 {
1901 t.Fatalf("got %#v, want 3 files", res.Files)
1902 }
1903 if res.Files[0].FileName != "f2" {
1904 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1905 }
1906 })
1907
1908 t.Run("ChunkMatches", func(t *testing.T) {
1909 res := searchForTest(t, b,
1910 &query.Substring{
1911 Pattern: "bla",
1912 }, chunkOpts)
1913
1914 if len(res.Files) != 3 {
1915 t.Fatalf("got %#v, want 3 files", res.Files)
1916 }
1917 if res.Files[0].FileName != "f2" {
1918 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1919 }
1920 })
1921}
1922
1923func TestNegativeRepo(t *testing.T) {
1924 content := []byte("bla the needle")
1925 // ----------------01234567890123
1926 b := testShardBuilder(t, &zoekt.Repository{
1927 Name: "bla",
1928 }, Document{Name: "f1", Content: content})
1929
1930 t.Run("LineMatches", func(t *testing.T) {
1931 sres := searchForTest(t, b,
1932 query.NewAnd(
1933 &query.Substring{Pattern: "needle"},
1934 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1935 ))
1936
1937 if len(sres.Files) != 0 {
1938 t.Fatalf("got %v, want 0 matches", sres.Files)
1939 }
1940 })
1941
1942 t.Run("ChunkMatches", func(t *testing.T) {
1943 sres := searchForTest(t, b,
1944 query.NewAnd(
1945 &query.Substring{Pattern: "needle"},
1946 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1947 ), chunkOpts)
1948
1949 if len(sres.Files) != 0 {
1950 t.Fatalf("got %v, want 0 matches", sres.Files)
1951 }
1952 })
1953}
1954
1955func TestListRepos(t *testing.T) {
1956 content := []byte("bla the needle\n")
1957 // ----------------012345678901234-
1958
1959 t.Run("default and minimal fallback", func(t *testing.T) {
1960 repo := &zoekt.Repository{
1961 Name: "reponame",
1962 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1963 }
1964 b := testShardBuilder(t, repo,
1965 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1966 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1967 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1968 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1969
1970 searcher := searcherForTest(t, b)
1971
1972 for _, opts := range []*zoekt.ListOptions{
1973 nil,
1974 {},
1975 {Field: zoekt.RepoListFieldRepos},
1976 {Field: zoekt.RepoListFieldReposMap},
1977 } {
1978 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1979 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1980
1981 res, err := searcher.List(context.Background(), q, opts)
1982 if err != nil {
1983 t.Fatalf("List(%v): %v", q, err)
1984 }
1985
1986 want := &zoekt.RepoList{
1987 Repos: []*zoekt.RepoListEntry{{
1988 Repository: *repo,
1989 Stats: zoekt.RepoStats{
1990 Documents: 4,
1991 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1992 Shards: 1,
1993
1994 NewLinesCount: 4,
1995 DefaultBranchNewLinesCount: 2,
1996 OtherBranchesNewLinesCount: 3,
1997 },
1998 }},
1999 Stats: zoekt.RepoStats{
2000 Repos: 1,
2001 Documents: 4,
2002 ContentBytes: 68,
2003 Shards: 1,
2004
2005 NewLinesCount: 4,
2006 DefaultBranchNewLinesCount: 2,
2007 OtherBranchesNewLinesCount: 3,
2008 },
2009 }
2010 ignored := []cmp.Option{
2011 cmpopts.EquateEmpty(),
2012 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
2013 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
2014 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
2015 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
2016 }
2017 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2018 t.Fatalf("mismatch (-want +got):\n%s", diff)
2019 }
2020
2021 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2022 res, err = searcher.List(context.Background(), q, nil)
2023 if err != nil {
2024 t.Fatalf("List(%v): %v", q, err)
2025 }
2026 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2027 t.Fatalf("got %v, want 0 matches", res)
2028 }
2029 })
2030 }
2031 })
2032
2033 t.Run("minimal", func(t *testing.T) {
2034 repo := &zoekt.Repository{
2035 ID: 1234,
2036 Name: "reponame",
2037 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
2038 RawConfig: map[string]string{"repoid": "1234"},
2039 }
2040 b := testShardBuilder(t, repo,
2041 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
2042 Document{Name: "f2", Content: content, Branches: []string{"main"}},
2043 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
2044 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
2045
2046 searcher := searcherForTest(t, b)
2047
2048 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
2049 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2050 if err != nil {
2051 t.Fatalf("List(%v): %v", q, err)
2052 }
2053
2054 want := &zoekt.RepoList{
2055 ReposMap: zoekt.ReposMap{
2056 repo.ID: {
2057 HasSymbols: repo.HasSymbols,
2058 Branches: repo.Branches,
2059 },
2060 },
2061 Stats: zoekt.RepoStats{
2062 Repos: 1,
2063 Shards: 1,
2064 Documents: 4,
2065 IndexBytes: 412,
2066 ContentBytes: 68,
2067 NewLinesCount: 4,
2068 DefaultBranchNewLinesCount: 2,
2069 OtherBranchesNewLinesCount: 3,
2070 },
2071 }
2072
2073 ignored := []cmp.Option{
2074 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
2075 }
2076 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2077 t.Fatalf("mismatch (-want +got):\n%s", diff)
2078 }
2079
2080 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2081 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2082 if err != nil {
2083 t.Fatalf("List(%v): %v", q, err)
2084 }
2085 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2086 t.Fatalf("got %v, want 0 matches", res)
2087 }
2088 })
2089}
2090
2091func TestListReposByContent(t *testing.T) {
2092 content := []byte("bla the needle")
2093
2094 b := testShardBuilder(t, &zoekt.Repository{
2095 Name: "reponame",
2096 },
2097 Document{Name: "f1", Content: content},
2098 Document{Name: "f2", Content: content})
2099
2100 searcher := searcherForTest(t, b)
2101 q := &query.Substring{Pattern: "needle"}
2102 res, err := searcher.List(context.Background(), q, nil)
2103 if err != nil {
2104 t.Fatalf("List(%v): %v", q, err)
2105 }
2106 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
2107 t.Fatalf("got %v, want 1 matches", res)
2108 }
2109 if got := res.Repos[0].Stats.Shards; got != 1 {
2110 t.Fatalf("got %d, want 1 shard", got)
2111 }
2112 q = &query.Substring{Pattern: "foo"}
2113 res, err = searcher.List(context.Background(), q, nil)
2114 if err != nil {
2115 t.Fatalf("List(%v): %v", q, err)
2116 }
2117 if len(res.Repos) != 0 {
2118 t.Fatalf("got %v, want 0 matches", res)
2119 }
2120}
2121
2122func TestMetadata(t *testing.T) {
2123 content := []byte("bla the needle")
2124
2125 b := testShardBuilder(t, &zoekt.Repository{
2126 Name: "reponame",
2127 }, Document{Name: "f1", Content: content},
2128 Document{Name: "f2", Content: content})
2129
2130 var buf bytes.Buffer
2131 if err := b.Write(&buf); err != nil {
2132 t.Fatal(err)
2133 }
2134 f := &memSeeker{buf.Bytes()}
2135
2136 rd, _, err := ReadMetadata(f)
2137 if err != nil {
2138 t.Fatalf("ReadMetadata: %v", err)
2139 }
2140
2141 if got, want := rd[0].Name, "reponame"; got != want {
2142 t.Fatalf("got %q want %q", got, want)
2143 }
2144}
2145
2146func TestOr(t *testing.T) {
2147 b := testShardBuilder(t, nil,
2148 Document{Name: "f1", Content: []byte("needle")},
2149 Document{Name: "f2", Content: []byte("banana")})
2150 t.Run("LineMatches", func(t *testing.T) {
2151 sres := searchForTest(t, b, query.NewOr(
2152 &query.Substring{Pattern: "needle"},
2153 &query.Substring{Pattern: "banana"}))
2154
2155 if len(sres.Files) != 2 {
2156 t.Fatalf("got %v, want 2 files", sres.Files)
2157 }
2158 })
2159
2160 t.Run("ChunkMatches", func(t *testing.T) {
2161 sres := searchForTest(t, b, query.NewOr(
2162 &query.Substring{Pattern: "needle"},
2163 &query.Substring{Pattern: "banana"}))
2164
2165 if len(sres.Files) != 2 {
2166 t.Fatalf("got %v, want 2 files", sres.Files)
2167 }
2168 })
2169}
2170
2171func TestFrequency(t *testing.T) {
2172 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2173
2174 b := testShardBuilder(t, nil,
2175 Document{
2176 Name: "f1",
2177 Content: content,
2178 })
2179
2180 t.Run("LineMatches", func(t *testing.T) {
2181 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2182 if len(sres.Files) != 0 {
2183 t.Errorf("got %v, wanted 0 matches", sres.Files)
2184 }
2185 })
2186
2187 t.Run("ChunkMatches", func(t *testing.T) {
2188 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2189 if len(sres.Files) != 0 {
2190 t.Errorf("got %v, wanted 0 matches", sres.Files)
2191 }
2192 })
2193}
2194
2195func TestMatchNewline(t *testing.T) {
2196 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2197 if err != nil {
2198 t.Fatalf("syntax.Parse: %v", err)
2199 }
2200
2201 content := []byte("pqr\nalex")
2202
2203 b := testShardBuilder(t, nil,
2204 Document{
2205 Name: "f1",
2206 Content: content,
2207 })
2208
2209 t.Run("LineMatches", func(t *testing.T) {
2210 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2211 if len(sres.Files) != 1 {
2212 t.Errorf("got %v, wanted 1 matches", sres.Files)
2213 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2214 t.Errorf("got match line %q, want %q", l, content)
2215 }
2216 })
2217
2218 t.Run("ChunkMatches", func(t *testing.T) {
2219 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2220 if len(sres.Files) != 1 {
2221 t.Errorf("got %v, wanted 1 matches", sres.Files)
2222 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2223 t.Errorf("got match line %q, want %q", c, content)
2224 }
2225 })
2226}
2227
2228func TestSubRepo(t *testing.T) {
2229 subRepos := map[string]*zoekt.Repository{
2230 "sub": {
2231 Name: "sub-name",
2232 LineFragmentTemplate: "sub-line",
2233 },
2234 }
2235
2236 content := []byte("pqr\nalex")
2237
2238 b := testShardBuilder(t, &zoekt.Repository{
2239 SubRepoMap: subRepos,
2240 }, Document{
2241 Name: "sub/f1",
2242 Content: content,
2243 SubRepositoryPath: "sub",
2244 })
2245
2246 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2247 if len(sres.Files) != 1 {
2248 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2249 }
2250
2251 f := sres.Files[0]
2252 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2253 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2254 }
2255
2256 if sres.LineFragments["sub-name"] != "sub-line" {
2257 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2258 }
2259}
2260
2261func TestSearchEither(t *testing.T) {
2262 b := testShardBuilder(t, nil,
2263 Document{Name: "f1", Content: []byte("bla needle bla")},
2264 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2265
2266 t.Run("LineMatches", func(t *testing.T) {
2267 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2268 if len(sres.Files) != 2 {
2269 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2270 }
2271
2272 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2273 if len(sres.Files) != 1 {
2274 t.Fatalf("got %v, wanted 1 index", sres.Files)
2275 }
2276
2277 if got, want := sres.Files[0].FileName, "f1"; got != want {
2278 t.Errorf("got %q, want %q", got, want)
2279 }
2280 })
2281
2282 t.Run("ChunkMatches", func(t *testing.T) {
2283 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2284 if len(sres.Files) != 2 {
2285 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2286 }
2287
2288 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2289 if len(sres.Files) != 1 {
2290 t.Fatalf("got %v, wanted 1 index", sres.Files)
2291 }
2292
2293 if got, want := sres.Files[0].FileName, "f1"; got != want {
2294 t.Errorf("got %q, want %q", got, want)
2295 }
2296 })
2297}
2298
2299func TestUnicodeExactMatch(t *testing.T) {
2300 needle := "néédlÉ"
2301 content := []byte("blá blá " + needle + " blâ")
2302
2303 b := testShardBuilder(t, nil,
2304 Document{Name: "f1", Content: content})
2305
2306 t.Run("LineMatches", func(t *testing.T) {
2307 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2308 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2309 }
2310 })
2311
2312 t.Run("ChunkMatches", func(t *testing.T) {
2313 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2314 if len(res.Files) != 1 {
2315 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2316 }
2317 })
2318}
2319
2320func TestUnicodeCoverContent(t *testing.T) {
2321 needle := "néédlÉ"
2322 content := []byte("blá blá " + needle + " blâ")
2323
2324 b := testShardBuilder(t, nil,
2325 Document{Name: "f1", Content: content})
2326
2327 t.Run("LineMatches", func(t *testing.T) {
2328 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2329 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2330 }
2331
2332 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2333 if len(res.Files) != 1 {
2334 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2335 }
2336
2337 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2338 t.Errorf("got %d want %d", got, want)
2339 }
2340 })
2341
2342 t.Run("ChunkMatches", func(t *testing.T) {
2343 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2344 if len(res.Files) != 0 {
2345 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2346 }
2347
2348 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2349 if len(res.Files) != 1 {
2350 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2351 }
2352
2353 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2354 want := uint32(strings.Index(string(content), needle))
2355 if got != want {
2356 t.Errorf("got %d want %d", got, want)
2357 }
2358 })
2359}
2360
2361func TestUnicodeNonCoverContent(t *testing.T) {
2362 needle := "nééáádlÉ"
2363 content := []byte("blá blá " + needle + " blâ")
2364
2365 b := testShardBuilder(t, nil,
2366 Document{Name: "f1", Content: content})
2367
2368 t.Run("LineMatches", func(t *testing.T) {
2369 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2370 if len(res.Files) != 1 {
2371 t.Fatalf("got %v, wanted 1 index", res.Files)
2372 }
2373
2374 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2375 t.Errorf("got %d want %d", got, want)
2376 }
2377 })
2378
2379 t.Run("ChunkMatches", func(t *testing.T) {
2380 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2381 if len(res.Files) != 1 {
2382 t.Fatalf("got %v, wanted 1 index", res.Files)
2383 }
2384
2385 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2386 want := uint32(strings.Index(string(content), needle))
2387 if got != want {
2388 t.Errorf("got %d want %d", got, want)
2389 }
2390 })
2391}
2392
// kelvinCodePoint is U+212A KELVIN SIGN (8490 decimal). The tests use it as
// an upper-case counterpart of ASCII 'k' whose UTF-8 encoding is longer than
// one byte.
const kelvinCodePoint = 0x212A
2394
2395func TestUnicodeVariableLength(t *testing.T) {
2396 lower := 'k'
2397 upper := rune(kelvinCodePoint)
2398
2399 needle := "nee" + string([]rune{lower}) + "eed"
2400 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2401 " ee" + string([]rune{lower}) + "ee" +
2402 " ee" + string([]rune{upper}) + "ee")
2403
2404 t.Run("LineMatches", func(t *testing.T) {
2405 b := testShardBuilder(t, nil,
2406 Document{Name: "f1", Content: []byte(corpus)})
2407
2408 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2409 if len(res.Files) != 1 {
2410 t.Fatalf("got %v, wanted 1 index", res.Files)
2411 }
2412 })
2413
2414 t.Run("ChunkMatches", func(t *testing.T) {
2415 b := testShardBuilder(t, nil,
2416 Document{Name: "f1", Content: []byte(corpus)})
2417
2418 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2419 if len(res.Files) != 1 {
2420 t.Fatalf("got %v, wanted 1 index", res.Files)
2421 }
2422 })
2423}
2424
2425func TestUnicodeFileStartOffsets(t *testing.T) {
2426 unicode := "世界"
2427 wat := "waaaaaat"
2428 b := testShardBuilder(t, nil,
2429 Document{
2430 Name: "f1",
2431 Content: []byte(unicode),
2432 },
2433 Document{
2434 Name: "f2",
2435 Content: []byte(wat),
2436 },
2437 )
2438 q := &query.Substring{Pattern: wat, Content: true}
2439 res := searchForTest(t, b, q)
2440 if len(res.Files) != 1 {
2441 t.Fatalf("got %v, wanted 1 index", res.Files)
2442 }
2443}
2444
2445func TestLongFileUTF8(t *testing.T) {
2446 needle := "neeedle"
2447
2448 // 6 bytes.
2449 unicode := "世界"
2450 content := []byte(strings.Repeat(unicode, 100) + needle)
2451 b := testShardBuilder(t, nil,
2452 Document{
2453 Name: "f1",
2454 Content: []byte(strings.Repeat("a", 50)),
2455 },
2456 Document{
2457 Name: "f2",
2458 Content: content,
2459 })
2460
2461 t.Run("LineMatches", func(t *testing.T) {
2462 q := &query.Substring{Pattern: needle, Content: true}
2463 res := searchForTest(t, b, q)
2464 if len(res.Files) != 1 {
2465 t.Errorf("got %v, want 1 result", res)
2466 }
2467 })
2468
2469 t.Run("ChunkMatches", func(t *testing.T) {
2470 q := &query.Substring{Pattern: needle, Content: true}
2471 res := searchForTest(t, b, q, chunkOpts)
2472 if len(res.Files) != 1 {
2473 t.Errorf("got %v, want 1 result", res)
2474 }
2475 })
2476}
2477
2478func TestEstimateDocCount(t *testing.T) {
2479 content := []byte("bla needle bla")
2480 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2481 Document{Name: "f1", Content: content},
2482 Document{Name: "f2", Content: content},
2483 )
2484
2485 t.Run("LineMatches", func(t *testing.T) {
2486 if sres := searchForTest(t, b,
2487 query.NewAnd(
2488 &query.Substring{Pattern: "needle"},
2489 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2490 ), zoekt.SearchOptions{
2491 EstimateDocCount: true,
2492 }); sres.Stats.ShardFilesConsidered != 2 {
2493 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2494 }
2495 if sres := searchForTest(t, b,
2496 query.NewAnd(
2497 &query.Substring{Pattern: "needle"},
2498 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2499 ), zoekt.SearchOptions{
2500 EstimateDocCount: true,
2501 }); sres.Stats.ShardFilesConsidered != 0 {
2502 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2503 }
2504 })
2505
2506 t.Run("ChunkMatches", func(t *testing.T) {
2507 if sres := searchForTest(t, b,
2508 query.NewAnd(
2509 &query.Substring{Pattern: "needle"},
2510 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2511 ), zoekt.SearchOptions{
2512 EstimateDocCount: true,
2513 ChunkMatches: true,
2514 }); sres.Stats.ShardFilesConsidered != 2 {
2515 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2516 }
2517 if sres := searchForTest(t, b,
2518 query.NewAnd(
2519 &query.Substring{Pattern: "needle"},
2520 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2521 ), zoekt.SearchOptions{
2522 EstimateDocCount: true,
2523 ChunkMatches: true,
2524 }); sres.Stats.ShardFilesConsidered != 0 {
2525 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2526 }
2527 })
2528}
2529
2530func TestUTF8CorrectCorpus(t *testing.T) {
2531 needle := "neeedle"
2532
2533 // 6 bytes.
2534 unicode := "世界"
2535 b := testShardBuilder(t, nil,
2536 Document{
2537 Name: "f1",
2538 Content: []byte(strings.Repeat(unicode, 100)),
2539 },
2540 Document{
2541 Name: "xxxxxneeedle",
2542 Content: []byte("hello"),
2543 })
2544
2545 t.Run("LineMatches", func(t *testing.T) {
2546 q := &query.Substring{Pattern: needle, FileName: true}
2547 res := searchForTest(t, b, q)
2548 if len(res.Files) != 1 {
2549 t.Errorf("got %v, want 1 result", res)
2550 }
2551 })
2552
2553 t.Run("ChunkMatches", func(t *testing.T) {
2554 q := &query.Substring{Pattern: needle, FileName: true}
2555 res := searchForTest(t, b, q, chunkOpts)
2556 if len(res.Files) != 1 {
2557 t.Errorf("got %v, want 1 result", res)
2558 }
2559 })
2560}
2561
2562func TestBuilderStats(t *testing.T) {
2563 b := testShardBuilder(t, nil,
2564 Document{
2565 Name: "f1",
2566 Content: []byte(strings.Repeat("abcd", 1024)),
2567 })
2568 var buf bytes.Buffer
2569 if err := b.Write(&buf); err != nil {
2570 t.Fatal(err)
2571 }
2572
2573 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2574 t.Errorf("got %d, want %d", got, want)
2575 }
2576}
2577
2578func TestIOStats(t *testing.T) {
2579 b := testShardBuilder(t, nil,
2580 Document{
2581 Name: "f1",
2582 Content: []byte(strings.Repeat("abcd", 1024)),
2583 })
2584
2585 t.Run("LineMatches", func(t *testing.T) {
2586 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2587 res := searchForTest(t, b, q)
2588
2589 // 4096 (content) + 2 (overhead: newlines or doc sections)
2590 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2591 t.Errorf("got content I/O %d, want %d", got, want)
2592 }
2593
2594 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2595 // delta encoded.
2596 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2597 t.Errorf("got index I/O %d, want %d", got, want)
2598 }
2599 })
2600
2601 t.Run("ChunkMatches", func(t *testing.T) {
2602 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2603 res := searchForTest(t, b, q, chunkOpts)
2604
2605 // 4096 (content) + 2 (overhead: newlines or doc sections)
2606 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2607 t.Errorf("got content I/O %d, want %d", got, want)
2608 }
2609
2610 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2611 // delta encoded.
2612 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2613 t.Errorf("got index I/O %d, want %d", got, want)
2614 }
2615 })
2616
2617 t.Run("LineMatches with BM25", func(t *testing.T) {
2618 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2619 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true})
2620
2621 // 4096 (content) + 2 (overhead: newlines or doc sections)
2622 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2623 t.Errorf("got content I/O %d, want %d", got, want)
2624 }
2625
2626 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2627 // delta encoded.
2628 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2629 t.Errorf("got index I/O %d, want %d", got, want)
2630 }
2631 })
2632
2633 t.Run("ChunkMatches with BM25", func(t *testing.T) {
2634 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2635 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true})
2636
2637 // 4096 (content) + 2 (overhead: newlines or doc sections)
2638 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2639 t.Errorf("got content I/O %d, want %d", got, want)
2640 }
2641
2642 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2643 // delta encoded.
2644 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2645 t.Errorf("got index I/O %d, want %d", got, want)
2646 }
2647 })
2648}
2649
2650func TestStartLineAnchor(t *testing.T) {
2651 b := testShardBuilder(t, nil,
2652 Document{
2653 Name: "f1",
2654 Content: []byte(
2655 `hello
2656start of middle of line
2657`),
2658 })
2659
2660 t.Run("LineMatches", func(t *testing.T) {
2661 q, err := query.Parse("^start")
2662 if err != nil {
2663 t.Errorf("parse: %v", err)
2664 }
2665
2666 res := searchForTest(t, b, q)
2667 if len(res.Files) != 1 {
2668 t.Errorf("got %v, want 1 file", res.Files)
2669 }
2670
2671 q, err = query.Parse("^middle")
2672 if err != nil {
2673 t.Errorf("parse: %v", err)
2674 }
2675 res = searchForTest(t, b, q)
2676 if len(res.Files) != 0 {
2677 t.Errorf("got %v, want 0 files", res.Files)
2678 }
2679 })
2680
2681 t.Run("ChunkMatches", func(t *testing.T) {
2682 q, err := query.Parse("^start")
2683 if err != nil {
2684 t.Errorf("parse: %v", err)
2685 }
2686
2687 res := searchForTest(t, b, q, chunkOpts)
2688 if len(res.Files) != 1 {
2689 t.Errorf("got %v, want 1 file", res.Files)
2690 }
2691
2692 q, err = query.Parse("^middle")
2693 if err != nil {
2694 t.Errorf("parse: %v", err)
2695 }
2696 res = searchForTest(t, b, q, chunkOpts)
2697 if len(res.Files) != 0 {
2698 t.Errorf("got %v, want 0 files", res.Files)
2699 }
2700 })
2701}
2702
2703func TestAndOrUnicode(t *testing.T) {
2704 q, err := query.Parse("orange.*apple")
2705 if err != nil {
2706 t.Errorf("parse: %v", err)
2707 }
2708 finalQ := query.NewAnd(q,
2709 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2710 query.NewOr(&query.Branch{Pattern: "master"}))))
2711
2712 b := testShardBuilder(t, &zoekt.Repository{
2713 Name: "name",
2714 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}},
2715 }, Document{
2716 Name: "f2",
2717 Content: []byte("orange\u2318apple"),
2718 // --------------0123456 78901
2719 Branches: []string{"master"},
2720 })
2721
2722 t.Run("LineMatches", func(t *testing.T) {
2723 res := searchForTest(t, b, finalQ)
2724 if len(res.Files) != 1 {
2725 t.Errorf("got %v, want 1 result", res.Files)
2726 }
2727 })
2728
2729 t.Run("ChunkMatches", func(t *testing.T) {
2730 res := searchForTest(t, b, finalQ, chunkOpts)
2731 if len(res.Files) != 1 {
2732 t.Errorf("got %v, want 1 result", res.Files)
2733 }
2734 })
2735}
2736
2737func TestAndShort(t *testing.T) {
2738 content := []byte("bla needle at orange bla")
2739 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2740 Document{Name: "f1", Content: content},
2741 Document{Name: "f2", Content: []byte("xx at xx")},
2742 Document{Name: "f3", Content: []byte("yy orange xx")},
2743 )
2744
2745 q := query.NewAnd(&query.Substring{Pattern: "at"},
2746 &query.Substring{Pattern: "orange"})
2747
2748 t.Run("LineMatches", func(t *testing.T) {
2749 res := searchForTest(t, b, q)
2750 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2751 t.Errorf("got %v, want 1 result", res.Files)
2752 }
2753 })
2754
2755 t.Run("ChunkMatches", func(t *testing.T) {
2756 res := searchForTest(t, b, q, chunkOpts)
2757 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2758 t.Errorf("got %v, want 1 result", res.Files)
2759 }
2760 })
2761}
2762
2763func TestNoCollectRegexpSubstring(t *testing.T) {
2764 content := []byte("bla final bla\nfoo final, foo")
2765 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2766 Document{Name: "f1", Content: content},
2767 )
2768
2769 q := &query.Regexp{
2770 Regexp: mustParseRE("final[,.]"),
2771 }
2772
2773 t.Run("LineMatches", func(t *testing.T) {
2774 res := searchForTest(t, b, q)
2775 if len(res.Files) != 1 {
2776 t.Fatalf("got %v, want 1 result", res.Files)
2777 }
2778 if f := res.Files[0]; len(f.LineMatches) != 1 {
2779 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2780 }
2781 })
2782
2783 t.Run("ChunkMatches", func(t *testing.T) {
2784 res := searchForTest(t, b, q, chunkOpts)
2785 if len(res.Files) != 1 {
2786 t.Fatalf("got %v, want 1 result", res.Files)
2787 }
2788 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2789 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2790 }
2791 })
2792}
2793
2794func printLineMatches(ms []zoekt.LineMatch) string {
2795 var ss []string
2796 for _, m := range ms {
2797 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2798 }
2799
2800 return strings.Join(ss, ", ")
2801}
2802
2803func TestLang(t *testing.T) {
2804 content := []byte("bla needle bla")
2805 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2806 Document{Name: "f1", Content: content},
2807 Document{Name: "f2", Language: "java", Content: content},
2808 Document{Name: "f3", Language: "cpp", Content: content},
2809 )
2810
2811 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2812 &query.Language{Language: "cpp"})
2813
2814 t.Run("LineMatches", func(t *testing.T) {
2815 res := searchForTest(t, b, q)
2816 if len(res.Files) != 1 {
2817 t.Fatalf("got %v, want 1 result in f3", res.Files)
2818 }
2819 f := res.Files[0]
2820 if f.FileName != "f3" || f.Language != "cpp" {
2821 t.Fatalf("got %v, want 1 match with language cpp", f)
2822 }
2823 })
2824
2825 t.Run("ChunkMatches", func(t *testing.T) {
2826 res := searchForTest(t, b, q, chunkOpts)
2827 if len(res.Files) != 1 {
2828 t.Fatalf("got %v, want 1 result in f3", res.Files)
2829 }
2830 f := res.Files[0]
2831 if f.FileName != "f3" || f.Language != "cpp" {
2832 t.Fatalf("got %v, want 1 match with language cpp", f)
2833 }
2834 })
2835}
2836
2837func TestLangShortcut(t *testing.T) {
2838 content := []byte("bla needle bla")
2839 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2840 Document{Name: "f2", Language: "java", Content: content},
2841 Document{Name: "f3", Language: "cpp", Content: content},
2842 )
2843
2844 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2845 &query.Language{Language: "fortran"})
2846
2847 t.Run("LineMatches", func(t *testing.T) {
2848 res := searchForTest(t, b, q)
2849 if len(res.Files) != 0 {
2850 t.Fatalf("got %v, want 0 results", res.Files)
2851 }
2852 if res.Stats.IndexBytesLoaded > 0 {
2853 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2854 }
2855 })
2856
2857 t.Run("ChunkMatches", func(t *testing.T) {
2858 res := searchForTest(t, b, q, chunkOpts)
2859 if len(res.Files) != 0 {
2860 t.Fatalf("got %v, want 0 results", res.Files)
2861 }
2862 if res.Stats.IndexBytesLoaded > 0 {
2863 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2864 }
2865 })
2866}
2867
2868func TestNoTextMatchAtoms(t *testing.T) {
2869 content := []byte("bla needle bla")
2870 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2871 Document{Name: "f1", Content: content},
2872 Document{Name: "f2", Language: "java", Content: content},
2873 Document{Name: "f3", Language: "cpp", Content: content},
2874 )
2875 q := query.NewAnd(&query.Language{Language: "java"})
2876 t.Run("LineMatches", func(t *testing.T) {
2877 res := searchForTest(t, b, q)
2878 if len(res.Files) != 1 {
2879 t.Fatalf("got %v, want 1 result in f3", res.Files)
2880 }
2881 })
2882
2883 t.Run("ChunkMatches", func(t *testing.T) {
2884 res := searchForTest(t, b, q, chunkOpts)
2885 if len(res.Files) != 1 {
2886 t.Fatalf("got %v, want 1 result in f3", res.Files)
2887 }
2888 })
2889}
2890
2891func TestNoPositiveAtoms(t *testing.T) {
2892 content := []byte("bla needle bla")
2893 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2894 Document{Name: "f1", Content: content},
2895 Document{Name: "f2", Content: content},
2896 )
2897
2898 q := query.NewAnd(
2899 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2900 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2901 t.Run("LineMatches", func(t *testing.T) {
2902 res := searchForTest(t, b, q)
2903 if len(res.Files) != 2 {
2904 t.Fatalf("got %v, want 2 results in f3", res.Files)
2905 }
2906 })
2907 t.Run("ChunkMatches", func(t *testing.T) {
2908 res := searchForTest(t, b, q, chunkOpts)
2909 if len(res.Files) != 2 {
2910 t.Fatalf("got %v, want 2 results in f3", res.Files)
2911 }
2912 })
2913}
2914
2915func TestSymbolBoundaryStart(t *testing.T) {
2916 content := []byte("start\nbla bla\nend")
2917 // ----------------012345-67890123-456
2918
2919 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2920 Document{
2921 Name: "f1",
2922 Content: content,
2923 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2924 },
2925 )
2926 q := &query.Symbol{
2927 Expr: &query.Substring{Pattern: "start"},
2928 }
2929 t.Run("LineMatches", func(t *testing.T) {
2930 res := searchForTest(t, b, q)
2931 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2932 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2933 }
2934 m := res.Files[0].LineMatches[0].LineFragments[0]
2935 if m.Offset != 0 {
2936 t.Fatalf("got offset %d want 0", m.Offset)
2937 }
2938 })
2939
2940 t.Run("ChunkMatches", func(t *testing.T) {
2941 res := searchForTest(t, b, q, chunkOpts)
2942 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2943 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2944 }
2945 m := res.Files[0].ChunkMatches[0].Ranges[0]
2946 if m.Start.ByteOffset != 0 {
2947 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2948 }
2949 })
2950}
2951
2952func TestSymbolBoundaryEnd(t *testing.T) {
2953 content := []byte("start\nbla bla\nend")
2954 // ----------------012345-67890123-456
2955
2956 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2957 Document{
2958 Name: "f1",
2959 Content: content,
2960 Symbols: []DocumentSection{{14, 17}},
2961 },
2962 )
2963 q := &query.Symbol{
2964 Expr: &query.Substring{Pattern: "end"},
2965 }
2966 t.Run("LineMatches", func(t *testing.T) {
2967 res := searchForTest(t, b, q)
2968 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2969 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2970 }
2971 m := res.Files[0].LineMatches[0].LineFragments[0]
2972 if m.Offset != 14 {
2973 t.Fatalf("got offset %d want 0", m.Offset)
2974 }
2975 })
2976
2977 t.Run("ChunkMatches", func(t *testing.T) {
2978 res := searchForTest(t, b, q, chunkOpts)
2979 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2980 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2981 }
2982 m := res.Files[0].ChunkMatches[0].Ranges[0]
2983 if m.Start.ByteOffset != 14 {
2984 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2985 }
2986 })
2987}
2988
2989func TestSymbolSubstring(t *testing.T) {
2990 content := []byte("bla\nsymblabla\nbla")
2991 // ----------------0123-4567890123-456
2992
2993 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2994 Document{
2995 Name: "f1",
2996 Content: content,
2997 Symbols: []DocumentSection{{4, 12}},
2998 },
2999 )
3000 q := &query.Symbol{
3001 Expr: &query.Substring{Pattern: "bla"},
3002 }
3003 t.Run("LineMatches", func(t *testing.T) {
3004 res := searchForTest(t, b, q)
3005 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3006 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3007 }
3008 m := res.Files[0].LineMatches[0].LineFragments[0]
3009 if m.Offset != 7 || m.MatchLength != 3 {
3010 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
3011 }
3012 })
3013
3014 t.Run("ChunkMatches", func(t *testing.T) {
3015 res := searchForTest(t, b, q, chunkOpts)
3016 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3017 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3018 }
3019 m := res.Files[0].ChunkMatches[0].Ranges[0]
3020 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
3021 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
3022 }
3023 })
3024}
3025
3026func TestSymbolSubstringExact(t *testing.T) {
3027 content := []byte("bla\nsym\nbla\nsym\nasymb")
3028 // ----------------0123-4567-890123456-78901
3029
3030 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3031 Document{
3032 Name: "f1",
3033 Content: content,
3034 Symbols: []DocumentSection{{4, 7}},
3035 },
3036 )
3037 q := &query.Symbol{
3038 Expr: &query.Substring{Pattern: "sym"},
3039 }
3040 t.Run("LineMatches", func(t *testing.T) {
3041 res := searchForTest(t, b, q)
3042 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3043 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3044 }
3045 m := res.Files[0].LineMatches[0].LineFragments[0]
3046 if m.Offset != 4 {
3047 t.Fatalf("got offset %d, want 7", m.Offset)
3048 }
3049 })
3050
3051 t.Run("ChunkMatches", func(t *testing.T) {
3052 res := searchForTest(t, b, q, chunkOpts)
3053 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3054 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3055 }
3056 m := res.Files[0].ChunkMatches[0].Ranges[0]
3057 if m.Start.ByteOffset != 4 {
3058 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
3059 }
3060 })
3061}
3062
3063func TestSymbolRegexpExact(t *testing.T) {
3064 content := []byte("blah\nbla\nbl")
3065 // ----------------01234-5678-90
3066
3067 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3068 Document{
3069 Name: "f1",
3070 Content: content,
3071 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
3072 },
3073 )
3074 q := &query.Symbol{
3075 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
3076 }
3077 t.Run("LineMatches", func(t *testing.T) {
3078 res := searchForTest(t, b, q)
3079 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3080 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3081 }
3082 m := res.Files[0].LineMatches[0].LineFragments[0]
3083 if m.Offset != 5 {
3084 t.Fatalf("got offset %d, want 5", m.Offset)
3085 }
3086 })
3087
3088 t.Run("ChunkMatches", func(t *testing.T) {
3089 res := searchForTest(t, b, q, chunkOpts)
3090 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3091 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3092 }
3093 m := res.Files[0].ChunkMatches[0].Ranges[0]
3094 if m.Start.ByteOffset != 5 {
3095 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
3096 }
3097 })
3098}
3099
3100func TestSymbolRegexpPartial(t *testing.T) {
3101 content := []byte("abcdef")
3102 // ----------------012345
3103
3104 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3105 Document{
3106 Name: "f1",
3107 Content: content,
3108 Symbols: []DocumentSection{{0, 6}},
3109 },
3110 )
3111 q := &query.Symbol{
3112 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
3113 }
3114 t.Run("LineMatches", func(t *testing.T) {
3115 res := searchForTest(t, b, q)
3116 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3117 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3118 }
3119 m := res.Files[0].LineMatches[0].LineFragments[0]
3120 if m.Offset != 1 {
3121 t.Fatalf("got offset %d, want 1", m.Offset)
3122 }
3123 if m.MatchLength != 3 {
3124 t.Fatalf("got match length %d, want 3", m.MatchLength)
3125 }
3126 })
3127
3128 t.Run("ChunkMatches", func(t *testing.T) {
3129 res := searchForTest(t, b, q, chunkOpts)
3130 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3131 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3132 }
3133 m := res.Files[0].ChunkMatches[0].Ranges[0]
3134 if m.Start.ByteOffset != 1 {
3135 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
3136 }
3137 if m.End.ByteOffset != 4 {
3138 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
3139 }
3140 })
3141}
3142
3143func TestSymbolRegexpAll(t *testing.T) {
3144 docs := []Document{
3145 {
3146 Name: "f1",
3147 Content: []byte("Hello Zoekt"),
3148 // --------------01234567890
3149 Symbols: []DocumentSection{{0, 5}, {6, 11}},
3150 },
3151 {
3152 Name: "f2",
3153 Content: []byte("Second Zoekt Third"),
3154 // --------------012345678901234567
3155 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
3156 },
3157 }
3158
3159 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...)
3160 q := &query.Symbol{
3161 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
3162 }
3163 t.Run("LineMatches", func(t *testing.T) {
3164 res := searchForTest(t, b, q)
3165 if len(res.Files) != len(docs) {
3166 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3167 }
3168 for i, want := range docs {
3169 got := res.Files[i].LineMatches[0].LineFragments
3170 if len(got) != len(want.Symbols) {
3171 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3172 }
3173
3174 for j, sec := range want.Symbols {
3175 if sec.Start != got[j].Offset {
3176 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3177 }
3178 }
3179 }
3180 })
3181
3182 t.Run("ChunkMatches", func(t *testing.T) {
3183 res := searchForTest(t, b, q, chunkOpts)
3184 if len(res.Files) != len(docs) {
3185 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3186 }
3187 for i, want := range docs {
3188 got := res.Files[i].ChunkMatches[0].Ranges
3189 if len(got) != len(want.Symbols) {
3190 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3191 }
3192
3193 for j, sec := range want.Symbols {
3194 if sec.Start != uint32(got[j].Start.ByteOffset) {
3195 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3196 }
3197 }
3198 }
3199 })
3200}
3201
3202func TestHitIterTerminate(t *testing.T) {
3203 // contrived input: trigram frequencies forces selecting abc +
3204 // def for the distance iteration. There is no index, so this
3205 // will advance the compressedPostingIterator to beyond the
3206 // end.
3207 content := []byte("abc bcdbcd cdecde abcabc def efg")
3208 b := testShardBuilder(t, nil,
3209 Document{
3210 Name: "f1",
3211 Content: content,
3212 },
3213 )
3214
3215 t.Run("LineMatches", func(t *testing.T) {
3216 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3217 })
3218
3219 t.Run("ChunkMatches", func(t *testing.T) {
3220 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3221 })
3222}
3223
3224func TestDistanceHitIterBailLast(t *testing.T) {
3225 content := []byte("AST AST AST UASH")
3226 b := testShardBuilder(t, nil,
3227 Document{
3228 Name: "f1",
3229 Content: content,
3230 },
3231 )
3232 t.Run("LineMatches", func(t *testing.T) {
3233 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3234 if len(res.Files) != 0 {
3235 t.Fatalf("got %v, want no results", res.Files)
3236 }
3237 })
3238
3239 t.Run("LineMatches", func(t *testing.T) {
3240 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3241 if len(res.Files) != 0 {
3242 t.Fatalf("got %v, want no results", res.Files)
3243 }
3244 })
3245}
3246
3247func TestDocumentSectionRuneBoundary(t *testing.T) {
3248 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3249 b, err := NewShardBuilder(nil)
3250 if err != nil {
3251 t.Fatalf("NewShardBuilder: %v", err)
3252 }
3253
3254 for i, sec := range []DocumentSection{
3255 {2, 6},
3256 {3, 7},
3257 } {
3258 if err := b.Add(Document{
3259 Name: "f1",
3260 Content: []byte(content),
3261 Symbols: []DocumentSection{sec},
3262 }); err == nil {
3263 t.Errorf("%d: Add succeeded", i)
3264 }
3265 }
3266}
3267
3268func TestUnicodeQuery(t *testing.T) {
3269 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3270 b := testShardBuilder(t, nil,
3271 Document{
3272 Name: "f1",
3273 Content: []byte(content),
3274 },
3275 )
3276
3277 q := &query.Substring{Pattern: content}
3278
3279 t.Run("LineMatches", func(t *testing.T) {
3280 res := searchForTest(t, b, q)
3281 if len(res.Files) != 1 {
3282 t.Fatalf("want 1 match, got %v", res.Files)
3283 }
3284
3285 f := res.Files[0]
3286 if len(f.LineMatches) != 1 {
3287 t.Fatalf("want 1 line, got %v", f.LineMatches)
3288 }
3289 l := f.LineMatches[0]
3290
3291 if len(l.LineFragments) != 1 {
3292 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3293 }
3294 fr := l.LineFragments[0]
3295 if fr.MatchLength != len(content) {
3296 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3297 }
3298 })
3299
3300 t.Run("ChunkMatches", func(t *testing.T) {
3301 res := searchForTest(t, b, q, chunkOpts)
3302 if len(res.Files) != 1 {
3303 t.Fatalf("want 1 match, got %v", res.Files)
3304 }
3305
3306 f := res.Files[0]
3307 if len(f.ChunkMatches) != 1 {
3308 t.Fatalf("want 1 line, got %v", f.LineMatches)
3309 }
3310 cm := f.ChunkMatches[0]
3311
3312 if len(cm.Ranges) != 1 {
3313 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3314 }
3315 rr := cm.Ranges[0]
3316 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3317 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3318 }
3319 })
3320}
3321
3322func TestSkipInvalidContent(t *testing.T) {
3323 for _, content := range []string{
3324 // Binary
3325 "abc def \x00 abc",
3326 } {
3327
3328 b, err := NewShardBuilder(nil)
3329 if err != nil {
3330 t.Fatalf("NewShardBuilder: %v", err)
3331 }
3332
3333 if err := b.Add(Document{
3334 Name: "f1",
3335 Content: []byte(content),
3336 }); err != nil {
3337 t.Fatal(err)
3338 }
3339
3340 t.Run("LineMatches", func(t *testing.T) {
3341 q := &query.Substring{Pattern: "abc def"}
3342 res := searchForTest(t, b, q)
3343 if len(res.Files) != 0 {
3344 t.Fatalf("got %v, want no results", res.Files)
3345 }
3346
3347 q = &query.Substring{Pattern: "NOT-INDEXED"}
3348 res = searchForTest(t, b, q)
3349 if len(res.Files) != 1 {
3350 t.Fatalf("got %v, want 1 result", res.Files)
3351 }
3352 })
3353
3354 t.Run("ChunkMatches", func(t *testing.T) {
3355 q := &query.Substring{Pattern: "abc def"}
3356 res := searchForTest(t, b, q, chunkOpts)
3357 if len(res.Files) != 0 {
3358 t.Fatalf("got %v, want no results", res.Files)
3359 }
3360
3361 q = &query.Substring{Pattern: "NOT-INDEXED"}
3362 res = searchForTest(t, b, q, chunkOpts)
3363 if len(res.Files) != 1 {
3364 t.Fatalf("got %v, want 1 result", res.Files)
3365 }
3366 })
3367 }
3368}
3369
3370func TestDocChecker(t *testing.T) {
3371 docChecker := DocChecker{}
3372
3373 // Test valid and invalid text
3374 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3375 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3376 t.Errorf("Check(%q): %v", text, err)
3377 }
3378 }
3379 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3380 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3381 t.Errorf("Check(%q) succeeded", text)
3382 }
3383 }
3384
3385 // Test valid and invalid text with an allowed large file
3386 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3387 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3388 t.Errorf("Check(%q): %v", text, err)
3389 }
3390 }
3391 for _, text := range []string{"zero\x00byte", "xx"} {
3392 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3393 t.Errorf("Check(%q) succeeded", text)
3394 }
3395 }
3396}
3397
3398func TestLineAnd(t *testing.T) {
3399 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3400 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3401 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3402 Document{Name: "f3", Content: []byte("banana grape")},
3403 )
3404 pattern := "(apple)(?-s:.)*?(banana)"
3405 r, _ := syntax.Parse(pattern, syntax.Perl)
3406
3407 q := query.Regexp{
3408 Regexp: r,
3409 Content: true,
3410 }
3411 t.Run("LineMatches", func(t *testing.T) {
3412 res := searchForTest(t, b, &q)
3413 wantRegexpCount := 1
3414 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3415 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3416 }
3417 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3418 t.Errorf("got %v, want 1 result", res.Files)
3419 }
3420 })
3421
3422 t.Run("ChunkMatches", func(t *testing.T) {
3423 res := searchForTest(t, b, &q, chunkOpts)
3424 wantRegexpCount := 1
3425 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3426 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3427 }
3428 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3429 t.Errorf("got %v, want 1 result", res.Files)
3430 }
3431 })
3432}
3433
3434func TestLineAndFileName(t *testing.T) {
3435 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3436 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3437 Document{Name: "f2", Content: []byte("apple banana\norange")},
3438 Document{Name: "apple banana", Content: []byte("banana grape")},
3439 )
3440 pattern := "(apple)(?-s:.)*?(banana)"
3441 r, _ := syntax.Parse(pattern, syntax.Perl)
3442
3443 q := query.Regexp{
3444 Regexp: r,
3445 FileName: true,
3446 }
3447 t.Run("LineMatches", func(t *testing.T) {
3448 res := searchForTest(t, b, &q)
3449 wantRegexpCount := 1
3450 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3451 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3452 }
3453 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3454 t.Errorf("got %v, want 1 result", res.Files)
3455 }
3456 })
3457
3458 t.Run("ChunkMatches", func(t *testing.T) {
3459 res := searchForTest(t, b, &q, chunkOpts)
3460 wantRegexpCount := 1
3461 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3462 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3463 }
3464 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3465 t.Errorf("got %v, want 1 result", res.Files)
3466 }
3467 })
3468}
3469
3470func TestMultiLineRegex(t *testing.T) {
3471 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3472 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3473 Document{Name: "f2", Content: []byte("apple orange")},
3474 Document{Name: "f3", Content: []byte("grape apple")},
3475 )
3476 pattern := "(apple).*?[[:space:]].*?(grape)"
3477 r, _ := syntax.Parse(pattern, syntax.Perl)
3478
3479 q := query.Regexp{
3480 Regexp: r,
3481 }
3482 t.Run("LineMatches", func(t *testing.T) {
3483 res := searchForTest(t, b, &q)
3484 wantRegexpCount := 2
3485 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3486 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3487 }
3488 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3489 t.Errorf("got %v, want 1 result", res.Files)
3490 }
3491 if l := len(res.Files[0].LineMatches); l != 2 {
3492 t.Errorf("got %v, want 2 line matches", l)
3493 }
3494 })
3495
3496 t.Run("ChunkMatches", func(t *testing.T) {
3497 res := searchForTest(t, b, &q, chunkOpts)
3498 wantRegexpCount := 2
3499 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3500 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3501 }
3502 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3503 t.Errorf("got %v, want 1 result", res.Files)
3504 }
3505 if l := len(res.Files[0].ChunkMatches); l != 1 {
3506 t.Errorf("got %v, want 1 chunk matches", l)
3507 }
3508 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3509 t.Errorf("got %v, want 1 chunk ranges", l)
3510 }
3511 })
3512}
3513
3514func TestSearchTypeFileName(t *testing.T) {
3515 b := testShardBuilder(t, &zoekt.Repository{
3516 Name: "reponame",
3517 },
3518 Document{Name: "f1", Content: []byte("bla the needle")},
3519 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3520 // -----------------------------------012345678901234567890-123456
3521 )
3522
3523 t.Run("LineMatches", func(t *testing.T) {
3524 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3525 t.Helper()
3526 fmatches := res.Files
3527 if len(fmatches) != 1 {
3528 t.Errorf("got %v, want 1 matches", len(fmatches))
3529 return
3530 }
3531 if len(fmatches[0].LineMatches) != 1 {
3532 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3533 return
3534 }
3535 var got string
3536 if fmatches[0].LineMatches[0].FileName {
3537 got = fmatches[0].FileName
3538 } else {
3539 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3540 }
3541
3542 if got != want {
3543 t.Errorf("got %s, want %s", got, want)
3544 }
3545 }
3546
3547 // Only return the later match in the second file
3548 res := searchForTest(t, b, query.NewAnd(
3549 &query.Type{
3550 Type: query.TypeFileName,
3551 Child: &query.Substring{Pattern: "needle"},
3552 },
3553 &query.Substring{Pattern: "file"}))
3554 wantSingleMatch(res, "f2:8")
3555
3556 // Only return a filename result
3557 res = searchForTest(t, b,
3558 &query.Type{
3559 Type: query.TypeFileName,
3560 Child: &query.Substring{Pattern: "file"},
3561 })
3562 wantSingleMatch(res, "f2")
3563 })
3564
3565 t.Run("ChunkMatches", func(t *testing.T) {
3566 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3567 t.Helper()
3568 fmatches := res.Files
3569 if len(fmatches) != 1 {
3570 t.Errorf("got %v, want 1 matches", len(fmatches))
3571 return
3572 }
3573 if len(fmatches[0].ChunkMatches) != 1 {
3574 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3575 return
3576 }
3577 var got string
3578 if fmatches[0].ChunkMatches[0].FileName {
3579 got = fmatches[0].FileName
3580 } else {
3581 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3582 }
3583
3584 if got != want {
3585 t.Errorf("got %s, want %s", got, want)
3586 }
3587 }
3588
3589 // Only return the later match in the second file
3590 res := searchForTest(t, b, query.NewAnd(
3591 &query.Type{
3592 Type: query.TypeFileName,
3593 Child: &query.Substring{Pattern: "needle"},
3594 },
3595 &query.Substring{Pattern: "file"}),
3596 chunkOpts,
3597 )
3598 wantSingleMatch(res, "f2:8")
3599
3600 // Only return a filename result
3601 res = searchForTest(t, b,
3602 &query.Type{
3603 Type: query.TypeFileName,
3604 Child: &query.Substring{Pattern: "file"},
3605 },
3606 chunkOpts,
3607 )
3608 wantSingleMatch(res, "f2")
3609 })
3610}
3611
3612func TestSearchTypeLanguage(t *testing.T) {
3613 b := testShardBuilder(t, &zoekt.Repository{
3614 Name: "reponame",
3615 },
3616 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3617 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3618 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3619 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3620 )
3621
3622 t.Log(b.languageMap)
3623
3624 t.Run("LineMatches", func(t *testing.T) {
3625 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3626 t.Helper()
3627 fmatches := res.Files
3628 if len(fmatches) != 1 {
3629 t.Errorf("got %v, want 1 matches", len(fmatches))
3630 return
3631 }
3632 if len(fmatches[0].LineMatches) != 1 {
3633 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3634 return
3635 }
3636 var got string
3637 if fmatches[0].LineMatches[0].FileName {
3638 got = fmatches[0].FileName
3639 } else {
3640 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3641 }
3642
3643 if got != want {
3644 t.Errorf("got %s, want %s", got, want)
3645 }
3646 }
3647
3648 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3649 wantSingleMatch(res, "apex.cls")
3650
3651 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3652 wantSingleMatch(res, "tex.cls")
3653
3654 res = searchForTest(t, b, &query.Language{Language: "C"})
3655 wantSingleMatch(res, "hello.h")
3656
3657 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3658 wantSingleMatch(res, "be.magik")
3659
3660 // test fallback language search by pretending it's an older index version
3661 res = searchForTest(t, b, &query.Language{Language: "C++"})
3662 if len(res.Files) != 0 {
3663 t.Errorf("got %d results for C++, want 0", len(res.Files))
3664 }
3665
3666 b.featureVersion = 11 // force fallback
3667 res = searchForTest(t, b, &query.Language{Language: "C++"})
3668 wantSingleMatch(res, "hello.h")
3669 })
3670
3671 t.Run("ChunkMatches", func(t *testing.T) {
3672 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3673 t.Helper()
3674 fmatches := res.Files
3675 if len(fmatches) != 1 {
3676 t.Errorf("got %v, want 1 matches", len(fmatches))
3677 return
3678 }
3679 if len(fmatches[0].ChunkMatches) != 1 {
3680 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3681 return
3682 }
3683 var got string
3684 if fmatches[0].ChunkMatches[0].FileName {
3685 got = fmatches[0].FileName
3686 } else {
3687 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3688 }
3689
3690 if got != want {
3691 t.Errorf("got %s, want %s", got, want)
3692 }
3693 }
3694
3695 b.featureVersion = FeatureVersion // reset feature version
3696 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3697 wantSingleMatch(res, "apex.cls")
3698
3699 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3700 wantSingleMatch(res, "tex.cls")
3701
3702 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3703 wantSingleMatch(res, "hello.h")
3704
3705 // test fallback language search by pretending it's an older index version
3706 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3707 if len(res.Files) != 0 {
3708 t.Errorf("got %d results for C++, want 0", len(res.Files))
3709 }
3710
3711 b.featureVersion = 11 // force fallback
3712 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3713 wantSingleMatch(res, "hello.h")
3714 })
3715}
3716
3717func TestStats(t *testing.T) {
3718 ignored := []cmp.Option{
3719 cmpopts.EquateEmpty(),
3720 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"),
3721 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
3722 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
3723 }
3724
3725 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry {
3726 searcher := searcherForTest(t, b)
3727 indexdata := searcher.(*indexData)
3728 return indexdata.repoListEntry
3729 }
3730
3731 t.Run("one empty repo", func(t *testing.T) {
3732 b := testShardBuilder(t, nil)
3733 got := repoListEntries(b)
3734 want := []zoekt.RepoListEntry{
3735 {
3736 Stats: zoekt.RepoStats{
3737 Repos: 0,
3738 Shards: 1,
3739 Documents: 0,
3740 IndexBytes: 20,
3741 ContentBytes: 0,
3742 NewLinesCount: 0,
3743 DefaultBranchNewLinesCount: 0,
3744 OtherBranchesNewLinesCount: 0,
3745 },
3746 },
3747 }
3748
3749 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3750 t.Fatalf("mismatch (-want +got):\n%s", diff)
3751 }
3752 })
3753
3754 t.Run("one simple shard", func(t *testing.T) {
3755 b := testShardBuilder(t, nil,
3756 Document{Name: "doc 0", Content: []byte("content 0")},
3757 Document{Name: "doc 1", Content: []byte("content 1")},
3758 )
3759 got := repoListEntries(b)
3760 want := []zoekt.RepoListEntry{
3761 {
3762 Stats: zoekt.RepoStats{
3763 Repos: 0,
3764 Shards: 1,
3765 Documents: 2,
3766 IndexBytes: 224,
3767 ContentBytes: 28,
3768 NewLinesCount: 0,
3769 DefaultBranchNewLinesCount: 0,
3770 OtherBranchesNewLinesCount: 0,
3771 },
3772 },
3773 }
3774
3775 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3776 t.Fatalf("mismatch (-want +got):\n%s", diff)
3777 }
3778 })
3779
3780 t.Run("one compound shard", func(t *testing.T) {
3781 b := testShardBuilderCompound(t,
3782 []*zoekt.Repository{
3783 {Name: "repo 0"},
3784 {Name: "repo 1"},
3785 },
3786 [][]Document{
3787 {
3788 {Name: "doc 0", Content: []byte("content 0")},
3789 {Name: "doc 1", Content: []byte("content 1")},
3790 },
3791 {
3792 {Name: "doc 2", Content: []byte("content 2")},
3793 {Name: "doc 3", Content: []byte("content 3")},
3794 },
3795 },
3796 )
3797 got := repoListEntries(b)
3798 want := []zoekt.RepoListEntry{
3799 {
3800 Stats: zoekt.RepoStats{
3801 Repos: 0,
3802 Shards: 1,
3803 Documents: 2,
3804 IndexBytes: 180,
3805 ContentBytes: 28,
3806 NewLinesCount: 0,
3807 DefaultBranchNewLinesCount: 0,
3808 OtherBranchesNewLinesCount: 0,
3809 },
3810 },
3811 {
3812 Stats: zoekt.RepoStats{
3813 Repos: 0,
3814 Shards: 1,
3815 Documents: 2,
3816 IndexBytes: 180,
3817 ContentBytes: 28,
3818 NewLinesCount: 0,
3819 DefaultBranchNewLinesCount: 0,
3820 OtherBranchesNewLinesCount: 0,
3821 },
3822 },
3823 }
3824
3825 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3826 t.Fatalf("mismatch (-want +got):\n%s", diff)
3827 }
3828 })
3829
3830 t.Run("compound shard with empty repos", func(t *testing.T) {
3831 b := testShardBuilderCompound(t,
3832 []*zoekt.Repository{
3833 {Name: "repo 0"},
3834 {Name: "repo 1"},
3835 {Name: "repo 2"},
3836 {Name: "repo 3"},
3837 {Name: "repo 4"},
3838 },
3839 [][]Document{
3840 {{Name: "doc 0", Content: []byte("content 0")}},
3841 nil,
3842 {{Name: "doc 1", Content: []byte("content 1")}},
3843 nil,
3844 nil,
3845 },
3846 )
3847 got := repoListEntries(b)
3848
3849 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3850 Shards: 1,
3851 Documents: 0,
3852 ContentBytes: 0,
3853 }}
3854 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3855 Shards: 1,
3856 Documents: 1,
3857 ContentBytes: 14,
3858 }}
3859
3860 want := []zoekt.RepoListEntry{
3861 entryNonEmpty,
3862 entryEmpty,
3863 entryNonEmpty,
3864 entryEmpty,
3865 entryEmpty,
3866 }
3867
3868 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3869 t.Fatalf("mismatch (-want +got):\n%s", diff)
3870 }
3871 })
3872}
3873
3874// This tests the frequent pattern "\bLITERAL\b".
3875func TestWordSearch(t *testing.T) {
3876 content := []byte("needle the bla")
3877 // ----------------01234567890123
3878
3879 b := testShardBuilder(t, nil,
3880 Document{
3881 Name: "f1",
3882 Content: content,
3883 })
3884
3885 t.Run("LineMatches", func(t *testing.T) {
3886 sres := searchForTest(t, b,
3887 &query.Regexp{
3888 Regexp: mustParseRE("\\bthe\\b"),
3889 CaseSensitive: true,
3890 Content: true,
3891 })
3892
3893 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3894 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3895 }
3896
3897 if sres.Stats.RegexpsConsidered != 0 {
3898 t.Fatal("expected regexp to be skipped")
3899 }
3900
3901 got := sres.Files[0].LineMatches[0]
3902 want := zoekt.LineMatch{
3903 LineFragments: []zoekt.LineFragmentMatch{{
3904 LineOffset: 7,
3905 Offset: 7,
3906 MatchLength: 3,
3907 }},
3908 Line: content,
3909 FileName: false,
3910 LineNumber: 1,
3911 LineStart: 0,
3912 LineEnd: 14,
3913 }
3914
3915 if !reflect.DeepEqual(got, want) {
3916 t.Errorf("got %#v, want %#v", got, want)
3917 }
3918 })
3919
3920 t.Run("ChunkMatches", func(t *testing.T) {
3921 sres := searchForTest(t, b,
3922 &query.Regexp{
3923 Regexp: mustParseRE("\\bthe\\b"),
3924 CaseSensitive: true,
3925 }, chunkOpts)
3926
3927 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3928 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3929 }
3930
3931 if sres.Stats.RegexpsConsidered != 0 {
3932 t.Fatal("expected regexp to be skipped")
3933 }
3934
3935 got := sres.Files[0].ChunkMatches[0]
3936 want := zoekt.ChunkMatch{
3937 Content: content,
3938 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3939 Ranges: []zoekt.Range{{
3940 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3941 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3942 }},
3943 }
3944
3945 if diff := cmp.Diff(want, got); diff != "" {
3946 t.Fatal(diff)
3947 }
3948 })
3949}
3950
3951// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
3952// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
3953func BenchmarkScoreChunkMatches(b *testing.B) {
3954 ctx := context.Background()
3955 var builder strings.Builder
3956 for i := 0; i < 1000; i++ {
3957 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i))
3958 }
3959
3960 searcher := searcherForTest(b, testShardBuilder(b, nil,
3961 Document{Name: "f1", Content: []byte(builder.String())},
3962 ))
3963
3964 q := &query.Or{
3965 Children: []query.Q{
3966 &query.Substring{Pattern: "f"},
3967 &query.Substring{Pattern: "t"},
3968 }}
3969
3970 b.Run("score large ChunkMatch", func(b *testing.B) {
3971 b.ReportAllocs()
3972 b.ResetTimer()
3973
3974 for i := 0; i < b.N; i++ {
3975 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1})
3976 if err != nil {
3977 b.Fatal(err)
3978 }
3979
3980 matches := sres.Files
3981 if len(matches) == 0 {
3982 b.Fatalf("want file index, got none")
3983 }
3984 }
3985 })
3986}