fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29 "github.com/sourcegraph/zoekt"
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *zoekt.SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 r.Files[i].ChunkMatches[j].BestLineMatch = 0
42 }
43 r.Files[i].Checksum = nil
44 r.Files[i].Debug = ""
45 }
46}
47
48func testIndexBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *IndexBuilder {
49 tb.Helper()
50
51 b, err := NewIndexBuilder(repo)
52 if err != nil {
53 tb.Fatalf("NewIndexBuilder: %v", err)
54 }
55
56 for i, d := range docs {
57 if err := b.Add(d); err != nil {
58 tb.Fatalf("Add %d: %v", i, err)
59 }
60 }
61
62 return b
63}
64
65func testIndexBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *IndexBuilder {
66 t.Helper()
67
68 b := newIndexBuilder()
69 b.indexFormatVersion = NextIndexFormatVersion
70
71 if len(repos) != len(docs) {
72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
73 }
74
75 for i, repo := range repos {
76 if err := b.setRepository(repo); err != nil {
77 t.Fatal(err)
78 }
79 for j, d := range docs[i] {
80 if err := b.Add(d); err != nil {
81 t.Fatalf("Add %d %d: %v", i, j, err)
82 }
83 }
84 }
85
86 return b
87}
88
89func TestBoundary(t *testing.T) {
90 b := testIndexBuilder(t, nil,
91 Document{Name: "f1", Content: []byte("x the")},
92 Document{Name: "f1", Content: []byte("reader")})
93 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
94 if len(res.Files) > 0 {
95 t.Fatalf("got %v, want no matches", res.Files)
96 }
97}
98
99func TestDocSectionInvalid(t *testing.T) {
100 b, err := NewIndexBuilder(nil)
101 if err != nil {
102 t.Fatalf("NewIndexBuilder: %v", err)
103 }
104 doc := Document{
105 Name: "f1",
106 Content: []byte("01234567890123"),
107 Symbols: []DocumentSection{{5, 8}, {7, 9}},
108 }
109
110 if err := b.Add(doc); err == nil {
111 t.Errorf("overlapping doc sections should fail")
112 }
113
114 doc = Document{
115 Name: "f1",
116 Content: []byte("01234567890123"),
117 Symbols: []DocumentSection{{0, 20}},
118 }
119
120 if err := b.Add(doc); err == nil {
121 t.Errorf("doc sections beyond EOF should fail")
122 }
123}
124
125func TestBasic(t *testing.T) {
126 b := testIndexBuilder(t, nil,
127 Document{
128 Name: "f2",
129 Content: []byte("to carry water in the no later bla"),
130 // --------------0123456789012345678901234567890123
131 })
132
133 t.Run("LineMatch", func(t *testing.T) {
134 res := searchForTest(t, b, &query.Substring{
135 Pattern: "water",
136 CaseSensitive: true,
137 })
138 fmatches := res.Files
139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
140 t.Fatalf("got %v, want 1 matches", fmatches)
141 }
142
143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
144 want := "f2:9"
145 if got != want {
146 t.Errorf("1: got %s, want %s", got, want)
147 }
148 })
149
150 t.Run("ChunkMatch", func(t *testing.T) {
151 res := searchForTest(t, b, &query.Substring{
152 Pattern: "water",
153 CaseSensitive: true,
154 }, chunkOpts)
155 fmatches := res.Files
156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
157 t.Fatalf("got %v, want 1 matches", fmatches)
158 }
159
160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
161 want := "f2:9"
162 if got != want {
163 t.Errorf("1: got %s, want %s", got, want)
164 }
165 })
166}
167
168func TestEmptyIndex(t *testing.T) {
169 b := testIndexBuilder(t, nil)
170 searcher := searcherForTest(t, b)
171
172 var opts zoekt.SearchOptions
173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
174 t.Fatalf("Search: %v", err)
175 }
176
177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
178 t.Fatalf("List: %v", err)
179 }
180
181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
182 t.Fatalf("Search: %v", err)
183 }
184}
185
// memSeeker is an in-memory index file backed by a byte slice. searcherForTest
// uses it to pass a serialized index to NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed placeholder name.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data that start at offset off. The returned
// slice aliases the underlying buffer. A request that reaches past the end of
// the data returns an error rather than panicking; the end offset is computed
// in 64-bit arithmetic so that off+sz cannot wrap around.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: out of bounds read: off %d, sz %d, len %d", off, sz, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the underlying data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
202
203func TestNewlines(t *testing.T) {
204 b := testIndexBuilder(t, nil,
205 // -----------------------------------------012345-678901-234
206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
207
208 t.Run("LineMatches", func(t *testing.T) {
209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
210
211 matches := sres.Files
212 want := []zoekt.FileMatch{{
213 FileName: "filename",
214 LineMatches: []zoekt.LineMatch{{
215 LineFragments: []zoekt.LineFragmentMatch{{
216 Offset: 8,
217 LineOffset: 2,
218 MatchLength: 3,
219 }},
220 Line: []byte("line2\n"),
221 LineStart: 6,
222 LineEnd: 12,
223 LineNumber: 2,
224 }},
225 }}
226
227 if diff := cmp.Diff(matches, want); diff != "" {
228 t.Fatal(diff)
229 }
230 })
231
232 t.Run("ChunkMatches", func(t *testing.T) {
233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
234
235 matches := sres.Files
236 want := []zoekt.FileMatch{{
237 FileName: "filename",
238 ChunkMatches: []zoekt.ChunkMatch{{
239 Content: []byte("line2\n"),
240 ContentStart: zoekt.Location{
241 ByteOffset: 6,
242 LineNumber: 2,
243 Column: 1,
244 },
245 Ranges: []zoekt.Range{{
246 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3},
247 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6},
248 }},
249 }},
250 }}
251
252 if diff := cmp.Diff(want, matches); diff != "" {
253 t.Fatal(diff)
254 }
255 })
256}
257
258// A result spanning multiple lines should have LineMatches that only cover
259// single lines.
260func TestQueryNewlines(t *testing.T) {
261 text := "line1\nline2\nbla"
262 b := testIndexBuilder(t, nil,
263 Document{Name: "filename", Content: []byte(text)})
264
265 t.Run("LineMatches", func(t *testing.T) {
266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
267 matches := sres.Files
268 if len(matches) != 1 {
269 t.Fatalf("got %d file matches, want exactly one", len(matches))
270 }
271 m := matches[0]
272 if len(m.LineMatches) != 2 {
273 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
274 }
275 })
276
277 t.Run("ChunkMatches", func(t *testing.T) {
278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
279 matches := sres.Files
280 if len(matches) != 1 {
281 t.Fatalf("got %d file matches, want exactly one", len(matches))
282 }
283 m := matches[0]
284 if len(m.ChunkMatches) != 1 {
285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
286 }
287 })
288}
289
290var chunkOpts = zoekt.SearchOptions{ChunkMatches: true}
291
292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult {
293 searcher := searcherForTest(t, b)
294 var opts zoekt.SearchOptions
295 if len(o) > 0 {
296 opts = o[0]
297 }
298 res, err := searcher.Search(context.Background(), q, &opts)
299 if err != nil {
300 t.Fatalf("Search(%s): %v", q, err)
301 }
302 clearScores(res)
303 return res
304}
305
306func searcherForTest(t testing.TB, b *IndexBuilder) zoekt.Searcher {
307 var buf bytes.Buffer
308 if err := b.Write(&buf); err != nil {
309 t.Fatal(err)
310 }
311 f := &memSeeker{buf.Bytes()}
312
313 searcher, err := NewSearcher(f)
314 if err != nil {
315 t.Fatalf("NewSearcher: %v", err)
316 }
317
318 return searcher
319}
320
321func TestCaseFold(t *testing.T) {
322 b := testIndexBuilder(t, nil,
323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
324 // -----------------------------------012345678901234
325 )
326 t.Run("LineMatches", func(t *testing.T) {
327 sres := searchForTest(t, b, &query.Substring{
328 Pattern: "bananas",
329 CaseSensitive: true,
330 })
331 matches := sres.Files
332 if len(matches) != 0 {
333 t.Errorf("foldcase: got %#v, want 0 matches", matches)
334 }
335
336 sres = searchForTest(t, b,
337 &query.Substring{
338 Pattern: "BaNaNAS",
339 CaseSensitive: true,
340 })
341 matches = sres.Files
342 if len(matches) != 1 {
343 t.Errorf("no foldcase: got %v, want 1 matches", matches)
344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
345 t.Errorf("foldcase: got %v, want offsets 7", matches)
346 }
347 })
348
349 t.Run("ChunkMatches", func(t *testing.T) {
350 sres := searchForTest(t, b, &query.Substring{
351 Pattern: "bananas",
352 CaseSensitive: true,
353 }, chunkOpts)
354 matches := sres.Files
355 if len(matches) != 0 {
356 t.Errorf("foldcase: got %#v, want 0 matches", matches)
357 }
358
359 sres = searchForTest(t, b,
360 &query.Substring{
361 Pattern: "BaNaNAS",
362 CaseSensitive: true,
363 })
364 matches = sres.Files
365 if len(matches) != 1 {
366 t.Errorf("no foldcase: got %v, want 1 matches", matches)
367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
368 t.Errorf("foldcase: got %v, want offsets 7", matches)
369 }
370 })
371}
372
373// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
374// chars. Those are then set as symbols.
375func wordsAsSymbols(doc Document) Document {
376 re := regexp.MustCompile(`\b\w{2,}\b`)
377 var symbols []DocumentSection
378 var symbolsMetadata []*zoekt.Symbol
379 for _, match := range re.FindAllIndex(doc.Content, -1) {
380 symbols = append(symbols, DocumentSection{
381 Start: uint32(match[0]),
382 End: uint32(match[1]),
383 })
384 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"})
385 }
386 doc.Symbols = symbols
387 doc.SymbolsMetaData = symbolsMetadata
388 return doc
389}
390
391func TestSearchStats(t *testing.T) {
392 ctx := context.Background()
393 searcher := searcherForTest(t, testIndexBuilder(t, nil,
394 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
395 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
396 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
397 // --------------------------------------------------0123456789012345
398 ))
399
400 andQuery := query.NewAnd(
401 &query.Substring{
402 Pattern: "banana",
403 },
404 &query.Substring{
405 Pattern: "apple",
406 },
407 )
408
409 t.Run("LineMatches", func(t *testing.T) {
410 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{})
411 if err != nil {
412 t.Fatal(err)
413 }
414 matches := sres.Files
415 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
416 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
417 }
418
419 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
420 t.Fatalf("got %#v, want offsets 2,9", matches)
421 }
422 })
423 t.Run("ChunkMatches", func(t *testing.T) {
424 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
425 if err != nil {
426 t.Fatal(err)
427 }
428 matches := sres.Files
429 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
430 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
431 }
432
433 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
434 t.Fatalf("got %#v, want offsets 2,9", matches)
435 }
436 })
437 t.Run("Stats", func(t *testing.T) {
438 cases := []struct {
439 Name string
440 Q query.Q
441 Want zoekt.Stats
442 }{{
443 Name: "and-query",
444 Q: andQuery,
445 Want: zoekt.Stats{
446 FilesLoaded: 1,
447 ContentBytesLoaded: 22,
448 IndexBytesLoaded: 10,
449 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
450 NgramLookups: 104,
451 MatchCount: 2,
452 FileCount: 1,
453 FilesConsidered: 2,
454 ShardsScanned: 1,
455 },
456 }, {
457 Name: "one-trigram",
458 Q: &query.Substring{
459 Pattern: "a y",
460 Content: true,
461 CaseSensitive: true,
462 },
463 Want: zoekt.Stats{
464 ContentBytesLoaded: 14,
465 IndexBytesLoaded: 1,
466 FileCount: 1,
467 FilesConsidered: 1,
468 FilesLoaded: 1,
469 ShardsScanned: 1,
470 MatchCount: 1,
471 NgramMatches: 1,
472 NgramLookups: 2, // once to lookup frequency then again to access posting list.
473 },
474 }, {
475 Name: "one-trigram-case-insensitive",
476 Q: &query.Substring{
477 Pattern: "a y",
478 Content: true,
479 },
480 Want: zoekt.Stats{
481 ContentBytesLoaded: 14,
482 IndexBytesLoaded: 1,
483 FileCount: 1,
484 FilesConsidered: 1,
485 FilesLoaded: 1,
486 ShardsScanned: 1,
487 MatchCount: 1,
488 NgramMatches: 1,
489 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
490 },
491 }, {
492 Name: "one-trigram-pruned",
493 Q: &query.Substring{
494 Pattern: "foo",
495 Content: true,
496 CaseSensitive: true,
497 },
498 Want: zoekt.Stats{
499 ShardsSkippedFilter: 1,
500 NgramLookups: 1, // only had to lookup once
501 },
502 }, {
503 Name: "one-trigram-branch-pruned",
504 Q: query.NewAnd(
505 &query.Substring{
506 Pattern: "foo",
507 Content: true,
508 CaseSensitive: true,
509 },
510 &query.Substring{
511 Pattern: "a y",
512 Content: true,
513 CaseSensitive: true,
514 },
515 ),
516 Want: zoekt.Stats{
517 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
518 ShardsSkippedFilter: 1,
519 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
520 },
521 }, {
522 Name: "symbol-substr-nomatch",
523 Q: &query.Symbol{Expr: &query.Substring{
524 Pattern: "banana apple",
525 Content: true,
526 CaseSensitive: true,
527 }},
528 Want: zoekt.Stats{
529 IndexBytesLoaded: 3,
530 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
531 MatchCount: 0, // even though there is a match it doesn't align with a symbol
532 ShardsScanned: 1,
533 NgramMatches: 1,
534 NgramLookups: 12,
535 },
536 }, {
537 Name: "symbol-substr",
538 Q: &query.Symbol{Expr: &query.Substring{
539 Pattern: "apple",
540 Content: true,
541 CaseSensitive: true,
542 }},
543 Want: zoekt.Stats{
544 ContentBytesLoaded: 35,
545 IndexBytesLoaded: 4,
546 FileCount: 2,
547 FilesConsidered: 2, // must be 2 to ensure we used the index
548 FilesLoaded: 2,
549 MatchCount: 2, // apple symbols is in two files
550 ShardsScanned: 1,
551 NgramMatches: 2,
552 NgramLookups: 5,
553 },
554 }, {
555 Name: "symbol-regexp-nomatch",
556 Q: &query.Symbol{Expr: &query.Regexp{
557 Regexp: mustParseRE("^apple.banana$"),
558 Content: true,
559 CaseSensitive: true,
560 }},
561 Want: zoekt.Stats{
562 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
563 IndexBytesLoaded: 10,
564 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
565 FilesLoaded: 2,
566 MatchCount: 0, // even though there is a match it doesn't align with a symbol
567 ShardsScanned: 1,
568 NgramMatches: 3,
569 NgramLookups: 11,
570 },
571 }, {
572 Name: "symbol-regexp",
573 Q: &query.Symbol{Expr: &query.Regexp{
574 Regexp: mustParseRE("^app.e$"),
575 Content: true,
576 CaseSensitive: true,
577 }},
578 Want: zoekt.Stats{
579 ContentBytesLoaded: 35,
580 IndexBytesLoaded: 2,
581 FileCount: 2,
582 FilesConsidered: 2, // must be 2 to ensure we used the index
583 FilesLoaded: 2,
584 MatchCount: 2, // apple symbols is in two files
585 ShardsScanned: 1,
586 NgramMatches: 2,
587 NgramLookups: 2,
588 },
589 }}
590
591 for _, tc := range cases {
592 t.Run(tc.Name, func(t *testing.T) {
593 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
594 if err != nil {
595 t.Fatal(err)
596 }
597 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
598 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
599 }
600 })
601 }
602 })
603}
604
605func TestAndNegateSearch(t *testing.T) {
606 b := testIndexBuilder(t, nil,
607 Document{Name: "f1", Content: []byte("x banana y")},
608 // -----------------------------------0123456789
609 Document{Name: "f4", Content: []byte("x banana apple y")})
610
611 t.Run("LineMatches", func(t *testing.T) {
612 sres := searchForTest(t, b, query.NewAnd(
613 &query.Substring{
614 Pattern: "banana",
615 },
616 &query.Not{Child: &query.Substring{
617 Pattern: "apple",
618 }}))
619
620 matches := sres.Files
621
622 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
623 t.Fatalf("got %v, want 1 match", matches)
624 }
625 if matches[0].FileName != "f1" {
626 t.Fatalf("got match %#v, want FileName: f1", matches[0])
627 }
628 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
629 t.Fatalf("got %v, want offset 2", matches)
630 }
631 })
632
633 t.Run("ChunkMatches", func(t *testing.T) {
634 sres := searchForTest(t, b,
635 query.NewAnd(
636 &query.Substring{
637 Pattern: "banana",
638 },
639 &query.Not{Child: &query.Substring{
640 Pattern: "apple",
641 }},
642 ),
643 chunkOpts,
644 )
645
646 matches := sres.Files
647
648 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
649 t.Fatalf("got %v, want 1 match", matches)
650 }
651 if matches[0].FileName != "f1" {
652 t.Fatalf("got match %#v, want FileName: f1", matches[0])
653 }
654 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
655 t.Fatalf("got %v, want offset 2", matches)
656 }
657 })
658}
659
660func TestNegativeMatchesOnlyShortcut(t *testing.T) {
661 b := testIndexBuilder(t, nil,
662 Document{Name: "f1", Content: []byte("x banana y")},
663 Document{Name: "f2", Content: []byte("x appelmoes y")},
664 Document{Name: "f3", Content: []byte("x appelmoes y")},
665 Document{Name: "f3", Content: []byte("x appelmoes y")})
666
667 t.Run("LineMatches", func(t *testing.T) {
668 sres := searchForTest(t, b, query.NewAnd(
669 &query.Substring{
670 Pattern: "banana",
671 },
672 &query.Not{Child: &query.Substring{
673 Pattern: "appel",
674 }}))
675
676 if sres.Stats.FilesConsidered != 1 {
677 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
678 }
679 })
680
681 t.Run("ChunkMatches", func(t *testing.T) {
682 sres := searchForTest(t, b, query.NewAnd(
683 &query.Substring{
684 Pattern: "banana",
685 },
686 &query.Not{Child: &query.Substring{
687 Pattern: "appel",
688 }}), chunkOpts)
689
690 if sres.Stats.FilesConsidered != 1 {
691 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
692 }
693 })
694}
695
696func TestFileSearch(t *testing.T) {
697 b := testIndexBuilder(t, nil,
698 Document{Name: "banzana", Content: []byte("x orange y")},
699 // -------------0123456
700 Document{Name: "banana", Content: []byte("x apple y")},
701 // -------------012345
702 )
703
704 t.Run("LineMatches", func(t *testing.T) {
705 sres := searchForTest(t, b, &query.Substring{
706 Pattern: "anan",
707 FileName: true,
708 })
709
710 matches := sres.Files
711 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
712 t.Fatalf("got %v, want 1 match", matches)
713 }
714
715 got := matches[0].LineMatches[0]
716 want := zoekt.LineMatch{
717 Line: []byte("banana"),
718 LineFragments: []zoekt.LineFragmentMatch{{
719 Offset: 1,
720 LineOffset: 1,
721 MatchLength: 4,
722 }},
723 FileName: true,
724 }
725
726 if !reflect.DeepEqual(got, want) {
727 t.Errorf("got %#v, want %#v", got, want)
728 }
729 })
730
731 t.Run("ChunkMatches", func(t *testing.T) {
732 sres := searchForTest(t, b, &query.Substring{
733 Pattern: "anan",
734 FileName: true,
735 }, chunkOpts)
736
737 matches := sres.Files
738 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
739 t.Fatalf("got %v, want 1 match", matches)
740 }
741
742 got := matches[0].ChunkMatches[0]
743 want := zoekt.ChunkMatch{
744 Content: []byte("banana"),
745 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
746 Ranges: []zoekt.Range{{
747 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2},
748 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6},
749 }},
750 FileName: true,
751 }
752
753 if diff := cmp.Diff(want, got); diff != "" {
754 t.Fatal(diff)
755 }
756 })
757
758 t.Run("FileNameSet", func(t *testing.T) {
759 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
760
761 matches := sres.Files
762 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
763 t.Fatalf("got %v, want 1 match", matches)
764 }
765
766 got := matches[0].ChunkMatches[0]
767 want := zoekt.ChunkMatch{
768 Content: []byte("banana"),
769 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
770 Ranges: []zoekt.Range{{
771 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
772 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7},
773 }},
774 FileName: true,
775 }
776
777 if diff := cmp.Diff(want, got); diff != "" {
778 t.Fatal(diff)
779 }
780 })
781}
782
783func TestFileCase(t *testing.T) {
784 b := testIndexBuilder(t, nil,
785 Document{Name: "BANANA", Content: []byte("x orange y")})
786
787 t.Run("LineMatches", func(t *testing.T) {
788 sres := searchForTest(t, b, &query.Substring{
789 Pattern: "banana",
790 FileName: true,
791 })
792
793 matches := sres.Files
794 if len(matches) != 1 || matches[0].FileName != "BANANA" {
795 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
796 }
797 })
798
799 t.Run("ChunkMatches", func(t *testing.T) {
800 sres := searchForTest(t, b, &query.Substring{
801 Pattern: "banana",
802 FileName: true,
803 }, chunkOpts)
804
805 matches := sres.Files
806 if len(matches) != 1 || matches[0].FileName != "BANANA" {
807 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
808 }
809 })
810}
811
812func TestFileRegexpSearchBruteForce(t *testing.T) {
813 b := testIndexBuilder(t, nil,
814 Document{Name: "banzana", Content: []byte("x orange y")},
815 Document{Name: "banana", Content: []byte("x apple y")},
816 )
817 t.Run("LineMatches", func(t *testing.T) {
818 sres := searchForTest(t, b, &query.Regexp{
819 Regexp: mustParseRE("[qn][zx]"),
820 FileName: true,
821 })
822
823 matches := sres.Files
824 if len(matches) != 1 || matches[0].FileName != "banzana" {
825 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
826 }
827 })
828 t.Run("LineMatches", func(t *testing.T) {
829 sres := searchForTest(t, b, &query.Regexp{
830 Regexp: mustParseRE("[qn][zx]"),
831 FileName: true,
832 }, chunkOpts)
833
834 matches := sres.Files
835 if len(matches) != 1 || matches[0].FileName != "banzana" {
836 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
837 }
838 })
839}
840
841func TestFileRegexpSearchShortString(t *testing.T) {
842 b := testIndexBuilder(t, nil,
843 Document{Name: "banana.py", Content: []byte("x orange y")})
844
845 t.Run("LineMatches", func(t *testing.T) {
846 sres := searchForTest(t, b, &query.Regexp{
847 Regexp: mustParseRE("ana.py"),
848 FileName: true,
849 })
850
851 matches := sres.Files
852 if len(matches) != 1 || matches[0].FileName != "banana.py" {
853 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
854 }
855 })
856
857 t.Run("ChunkMatches", func(t *testing.T) {
858 sres := searchForTest(t, b, &query.Regexp{
859 Regexp: mustParseRE("ana.py"),
860 FileName: true,
861 }, chunkOpts)
862
863 matches := sres.Files
864 if len(matches) != 1 || matches[0].FileName != "banana.py" {
865 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
866 }
867 })
868}
869
870func TestFileSubstringSearchBruteForce(t *testing.T) {
871 b := testIndexBuilder(t, nil,
872 Document{Name: "BANZANA", Content: []byte("x orange y")},
873 Document{Name: "banana", Content: []byte("x apple y")})
874
875 q := &query.Substring{
876 Pattern: "z",
877 FileName: true,
878 }
879
880 t.Run("LineMatches", func(t *testing.T) {
881 res := searchForTest(t, b, q)
882 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
883 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
884 }
885 })
886
887 t.Run("ChunkMatches", func(t *testing.T) {
888 res := searchForTest(t, b, q, chunkOpts)
889 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
890 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
891 }
892 })
893}
894
895func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
896 b := testIndexBuilder(t, nil,
897 Document{Name: "BANZANA", Content: []byte("x orange y")},
898 Document{Name: "bananaq", Content: []byte("x apple y")})
899
900 q := &query.Substring{
901 Pattern: "q",
902 FileName: true,
903 }
904 t.Run("LineMatches", func(t *testing.T) {
905 res := searchForTest(t, b, q)
906 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
907 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
908 }
909 })
910
911 t.Run("LineMatches", func(t *testing.T) {
912 res := searchForTest(t, b, q, chunkOpts)
913 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
914 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
915 }
916 })
917}
918
919func TestSearchMatchAll(t *testing.T) {
920 b := testIndexBuilder(t, nil,
921 Document{Name: "banzana", Content: []byte("x orange y")},
922 Document{Name: "banana", Content: []byte("x apple y")})
923
924 t.Run("LineMatches", func(t *testing.T) {
925 sres := searchForTest(t, b, &query.Const{Value: true})
926 matches := sres.Files
927 if len(matches) != 2 {
928 t.Fatalf("got %v, want 2 matches", matches)
929 }
930 })
931
932 t.Run("ChunkMatches", func(t *testing.T) {
933 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
934 matches := sres.Files
935 if len(matches) != 2 {
936 t.Fatalf("got %v, want 2 matches", matches)
937 }
938 })
939}
940
941func TestSearchNewline(t *testing.T) {
942 b := testIndexBuilder(t, nil,
943 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
944
945 t.Run("LineMatches", func(t *testing.T) {
946 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
947
948 // Just check that we don't crash.
949
950 matches := sres.Files
951 if len(matches) != 1 {
952 t.Fatalf("got %v, want 1 matches", matches)
953 }
954 })
955
956 t.Run("ChunkMatches", func(t *testing.T) {
957 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
958
959 // Just check that we don't crash.
960
961 matches := sres.Files
962 if len(matches) != 1 {
963 t.Fatalf("got %v, want 1 matches", matches)
964 }
965 })
966}
967
968func TestSearchMatchAllRegexp(t *testing.T) {
969 b := testIndexBuilder(t, nil,
970 Document{Name: "banzana", Content: []byte("abcd")},
971 Document{Name: "banana", Content: []byte("pqrs")})
972
973 t.Run("LineMatches", func(t *testing.T) {
974 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
975
976 matches := sres.Files
977 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
978 t.Fatalf("got %v, want 2 matches", matches)
979 }
980 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
981 t.Fatalf("want 4 chars in every file, got %#v", matches)
982 }
983 })
984
985 t.Run("ChunkMatches", func(t *testing.T) {
986 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
987
988 matches := sres.Files
989 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
990 t.Fatalf("got %v, want 2 matches", matches)
991 }
992 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
993 t.Fatalf("want 4 chars in every file, got %#v", matches)
994 }
995 })
996}
997
998func TestSearchBM25MatchScores(t *testing.T) {
999 ctx := context.Background()
1000 searcher := searcherForTest(t, testIndexBuilder(t, nil,
1001 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")},
1002 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")},
1003 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}),
1004 ))
1005
1006 t.Run("LineMatches", func(t *testing.T) {
1007 q := &query.Substring{Pattern: "two"}
1008 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true})
1009 if err != nil {
1010 t.Fatal(err)
1011 }
1012 matches := sres.Files
1013 if len(matches) != 1 {
1014 t.Fatalf("want 1 file index, got %d", len(matches))
1015 }
1016
1017 if len(matches[0].LineMatches) != 2 {
1018 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1019 }
1020
1021 if matches[0].LineMatches[0].LineNumber != 4 {
1022 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber)
1023 }
1024 })
1025
1026 t.Run("ChunkMatches", func(t *testing.T) {
1027 q := &query.Substring{Pattern: "five"}
1028 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1029 if err != nil {
1030 t.Fatal(err)
1031 }
1032
1033 matches := sres.Files
1034 if len(matches) != 1 {
1035 t.Fatalf("want 1 file index, got %d", len(matches))
1036 }
1037
1038 if len(matches[0].ChunkMatches) != 2 {
1039 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1040 }
1041
1042 if matches[0].ChunkMatches[0].BestLineMatch != 4 {
1043 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch)
1044 }
1045 })
1046
1047 t.Run("ChunkMatches with symbols", func(t *testing.T) {
1048 q := &query.Or{
1049 Children: []query.Q{
1050 &query.Symbol{Expr: &query.Substring{Pattern: "main"}},
1051 &query.Substring{Pattern: "five"},
1052 },
1053 }
1054
1055 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1056 if err != nil {
1057 t.Fatal(err)
1058 }
1059
1060 matches := sres.Files
1061 if len(matches) != 2 {
1062 t.Fatalf("want 2 file index, got %d", len(matches))
1063 }
1064
1065 foundSymbolInfo := false
1066 for _, m := range matches {
1067 for _, cm := range m.ChunkMatches {
1068 if len(cm.SymbolInfo) > 0 {
1069 foundSymbolInfo = true
1070 }
1071 }
1072 }
1073
1074 if !foundSymbolInfo {
1075 t.Fatalf("want symbol info, got none")
1076 }
1077 })
1078}
1079
1080func TestFileRestriction(t *testing.T) {
1081 b := testIndexBuilder(t, nil,
1082 Document{Name: "banana1", Content: []byte("x orange y")},
1083 Document{Name: "banana2", Content: []byte("x apple y")},
1084 Document{Name: "orange", Content: []byte("x apple z")})
1085
1086 t.Run("LineMatches", func(t *testing.T) {
1087 sres := searchForTest(t, b, query.NewAnd(
1088 &query.Substring{
1089 Pattern: "banana",
1090 FileName: true,
1091 },
1092 &query.Substring{
1093 Pattern: "apple",
1094 }))
1095
1096 matches := sres.Files
1097 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1098 t.Fatalf("got %v, want 1 match", matches)
1099 }
1100
1101 match := matches[0].LineMatches[0]
1102 got := string(match.Line)
1103 want := "x apple y"
1104 if got != want {
1105 t.Errorf("got match %#v, want line %q", match, want)
1106 }
1107 })
1108
1109 t.Run("ChunkMatches", func(t *testing.T) {
1110 sres := searchForTest(t, b, query.NewAnd(
1111 &query.Substring{
1112 Pattern: "banana",
1113 FileName: true,
1114 },
1115 &query.Substring{
1116 Pattern: "apple",
1117 }), chunkOpts)
1118
1119 matches := sres.Files
1120 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1121 t.Fatalf("got %v, want 1 match", matches)
1122 }
1123
1124 match := matches[0].ChunkMatches[0]
1125 got := string(match.Content)
1126 want := "x apple y"
1127 if got != want {
1128 t.Errorf("got match %#v, want line %q", match, want)
1129 }
1130 })
1131}
1132
1133func TestFileNameBoundary(t *testing.T) {
1134 b := testIndexBuilder(t, nil,
1135 Document{Name: "banana2", Content: []byte("x apple y")},
1136 Document{Name: "helpers.go", Content: []byte("x apple y")},
1137 Document{Name: "foo", Content: []byte("x apple y")})
1138
1139 t.Run("LineMatches", func(t *testing.T) {
1140 sres := searchForTest(t, b, &query.Substring{
1141 Pattern: "helpers.go",
1142 FileName: true,
1143 })
1144
1145 matches := sres.Files
1146 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1147 t.Fatalf("got %v, want 1 match", matches)
1148 }
1149 })
1150
1151 t.Run("ChunkMatches", func(t *testing.T) {
1152 sres := searchForTest(t, b, &query.Substring{
1153 Pattern: "helpers.go",
1154 FileName: true,
1155 }, chunkOpts)
1156
1157 matches := sres.Files
1158 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1159 t.Fatalf("got %v, want 1 match", matches)
1160 }
1161 })
1162}
1163
1164func TestDocumentOrder(t *testing.T) {
1165 var docs []Document
1166 for i := 0; i < 3; i++ {
1167 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1168 }
1169
1170 b := testIndexBuilder(t, nil, docs...)
1171
1172 t.Run("LineMatches", func(t *testing.T) {
1173 sres := searchForTest(t, b, query.NewAnd(
1174 &query.Substring{
1175 Pattern: "needle",
1176 }))
1177
1178 want := []string{"f0", "f1", "f2"}
1179 var got []string
1180 for _, f := range sres.Files {
1181 got = append(got, f.FileName)
1182 }
1183 if !reflect.DeepEqual(got, want) {
1184 t.Fatalf("got %v, want %v", got, want)
1185 }
1186 })
1187
1188 t.Run("ChunkMatches", func(t *testing.T) {
1189 sres := searchForTest(t, b,
1190 query.NewAnd(&query.Substring{
1191 Pattern: "needle",
1192 }),
1193 chunkOpts,
1194 )
1195
1196 want := []string{"f0", "f1", "f2"}
1197 var got []string
1198 for _, f := range sres.Files {
1199 got = append(got, f.FileName)
1200 }
1201 if !reflect.DeepEqual(got, want) {
1202 t.Fatalf("got %v, want %v", got, want)
1203 }
1204 })
1205}
1206
1207func TestBranchMask(t *testing.T) {
1208 b := testIndexBuilder(t, &zoekt.Repository{
1209 Branches: []zoekt.RepositoryBranch{
1210 {"master", "v-master"},
1211 {"stable", "v-stable"},
1212 {"bonzai", "v-bonzai"},
1213 },
1214 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1215 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1216 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1217 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1218 )
1219
1220 t.Run("LineMatches", func(t *testing.T) {
1221 sres := searchForTest(t, b, query.NewAnd(
1222 &query.Substring{
1223 Pattern: "needle",
1224 },
1225 &query.Branch{
1226 Pattern: "table",
1227 }))
1228
1229 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1230 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1231 }
1232
1233 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1234 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1235 }
1236 })
1237
1238 t.Run("ChunkMatches", func(t *testing.T) {
1239 sres := searchForTest(t, b, query.NewAnd(
1240 &query.Substring{
1241 Pattern: "needle",
1242 },
1243 &query.Branch{
1244 Pattern: "table",
1245 }),
1246 chunkOpts,
1247 )
1248
1249 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1250 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1251 }
1252
1253 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1254 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1255 }
1256 })
1257}
1258
1259func TestBranchLimit(t *testing.T) {
1260 for limit := 64; limit <= 65; limit++ {
1261 r := &zoekt.Repository{}
1262 for i := 0; i < limit; i++ {
1263 s := fmt.Sprintf("b%d", i)
1264 r.Branches = append(r.Branches, zoekt.RepositoryBranch{
1265 s, "v-" + s,
1266 })
1267 }
1268 _, err := NewIndexBuilder(r)
1269 if limit == 64 && err != nil {
1270 t.Fatalf("NewIndexBuilder: %v", err)
1271 } else if limit == 65 && err == nil {
1272 t.Fatalf("NewIndexBuilder succeeded")
1273 }
1274 }
1275}
1276
1277func TestBranchReport(t *testing.T) {
1278 branches := []string{"stable", "master"}
1279 b := testIndexBuilder(t, &zoekt.Repository{
1280 Branches: []zoekt.RepositoryBranch{
1281 {"stable", "vs"},
1282 {"master", "vm"},
1283 },
1284 },
1285 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1286
1287 t.Run("LineMatches", func(t *testing.T) {
1288 sres := searchForTest(t, b, &query.Substring{
1289 Pattern: "needle",
1290 })
1291 if len(sres.Files) != 1 {
1292 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1293 }
1294
1295 f := sres.Files[0]
1296 if !reflect.DeepEqual(f.Branches, branches) {
1297 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1298 }
1299 })
1300
1301 t.Run("ChunkMatches", func(t *testing.T) {
1302 sres := searchForTest(t, b, &query.Substring{
1303 Pattern: "needle",
1304 }, chunkOpts)
1305 if len(sres.Files) != 1 {
1306 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1307 }
1308
1309 f := sres.Files[0]
1310 if !reflect.DeepEqual(f.Branches, branches) {
1311 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1312 }
1313 })
1314}
1315
1316func TestBranchVersions(t *testing.T) {
1317 b := testIndexBuilder(t, &zoekt.Repository{
1318 Branches: []zoekt.RepositoryBranch{
1319 {"stable", "v-stable"},
1320 {"master", "v-master"},
1321 },
1322 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1323
1324 t.Run("LineMatches", func(t *testing.T) {
1325 sres := searchForTest(t, b, &query.Substring{
1326 Pattern: "needle",
1327 })
1328 if len(sres.Files) != 1 {
1329 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1330 }
1331
1332 f := sres.Files[0]
1333 if f.Version != "v-master" {
1334 t.Fatalf("got file %#v, want version 'v-master'", f)
1335 }
1336 })
1337
1338 t.Run("ChunkMatches", func(t *testing.T) {
1339 sres := searchForTest(t, b, &query.Substring{
1340 Pattern: "needle",
1341 }, chunkOpts)
1342 if len(sres.Files) != 1 {
1343 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1344 }
1345
1346 f := sres.Files[0]
1347 if f.Version != "v-master" {
1348 t.Fatalf("got file %#v, want version 'v-master'", f)
1349 }
1350 })
1351}
1352
// mustParseRE parses s using the Perl syntax flags and returns the parsed
// expression. It panics with the parse error when s is not a valid pattern,
// which keeps test call sites free of error plumbing.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1361
1362func TestRegexp(t *testing.T) {
1363 content := []byte("needle the bla")
1364 // ----------------01234567890123
1365
1366 b := testIndexBuilder(t, nil,
1367 Document{
1368 Name: "f1",
1369 Content: content,
1370 })
1371
1372 t.Run("LineMatches", func(t *testing.T) {
1373 sres := searchForTest(t, b,
1374 &query.Regexp{
1375 Regexp: mustParseRE("dle.*bla"),
1376 })
1377
1378 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1379 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1380 }
1381
1382 got := sres.Files[0].LineMatches[0]
1383 want := zoekt.LineMatch{
1384 LineFragments: []zoekt.LineFragmentMatch{{
1385 LineOffset: 3,
1386 Offset: 3,
1387 MatchLength: 11,
1388 }},
1389 Line: content,
1390 FileName: false,
1391 LineNumber: 1,
1392 LineStart: 0,
1393 LineEnd: 14,
1394 }
1395
1396 if !reflect.DeepEqual(got, want) {
1397 t.Errorf("got %#v, want %#v", got, want)
1398 }
1399 })
1400
1401 t.Run("ChunkMatches", func(t *testing.T) {
1402 sres := searchForTest(t, b,
1403 &query.Regexp{
1404 Regexp: mustParseRE("dle.*bla"),
1405 }, chunkOpts)
1406
1407 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1408 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1409 }
1410
1411 got := sres.Files[0].ChunkMatches[0]
1412 want := zoekt.ChunkMatch{
1413 Content: content,
1414 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1415 Ranges: []zoekt.Range{{
1416 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1417 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1418 }},
1419 }
1420
1421 if diff := cmp.Diff(want, got); diff != "" {
1422 t.Fatal(diff)
1423 }
1424 })
1425}
1426
1427func TestRegexpFile(t *testing.T) {
1428 content := []byte("needle the bla")
1429
1430 name := "let's play: find the mussel"
1431 b := testIndexBuilder(t, nil,
1432 Document{Name: name, Content: content},
1433 Document{Name: "play.txt", Content: content})
1434
1435 t.Run("LineMatches", func(t *testing.T) {
1436 sres := searchForTest(t, b,
1437 &query.Regexp{
1438 Regexp: mustParseRE("play.*mussel"),
1439 FileName: true,
1440 })
1441
1442 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1443 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1444 }
1445
1446 if sres.Files[0].FileName != name {
1447 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1448 }
1449 })
1450
1451 t.Run("ChunkMatches", func(t *testing.T) {
1452 sres := searchForTest(t, b,
1453 &query.Regexp{
1454 Regexp: mustParseRE("play.*mussel"),
1455 FileName: true,
1456 }, chunkOpts)
1457
1458 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1459 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1460 }
1461
1462 if sres.Files[0].FileName != name {
1463 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1464 }
1465 })
1466}
1467
1468func TestRegexpOrder(t *testing.T) {
1469 content := []byte("bla the needle")
1470 // ----------------01234567890123
1471
1472 b := testIndexBuilder(t, nil,
1473 Document{Name: "f1", Content: content})
1474
1475 t.Run("LineMatches", func(t *testing.T) {
1476 sres := searchForTest(t, b,
1477 &query.Regexp{
1478 Regexp: mustParseRE("dle.*bla"),
1479 })
1480
1481 if len(sres.Files) != 0 {
1482 t.Fatalf("got %v, want 0 matches", sres.Files)
1483 }
1484 })
1485
1486 t.Run("ChunkMatches", func(t *testing.T) {
1487 sres := searchForTest(t, b,
1488 &query.Regexp{
1489 Regexp: mustParseRE("dle.*bla"),
1490 })
1491
1492 if len(sres.Files) != 0 {
1493 t.Fatalf("got %v, want 0 matches", sres.Files)
1494 }
1495 })
1496}
1497
1498func TestRepoName(t *testing.T) {
1499 content := []byte("bla the needle")
1500 // ----------------01234567890123
1501
1502 b := testIndexBuilder(t, &zoekt.Repository{Name: "bla"},
1503 Document{Name: "f1", Content: content})
1504
1505 t.Run("LineMatches", func(t *testing.T) {
1506 sres := searchForTest(t, b,
1507 query.NewAnd(
1508 &query.Substring{Pattern: "needle"},
1509 &query.Repo{Regexp: regexp.MustCompile("foo")},
1510 ))
1511
1512 if len(sres.Files) != 0 {
1513 t.Fatalf("got %v, want 0 matches", sres.Files)
1514 }
1515
1516 if sres.Stats.FilesConsidered > 0 {
1517 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1518 }
1519
1520 sres = searchForTest(t, b,
1521 query.NewAnd(
1522 &query.Substring{Pattern: "needle"},
1523 &query.Repo{Regexp: regexp.MustCompile("bla")},
1524 ))
1525 if len(sres.Files) != 1 {
1526 t.Fatalf("got %v, want 1 match", sres.Files)
1527 }
1528 })
1529
1530 t.Run("ChunkMatches", func(t *testing.T) {
1531 sres := searchForTest(t, b,
1532 query.NewAnd(
1533 &query.Substring{Pattern: "needle"},
1534 &query.Repo{Regexp: regexp.MustCompile("foo")},
1535 ),
1536 chunkOpts,
1537 )
1538
1539 if len(sres.Files) != 0 {
1540 t.Fatalf("got %v, want 0 matches", sres.Files)
1541 }
1542
1543 if sres.Stats.FilesConsidered > 0 {
1544 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1545 }
1546
1547 sres = searchForTest(t, b,
1548 query.NewAnd(
1549 &query.Substring{Pattern: "needle"},
1550 &query.Repo{Regexp: regexp.MustCompile("bla")},
1551 ))
1552 if len(sres.Files) != 1 {
1553 t.Fatalf("got %v, want 1 match", sres.Files)
1554 }
1555 })
1556}
1557
1558func TestMergeMatches(t *testing.T) {
1559 t.Run("LineMatches, adjacent matches", func(t *testing.T) {
1560 b := testIndexBuilder(t, nil,
1561 Document{Name: "f1", Content: []byte("blablabla")})
1562 sres := searchForTest(t, b,
1563 &query.Substring{Pattern: "bla"})
1564
1565 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1566 t.Fatalf("got %v, want 1 match", sres.Files)
1567 }
1568
1569 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
1570 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
1571 }
1572 })
1573
1574 t.Run("LineMatches, overlapping matches", func(t *testing.T) {
1575 b := testIndexBuilder(t, nil,
1576 Document{Name: "f1", Content: []byte("hellogoodbye")})
1577 sres := searchForTest(t, b,
1578 &query.And{Children: []query.Q{
1579 &query.Substring{Pattern: "hello"},
1580 &query.Substring{Pattern: "logood"},
1581 }})
1582
1583 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1584 t.Fatalf("got %v, want 1 match", sres.Files)
1585 }
1586
1587 lineFragments := sres.Files[0].LineMatches[0].LineFragments
1588 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
1589 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
1590 }
1591 })
1592
1593 t.Run("ChunkMatches, no overlap", func(t *testing.T) {
1594 b := testIndexBuilder(t, nil,
1595 Document{Name: "f1", Content: []byte("blablabla")})
1596
1597 sres := searchForTest(t, b,
1598 &query.Substring{Pattern: "bla"},
1599 chunkOpts,
1600 )
1601 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1602 t.Fatalf("got %v, want 1 match", sres.Files)
1603 }
1604
1605 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
1606 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
1607 }
1608 })
1609
1610 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
1611 b := testIndexBuilder(t, nil,
1612 Document{Name: "f1", Content: []byte("hellogoodbye")})
1613 sres := searchForTest(t, b,
1614 &query.And{Children: []query.Q{
1615 &query.Substring{Pattern: "hello"},
1616 &query.Substring{Pattern: "logood"},
1617 }}, chunkOpts)
1618
1619 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1620 t.Fatalf("got %v, want 1 chunk match", sres.Files)
1621 }
1622
1623 ranges := sres.Files[0].ChunkMatches[0].Ranges
1624 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
1625 t.Fatalf("got %v, want single chunk range 'hello'", ranges)
1626 }
1627 })
1628}
1629
1630func TestRepoURL(t *testing.T) {
1631 content := []byte("blablabla")
1632 b := testIndexBuilder(t, &zoekt.Repository{
1633 Name: "name",
1634 URL: "URL",
1635 CommitURLTemplate: "commit",
1636 FileURLTemplate: "file-url",
1637 LineFragmentTemplate: "fragment",
1638 }, Document{Name: "f1", Content: content})
1639
1640 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1641
1642 if sres.RepoURLs["name"] != "file-url" {
1643 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1644 }
1645 if sres.LineFragments["name"] != "fragment" {
1646 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1647 }
1648}
1649
1650func TestRegexpCaseSensitive(t *testing.T) {
1651 content := []byte("bla\nfunc unmarshalGitiles\n")
1652 b := testIndexBuilder(t, nil, Document{
1653 Name: "f1",
1654 Content: content,
1655 })
1656
1657 t.Run("LineMatches", func(t *testing.T) {
1658 res := searchForTest(t, b,
1659 &query.Regexp{
1660 Regexp: mustParseRE("func.*Gitiles"),
1661 CaseSensitive: true,
1662 })
1663
1664 if len(res.Files) != 1 {
1665 t.Fatalf("got %v, want one index", res.Files)
1666 }
1667 })
1668
1669 t.Run("ChunkMatches", func(t *testing.T) {
1670 res := searchForTest(t, b,
1671 &query.Regexp{
1672 Regexp: mustParseRE("func.*Gitiles"),
1673 CaseSensitive: true,
1674 },
1675 chunkOpts,
1676 )
1677
1678 if len(res.Files) != 1 {
1679 t.Fatalf("got %v, want one index", res.Files)
1680 }
1681 })
1682}
1683
1684func TestRegexpCaseFolding(t *testing.T) {
1685 content := []byte("bla\nfunc unmarshalGitiles\n")
1686
1687 b := testIndexBuilder(t, nil,
1688 Document{Name: "f1", Content: content})
1689 res := searchForTest(t, b,
1690 &query.Regexp{
1691 Regexp: mustParseRE("func.*GITILES"),
1692 CaseSensitive: false,
1693 })
1694
1695 if len(res.Files) != 1 {
1696 t.Fatalf("got %v, want one index", res.Files)
1697 }
1698}
1699
1700func TestCaseRegexp(t *testing.T) {
1701 content := []byte("BLABLABLA")
1702 b := testIndexBuilder(t, nil,
1703 Document{Name: "f1", Content: content})
1704
1705 t.Run("LineMatches", func(t *testing.T) {
1706 res := searchForTest(t, b,
1707 &query.Regexp{
1708 Regexp: mustParseRE("[xb][xl][xa]"),
1709 CaseSensitive: true,
1710 })
1711
1712 if len(res.Files) > 0 {
1713 t.Fatalf("got %v, want no matches", res.Files)
1714 }
1715 })
1716
1717 t.Run("ChunkMatches", func(t *testing.T) {
1718 res := searchForTest(t, b,
1719 &query.Regexp{
1720 Regexp: mustParseRE("[xb][xl][xa]"),
1721 CaseSensitive: true,
1722 },
1723 chunkOpts,
1724 )
1725
1726 if len(res.Files) > 0 {
1727 t.Fatalf("got %v, want no matches", res.Files)
1728 }
1729 })
1730}
1731
1732func TestNegativeRegexp(t *testing.T) {
1733 content := []byte("BLABLABLA needle bla")
1734 b := testIndexBuilder(t, nil,
1735 Document{Name: "f1", Content: content})
1736
1737 t.Run("LineMatches", func(t *testing.T) {
1738 res := searchForTest(t, b,
1739 query.NewAnd(
1740 &query.Substring{
1741 Pattern: "needle",
1742 },
1743 &query.Not{
1744 Child: &query.Regexp{
1745 Regexp: mustParseRE(".cs"),
1746 },
1747 }))
1748
1749 if len(res.Files) != 1 {
1750 t.Fatalf("got %v, want 1 match", res.Files)
1751 }
1752 })
1753
1754 t.Run("ChunkMatches", func(t *testing.T) {
1755 res := searchForTest(t, b,
1756 query.NewAnd(
1757 &query.Substring{
1758 Pattern: "needle",
1759 },
1760 &query.Not{
1761 Child: &query.Regexp{
1762 Regexp: mustParseRE(".cs"),
1763 },
1764 },
1765 ),
1766 chunkOpts)
1767
1768 if len(res.Files) != 1 {
1769 t.Fatalf("got %v, want 1 match", res.Files)
1770 }
1771 })
1772}
1773
1774func TestSymbolRank(t *testing.T) {
1775 t.Skip()
1776
1777 content := []byte("func bla() blubxxxxx")
1778 // ----------------01234567890123456789
1779 b := testIndexBuilder(t, nil,
1780 Document{
1781 Name: "f1",
1782 Content: content,
1783 }, Document{
1784 Name: "f2",
1785 Content: content,
1786 Symbols: []DocumentSection{{5, 8}},
1787 }, Document{
1788 Name: "f3",
1789 Content: content,
1790 })
1791
1792 t.Run("LineMatches", func(t *testing.T) {
1793 res := searchForTest(t, b,
1794 &query.Substring{
1795 CaseSensitive: false,
1796 Pattern: "bla",
1797 })
1798
1799 if len(res.Files) != 3 {
1800 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1801 }
1802 if res.Files[0].FileName != "f2" {
1803 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1804 }
1805 })
1806
1807 t.Run("ChunkMatches", func(t *testing.T) {
1808 res := searchForTest(t, b,
1809 &query.Substring{
1810 CaseSensitive: false,
1811 Pattern: "bla",
1812 }, chunkOpts)
1813
1814 if len(res.Files) != 3 {
1815 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1816 }
1817 if res.Files[0].FileName != "f2" {
1818 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1819 }
1820 })
1821}
1822
1823func TestSymbolRankRegexpUTF8(t *testing.T) {
1824 t.Skip()
1825
1826 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1827 content := []byte(prefix +
1828 "func bla() blub")
1829 // ------012345678901234
1830 b := testIndexBuilder(t, nil,
1831 Document{
1832 Name: "f1",
1833 Content: content,
1834 }, Document{
1835 Name: "f2",
1836 Content: content,
1837 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1838 }, Document{
1839 Name: "f3",
1840 Content: content,
1841 })
1842
1843 t.Run("LineMatches", func(t *testing.T) {
1844 res := searchForTest(t, b,
1845 &query.Regexp{
1846 Regexp: mustParseRE("b.a"),
1847 })
1848
1849 if len(res.Files) != 3 {
1850 t.Fatalf("got %#v, want 3 files", res.Files)
1851 }
1852 if res.Files[0].FileName != "f2" {
1853 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1854 }
1855 })
1856
1857 t.Run("ChunjkMatches", func(t *testing.T) {
1858 res := searchForTest(t, b,
1859 &query.Regexp{
1860 Regexp: mustParseRE("b.a"),
1861 }, chunkOpts)
1862
1863 if len(res.Files) != 3 {
1864 t.Fatalf("got %#v, want 3 files", res.Files)
1865 }
1866 if res.Files[0].FileName != "f2" {
1867 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1868 }
1869 })
1870}
1871
1872func TestPartialSymbolRank(t *testing.T) {
1873 t.Skip()
1874
1875 content := []byte("func bla() blub")
1876 // ----------------012345678901234
1877
1878 b := testIndexBuilder(t, nil,
1879 Document{
1880 Name: "f1",
1881 Content: content,
1882 Symbols: []DocumentSection{{4, 9}},
1883 }, Document{
1884 Name: "f2",
1885 Content: content,
1886 Symbols: []DocumentSection{{4, 8}},
1887 }, Document{
1888 Name: "f3",
1889 Content: content,
1890 Symbols: []DocumentSection{{4, 9}},
1891 })
1892
1893 t.Run("LineMatches", func(t *testing.T) {
1894 res := searchForTest(t, b,
1895 &query.Substring{
1896 Pattern: "bla",
1897 })
1898
1899 if len(res.Files) != 3 {
1900 t.Fatalf("got %#v, want 3 files", res.Files)
1901 }
1902 if res.Files[0].FileName != "f2" {
1903 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1904 }
1905 })
1906
1907 t.Run("ChunkMatches", func(t *testing.T) {
1908 res := searchForTest(t, b,
1909 &query.Substring{
1910 Pattern: "bla",
1911 }, chunkOpts)
1912
1913 if len(res.Files) != 3 {
1914 t.Fatalf("got %#v, want 3 files", res.Files)
1915 }
1916 if res.Files[0].FileName != "f2" {
1917 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1918 }
1919 })
1920}
1921
1922func TestNegativeRepo(t *testing.T) {
1923 content := []byte("bla the needle")
1924 // ----------------01234567890123
1925 b := testIndexBuilder(t, &zoekt.Repository{
1926 Name: "bla",
1927 }, Document{Name: "f1", Content: content})
1928
1929 t.Run("LineMatches", func(t *testing.T) {
1930 sres := searchForTest(t, b,
1931 query.NewAnd(
1932 &query.Substring{Pattern: "needle"},
1933 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1934 ))
1935
1936 if len(sres.Files) != 0 {
1937 t.Fatalf("got %v, want 0 matches", sres.Files)
1938 }
1939 })
1940
1941 t.Run("ChunkMatches", func(t *testing.T) {
1942 sres := searchForTest(t, b,
1943 query.NewAnd(
1944 &query.Substring{Pattern: "needle"},
1945 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1946 ), chunkOpts)
1947
1948 if len(sres.Files) != 0 {
1949 t.Fatalf("got %v, want 0 matches", sres.Files)
1950 }
1951 })
1952}
1953
1954func TestListRepos(t *testing.T) {
1955 content := []byte("bla the needle\n")
1956 // ----------------012345678901234-
1957
1958 t.Run("default and minimal fallback", func(t *testing.T) {
1959 repo := &zoekt.Repository{
1960 Name: "reponame",
1961 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1962 }
1963 b := testIndexBuilder(t, repo,
1964 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1965 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1966 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1967 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1968
1969 searcher := searcherForTest(t, b)
1970
1971 for _, opts := range []*zoekt.ListOptions{
1972 nil,
1973 {},
1974 {Field: zoekt.RepoListFieldRepos},
1975 {Field: zoekt.RepoListFieldReposMap},
1976 } {
1977 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1978 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1979
1980 res, err := searcher.List(context.Background(), q, opts)
1981 if err != nil {
1982 t.Fatalf("List(%v): %v", q, err)
1983 }
1984
1985 want := &zoekt.RepoList{
1986 Repos: []*zoekt.RepoListEntry{{
1987 Repository: *repo,
1988 Stats: zoekt.RepoStats{
1989 Documents: 4,
1990 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1991 Shards: 1,
1992
1993 NewLinesCount: 4,
1994 DefaultBranchNewLinesCount: 2,
1995 OtherBranchesNewLinesCount: 3,
1996 },
1997 }},
1998 Stats: zoekt.RepoStats{
1999 Repos: 1,
2000 Documents: 4,
2001 ContentBytes: 68,
2002 Shards: 1,
2003
2004 NewLinesCount: 4,
2005 DefaultBranchNewLinesCount: 2,
2006 OtherBranchesNewLinesCount: 3,
2007 },
2008 }
2009 ignored := []cmp.Option{
2010 cmpopts.EquateEmpty(),
2011 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
2012 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
2013 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
2014 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
2015 }
2016 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2017 t.Fatalf("mismatch (-want +got):\n%s", diff)
2018 }
2019
2020 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2021 res, err = searcher.List(context.Background(), q, nil)
2022 if err != nil {
2023 t.Fatalf("List(%v): %v", q, err)
2024 }
2025 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2026 t.Fatalf("got %v, want 0 matches", res)
2027 }
2028 })
2029 }
2030 })
2031
2032 t.Run("minimal", func(t *testing.T) {
2033 repo := &zoekt.Repository{
2034 ID: 1234,
2035 Name: "reponame",
2036 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
2037 RawConfig: map[string]string{"repoid": "1234"},
2038 }
2039 b := testIndexBuilder(t, repo,
2040 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
2041 Document{Name: "f2", Content: content, Branches: []string{"main"}},
2042 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
2043 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
2044
2045 searcher := searcherForTest(t, b)
2046
2047 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
2048 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2049 if err != nil {
2050 t.Fatalf("List(%v): %v", q, err)
2051 }
2052
2053 want := &zoekt.RepoList{
2054 ReposMap: zoekt.ReposMap{
2055 repo.ID: {
2056 HasSymbols: repo.HasSymbols,
2057 Branches: repo.Branches,
2058 },
2059 },
2060 Stats: zoekt.RepoStats{
2061 Repos: 1,
2062 Shards: 1,
2063 Documents: 4,
2064 IndexBytes: 412,
2065 ContentBytes: 68,
2066 NewLinesCount: 4,
2067 DefaultBranchNewLinesCount: 2,
2068 OtherBranchesNewLinesCount: 3,
2069 },
2070 }
2071
2072 ignored := []cmp.Option{
2073 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
2074 }
2075 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2076 t.Fatalf("mismatch (-want +got):\n%s", diff)
2077 }
2078
2079 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2080 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2081 if err != nil {
2082 t.Fatalf("List(%v): %v", q, err)
2083 }
2084 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2085 t.Fatalf("got %v, want 0 matches", res)
2086 }
2087 })
2088}
2089
2090func TestListReposByContent(t *testing.T) {
2091 content := []byte("bla the needle")
2092
2093 b := testIndexBuilder(t, &zoekt.Repository{
2094 Name: "reponame",
2095 },
2096 Document{Name: "f1", Content: content},
2097 Document{Name: "f2", Content: content})
2098
2099 searcher := searcherForTest(t, b)
2100 q := &query.Substring{Pattern: "needle"}
2101 res, err := searcher.List(context.Background(), q, nil)
2102 if err != nil {
2103 t.Fatalf("List(%v): %v", q, err)
2104 }
2105 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
2106 t.Fatalf("got %v, want 1 matches", res)
2107 }
2108 if got := res.Repos[0].Stats.Shards; got != 1 {
2109 t.Fatalf("got %d, want 1 shard", got)
2110 }
2111 q = &query.Substring{Pattern: "foo"}
2112 res, err = searcher.List(context.Background(), q, nil)
2113 if err != nil {
2114 t.Fatalf("List(%v): %v", q, err)
2115 }
2116 if len(res.Repos) != 0 {
2117 t.Fatalf("got %v, want 0 matches", res)
2118 }
2119}
2120
2121func TestMetadata(t *testing.T) {
2122 content := []byte("bla the needle")
2123
2124 b := testIndexBuilder(t, &zoekt.Repository{
2125 Name: "reponame",
2126 }, Document{Name: "f1", Content: content},
2127 Document{Name: "f2", Content: content})
2128
2129 var buf bytes.Buffer
2130 if err := b.Write(&buf); err != nil {
2131 t.Fatal(err)
2132 }
2133 f := &memSeeker{buf.Bytes()}
2134
2135 rd, _, err := ReadMetadata(f)
2136 if err != nil {
2137 t.Fatalf("ReadMetadata: %v", err)
2138 }
2139
2140 if got, want := rd[0].Name, "reponame"; got != want {
2141 t.Fatalf("got %q want %q", got, want)
2142 }
2143}
2144
2145func TestOr(t *testing.T) {
2146 b := testIndexBuilder(t, nil,
2147 Document{Name: "f1", Content: []byte("needle")},
2148 Document{Name: "f2", Content: []byte("banana")})
2149 t.Run("LineMatches", func(t *testing.T) {
2150 sres := searchForTest(t, b, query.NewOr(
2151 &query.Substring{Pattern: "needle"},
2152 &query.Substring{Pattern: "banana"}))
2153
2154 if len(sres.Files) != 2 {
2155 t.Fatalf("got %v, want 2 files", sres.Files)
2156 }
2157 })
2158
2159 t.Run("ChunkMatches", func(t *testing.T) {
2160 sres := searchForTest(t, b, query.NewOr(
2161 &query.Substring{Pattern: "needle"},
2162 &query.Substring{Pattern: "banana"}))
2163
2164 if len(sres.Files) != 2 {
2165 t.Fatalf("got %v, want 2 files", sres.Files)
2166 }
2167 })
2168}
2169
2170func TestFrequency(t *testing.T) {
2171 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2172
2173 b := testIndexBuilder(t, nil,
2174 Document{
2175 Name: "f1",
2176 Content: content,
2177 })
2178
2179 t.Run("LineMatches", func(t *testing.T) {
2180 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2181 if len(sres.Files) != 0 {
2182 t.Errorf("got %v, wanted 0 matches", sres.Files)
2183 }
2184 })
2185
2186 t.Run("ChunkMatches", func(t *testing.T) {
2187 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2188 if len(sres.Files) != 0 {
2189 t.Errorf("got %v, wanted 0 matches", sres.Files)
2190 }
2191 })
2192}
2193
2194func TestMatchNewline(t *testing.T) {
2195 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2196 if err != nil {
2197 t.Fatalf("syntax.Parse: %v", err)
2198 }
2199
2200 content := []byte("pqr\nalex")
2201
2202 b := testIndexBuilder(t, nil,
2203 Document{
2204 Name: "f1",
2205 Content: content,
2206 })
2207
2208 t.Run("LineMatches", func(t *testing.T) {
2209 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2210 if len(sres.Files) != 1 {
2211 t.Errorf("got %v, wanted 1 matches", sres.Files)
2212 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2213 t.Errorf("got match line %q, want %q", l, content)
2214 }
2215 })
2216
2217 t.Run("ChunkMatches", func(t *testing.T) {
2218 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2219 if len(sres.Files) != 1 {
2220 t.Errorf("got %v, wanted 1 matches", sres.Files)
2221 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2222 t.Errorf("got match line %q, want %q", c, content)
2223 }
2224 })
2225}
2226
2227func TestSubRepo(t *testing.T) {
2228 subRepos := map[string]*zoekt.Repository{
2229 "sub": {
2230 Name: "sub-name",
2231 LineFragmentTemplate: "sub-line",
2232 },
2233 }
2234
2235 content := []byte("pqr\nalex")
2236
2237 b := testIndexBuilder(t, &zoekt.Repository{
2238 SubRepoMap: subRepos,
2239 }, Document{
2240 Name: "sub/f1",
2241 Content: content,
2242 SubRepositoryPath: "sub",
2243 })
2244
2245 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2246 if len(sres.Files) != 1 {
2247 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2248 }
2249
2250 f := sres.Files[0]
2251 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2252 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2253 }
2254
2255 if sres.LineFragments["sub-name"] != "sub-line" {
2256 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2257 }
2258}
2259
2260func TestSearchEither(t *testing.T) {
2261 b := testIndexBuilder(t, nil,
2262 Document{Name: "f1", Content: []byte("bla needle bla")},
2263 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2264
2265 t.Run("LineMatches", func(t *testing.T) {
2266 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2267 if len(sres.Files) != 2 {
2268 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2269 }
2270
2271 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2272 if len(sres.Files) != 1 {
2273 t.Fatalf("got %v, wanted 1 index", sres.Files)
2274 }
2275
2276 if got, want := sres.Files[0].FileName, "f1"; got != want {
2277 t.Errorf("got %q, want %q", got, want)
2278 }
2279 })
2280
2281 t.Run("ChunkMatches", func(t *testing.T) {
2282 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2283 if len(sres.Files) != 2 {
2284 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2285 }
2286
2287 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2288 if len(sres.Files) != 1 {
2289 t.Fatalf("got %v, wanted 1 index", sres.Files)
2290 }
2291
2292 if got, want := sres.Files[0].FileName, "f1"; got != want {
2293 t.Errorf("got %q, want %q", got, want)
2294 }
2295 })
2296}
2297
2298func TestUnicodeExactMatch(t *testing.T) {
2299 needle := "néédlÉ"
2300 content := []byte("blá blá " + needle + " blâ")
2301
2302 b := testIndexBuilder(t, nil,
2303 Document{Name: "f1", Content: content})
2304
2305 t.Run("LineMatches", func(t *testing.T) {
2306 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2307 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2308 }
2309 })
2310
2311 t.Run("ChunkMatches", func(t *testing.T) {
2312 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2313 if len(res.Files) != 1 {
2314 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2315 }
2316 })
2317}
2318
2319func TestUnicodeCoverContent(t *testing.T) {
2320 needle := "néédlÉ"
2321 content := []byte("blá blá " + needle + " blâ")
2322
2323 b := testIndexBuilder(t, nil,
2324 Document{Name: "f1", Content: content})
2325
2326 t.Run("LineMatches", func(t *testing.T) {
2327 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2328 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2329 }
2330
2331 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2332 if len(res.Files) != 1 {
2333 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2334 }
2335
2336 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2337 t.Errorf("got %d want %d", got, want)
2338 }
2339 })
2340
2341 t.Run("ChunkMatches", func(t *testing.T) {
2342 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2343 if len(res.Files) != 0 {
2344 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2345 }
2346
2347 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2348 if len(res.Files) != 1 {
2349 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2350 }
2351
2352 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2353 want := uint32(strings.Index(string(content), needle))
2354 if got != want {
2355 t.Errorf("got %d want %d", got, want)
2356 }
2357 })
2358}
2359
2360func TestUnicodeNonCoverContent(t *testing.T) {
2361 needle := "nééáádlÉ"
2362 content := []byte("blá blá " + needle + " blâ")
2363
2364 b := testIndexBuilder(t, nil,
2365 Document{Name: "f1", Content: content})
2366
2367 t.Run("LineMatches", func(t *testing.T) {
2368 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2369 if len(res.Files) != 1 {
2370 t.Fatalf("got %v, wanted 1 index", res.Files)
2371 }
2372
2373 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2374 t.Errorf("got %d want %d", got, want)
2375 }
2376 })
2377
2378 t.Run("ChunkMatches", func(t *testing.T) {
2379 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2380 if len(res.Files) != 1 {
2381 t.Fatalf("got %v, wanted 1 index", res.Files)
2382 }
2383
2384 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2385 want := uint32(strings.Index(string(content), needle))
2386 if got != want {
2387 t.Errorf("got %d want %d", got, want)
2388 }
2389 })
2390}
2391
// kelvinCodePoint is U+212A (KELVIN SIGN, decimal 8490). It takes three bytes
// in UTF-8, unlike the one-byte ASCII letter 'k' that the tests pair it with.
const kelvinCodePoint = 0x212A
2393
2394func TestUnicodeVariableLength(t *testing.T) {
2395 lower := 'k'
2396 upper := rune(kelvinCodePoint)
2397
2398 needle := "nee" + string([]rune{lower}) + "eed"
2399 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2400 " ee" + string([]rune{lower}) + "ee" +
2401 " ee" + string([]rune{upper}) + "ee")
2402
2403 t.Run("LineMatches", func(t *testing.T) {
2404 b := testIndexBuilder(t, nil,
2405 Document{Name: "f1", Content: []byte(corpus)})
2406
2407 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2408 if len(res.Files) != 1 {
2409 t.Fatalf("got %v, wanted 1 index", res.Files)
2410 }
2411 })
2412
2413 t.Run("ChunkMatches", func(t *testing.T) {
2414 b := testIndexBuilder(t, nil,
2415 Document{Name: "f1", Content: []byte(corpus)})
2416
2417 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2418 if len(res.Files) != 1 {
2419 t.Fatalf("got %v, wanted 1 index", res.Files)
2420 }
2421 })
2422}
2423
2424func TestUnicodeFileStartOffsets(t *testing.T) {
2425 unicode := "世界"
2426 wat := "waaaaaat"
2427 b := testIndexBuilder(t, nil,
2428 Document{
2429 Name: "f1",
2430 Content: []byte(unicode),
2431 },
2432 Document{
2433 Name: "f2",
2434 Content: []byte(wat),
2435 },
2436 )
2437 q := &query.Substring{Pattern: wat, Content: true}
2438 res := searchForTest(t, b, q)
2439 if len(res.Files) != 1 {
2440 t.Fatalf("got %v, wanted 1 index", res.Files)
2441 }
2442}
2443
2444func TestLongFileUTF8(t *testing.T) {
2445 needle := "neeedle"
2446
2447 // 6 bytes.
2448 unicode := "世界"
2449 content := []byte(strings.Repeat(unicode, 100) + needle)
2450 b := testIndexBuilder(t, nil,
2451 Document{
2452 Name: "f1",
2453 Content: []byte(strings.Repeat("a", 50)),
2454 },
2455 Document{
2456 Name: "f2",
2457 Content: content,
2458 })
2459
2460 t.Run("LineMatches", func(t *testing.T) {
2461 q := &query.Substring{Pattern: needle, Content: true}
2462 res := searchForTest(t, b, q)
2463 if len(res.Files) != 1 {
2464 t.Errorf("got %v, want 1 result", res)
2465 }
2466 })
2467
2468 t.Run("ChunkMatches", func(t *testing.T) {
2469 q := &query.Substring{Pattern: needle, Content: true}
2470 res := searchForTest(t, b, q, chunkOpts)
2471 if len(res.Files) != 1 {
2472 t.Errorf("got %v, want 1 result", res)
2473 }
2474 })
2475}
2476
2477func TestEstimateDocCount(t *testing.T) {
2478 content := []byte("bla needle bla")
2479 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2480 Document{Name: "f1", Content: content},
2481 Document{Name: "f2", Content: content},
2482 )
2483
2484 t.Run("LineMatches", func(t *testing.T) {
2485 if sres := searchForTest(t, b,
2486 query.NewAnd(
2487 &query.Substring{Pattern: "needle"},
2488 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2489 ), zoekt.SearchOptions{
2490 EstimateDocCount: true,
2491 }); sres.Stats.ShardFilesConsidered != 2 {
2492 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2493 }
2494 if sres := searchForTest(t, b,
2495 query.NewAnd(
2496 &query.Substring{Pattern: "needle"},
2497 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2498 ), zoekt.SearchOptions{
2499 EstimateDocCount: true,
2500 }); sres.Stats.ShardFilesConsidered != 0 {
2501 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2502 }
2503 })
2504
2505 t.Run("ChunkMatches", func(t *testing.T) {
2506 if sres := searchForTest(t, b,
2507 query.NewAnd(
2508 &query.Substring{Pattern: "needle"},
2509 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2510 ), zoekt.SearchOptions{
2511 EstimateDocCount: true,
2512 ChunkMatches: true,
2513 }); sres.Stats.ShardFilesConsidered != 2 {
2514 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2515 }
2516 if sres := searchForTest(t, b,
2517 query.NewAnd(
2518 &query.Substring{Pattern: "needle"},
2519 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2520 ), zoekt.SearchOptions{
2521 EstimateDocCount: true,
2522 ChunkMatches: true,
2523 }); sres.Stats.ShardFilesConsidered != 0 {
2524 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2525 }
2526 })
2527}
2528
2529func TestUTF8CorrectCorpus(t *testing.T) {
2530 needle := "neeedle"
2531
2532 // 6 bytes.
2533 unicode := "世界"
2534 b := testIndexBuilder(t, nil,
2535 Document{
2536 Name: "f1",
2537 Content: []byte(strings.Repeat(unicode, 100)),
2538 },
2539 Document{
2540 Name: "xxxxxneeedle",
2541 Content: []byte("hello"),
2542 })
2543
2544 t.Run("LineMatches", func(t *testing.T) {
2545 q := &query.Substring{Pattern: needle, FileName: true}
2546 res := searchForTest(t, b, q)
2547 if len(res.Files) != 1 {
2548 t.Errorf("got %v, want 1 result", res)
2549 }
2550 })
2551
2552 t.Run("ChunkMatches", func(t *testing.T) {
2553 q := &query.Substring{Pattern: needle, FileName: true}
2554 res := searchForTest(t, b, q, chunkOpts)
2555 if len(res.Files) != 1 {
2556 t.Errorf("got %v, want 1 result", res)
2557 }
2558 })
2559}
2560
2561func TestBuilderStats(t *testing.T) {
2562 b := testIndexBuilder(t, nil,
2563 Document{
2564 Name: "f1",
2565 Content: []byte(strings.Repeat("abcd", 1024)),
2566 })
2567 var buf bytes.Buffer
2568 if err := b.Write(&buf); err != nil {
2569 t.Fatal(err)
2570 }
2571
2572 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2573 t.Errorf("got %d, want %d", got, want)
2574 }
2575}
2576
2577func TestIOStats(t *testing.T) {
2578 b := testIndexBuilder(t, nil,
2579 Document{
2580 Name: "f1",
2581 Content: []byte(strings.Repeat("abcd", 1024)),
2582 })
2583
2584 t.Run("LineMatches", func(t *testing.T) {
2585 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2586 res := searchForTest(t, b, q)
2587
2588 // 4096 (content) + 2 (overhead: newlines or doc sections)
2589 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2590 t.Errorf("got content I/O %d, want %d", got, want)
2591 }
2592
2593 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2594 // delta encoded.
2595 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2596 t.Errorf("got index I/O %d, want %d", got, want)
2597 }
2598 })
2599
2600 t.Run("ChunkMatches", func(t *testing.T) {
2601 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2602 res := searchForTest(t, b, q, chunkOpts)
2603
2604 // 4096 (content) + 2 (overhead: newlines or doc sections)
2605 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2606 t.Errorf("got content I/O %d, want %d", got, want)
2607 }
2608
2609 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2610 // delta encoded.
2611 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2612 t.Errorf("got index I/O %d, want %d", got, want)
2613 }
2614 })
2615
2616 t.Run("LineMatches with BM25", func(t *testing.T) {
2617 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2618 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true})
2619
2620 // 4096 (content) + 2 (overhead: newlines or doc sections)
2621 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2622 t.Errorf("got content I/O %d, want %d", got, want)
2623 }
2624
2625 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2626 // delta encoded.
2627 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2628 t.Errorf("got index I/O %d, want %d", got, want)
2629 }
2630 })
2631
2632 t.Run("ChunkMatches with BM25", func(t *testing.T) {
2633 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2634 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true})
2635
2636 // 4096 (content) + 2 (overhead: newlines or doc sections)
2637 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2638 t.Errorf("got content I/O %d, want %d", got, want)
2639 }
2640
2641 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2642 // delta encoded.
2643 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2644 t.Errorf("got index I/O %d, want %d", got, want)
2645 }
2646 })
2647}
2648
2649func TestStartLineAnchor(t *testing.T) {
2650 b := testIndexBuilder(t, nil,
2651 Document{
2652 Name: "f1",
2653 Content: []byte(
2654 `hello
2655start of middle of line
2656`),
2657 })
2658
2659 t.Run("LineMatches", func(t *testing.T) {
2660 q, err := query.Parse("^start")
2661 if err != nil {
2662 t.Errorf("parse: %v", err)
2663 }
2664
2665 res := searchForTest(t, b, q)
2666 if len(res.Files) != 1 {
2667 t.Errorf("got %v, want 1 file", res.Files)
2668 }
2669
2670 q, err = query.Parse("^middle")
2671 if err != nil {
2672 t.Errorf("parse: %v", err)
2673 }
2674 res = searchForTest(t, b, q)
2675 if len(res.Files) != 0 {
2676 t.Errorf("got %v, want 0 files", res.Files)
2677 }
2678 })
2679
2680 t.Run("ChunkMatches", func(t *testing.T) {
2681 q, err := query.Parse("^start")
2682 if err != nil {
2683 t.Errorf("parse: %v", err)
2684 }
2685
2686 res := searchForTest(t, b, q, chunkOpts)
2687 if len(res.Files) != 1 {
2688 t.Errorf("got %v, want 1 file", res.Files)
2689 }
2690
2691 q, err = query.Parse("^middle")
2692 if err != nil {
2693 t.Errorf("parse: %v", err)
2694 }
2695 res = searchForTest(t, b, q, chunkOpts)
2696 if len(res.Files) != 0 {
2697 t.Errorf("got %v, want 0 files", res.Files)
2698 }
2699 })
2700}
2701
2702func TestAndOrUnicode(t *testing.T) {
2703 q, err := query.Parse("orange.*apple")
2704 if err != nil {
2705 t.Errorf("parse: %v", err)
2706 }
2707 finalQ := query.NewAnd(q,
2708 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2709 query.NewOr(&query.Branch{Pattern: "master"}))))
2710
2711 b := testIndexBuilder(t, &zoekt.Repository{
2712 Name: "name",
2713 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}},
2714 }, Document{
2715 Name: "f2",
2716 Content: []byte("orange\u2318apple"),
2717 // --------------0123456 78901
2718 Branches: []string{"master"},
2719 })
2720
2721 t.Run("LineMatches", func(t *testing.T) {
2722 res := searchForTest(t, b, finalQ)
2723 if len(res.Files) != 1 {
2724 t.Errorf("got %v, want 1 result", res.Files)
2725 }
2726 })
2727
2728 t.Run("ChunkMatches", func(t *testing.T) {
2729 res := searchForTest(t, b, finalQ, chunkOpts)
2730 if len(res.Files) != 1 {
2731 t.Errorf("got %v, want 1 result", res.Files)
2732 }
2733 })
2734}
2735
2736func TestAndShort(t *testing.T) {
2737 content := []byte("bla needle at orange bla")
2738 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2739 Document{Name: "f1", Content: content},
2740 Document{Name: "f2", Content: []byte("xx at xx")},
2741 Document{Name: "f3", Content: []byte("yy orange xx")},
2742 )
2743
2744 q := query.NewAnd(&query.Substring{Pattern: "at"},
2745 &query.Substring{Pattern: "orange"})
2746
2747 t.Run("LineMatches", func(t *testing.T) {
2748 res := searchForTest(t, b, q)
2749 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2750 t.Errorf("got %v, want 1 result", res.Files)
2751 }
2752 })
2753
2754 t.Run("ChunkMatches", func(t *testing.T) {
2755 res := searchForTest(t, b, q, chunkOpts)
2756 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2757 t.Errorf("got %v, want 1 result", res.Files)
2758 }
2759 })
2760}
2761
2762func TestNoCollectRegexpSubstring(t *testing.T) {
2763 content := []byte("bla final bla\nfoo final, foo")
2764 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2765 Document{Name: "f1", Content: content},
2766 )
2767
2768 q := &query.Regexp{
2769 Regexp: mustParseRE("final[,.]"),
2770 }
2771
2772 t.Run("LineMatches", func(t *testing.T) {
2773 res := searchForTest(t, b, q)
2774 if len(res.Files) != 1 {
2775 t.Fatalf("got %v, want 1 result", res.Files)
2776 }
2777 if f := res.Files[0]; len(f.LineMatches) != 1 {
2778 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2779 }
2780 })
2781
2782 t.Run("ChunkMatches", func(t *testing.T) {
2783 res := searchForTest(t, b, q, chunkOpts)
2784 if len(res.Files) != 1 {
2785 t.Fatalf("got %v, want 1 result", res.Files)
2786 }
2787 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2788 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2789 }
2790 })
2791}
2792
2793func printLineMatches(ms []zoekt.LineMatch) string {
2794 var ss []string
2795 for _, m := range ms {
2796 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2797 }
2798
2799 return strings.Join(ss, ", ")
2800}
2801
2802func TestLang(t *testing.T) {
2803 content := []byte("bla needle bla")
2804 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2805 Document{Name: "f1", Content: content},
2806 Document{Name: "f2", Language: "java", Content: content},
2807 Document{Name: "f3", Language: "cpp", Content: content},
2808 )
2809
2810 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2811 &query.Language{Language: "cpp"})
2812
2813 t.Run("LineMatches", func(t *testing.T) {
2814 res := searchForTest(t, b, q)
2815 if len(res.Files) != 1 {
2816 t.Fatalf("got %v, want 1 result in f3", res.Files)
2817 }
2818 f := res.Files[0]
2819 if f.FileName != "f3" || f.Language != "cpp" {
2820 t.Fatalf("got %v, want 1 match with language cpp", f)
2821 }
2822 })
2823
2824 t.Run("ChunkMatches", func(t *testing.T) {
2825 res := searchForTest(t, b, q, chunkOpts)
2826 if len(res.Files) != 1 {
2827 t.Fatalf("got %v, want 1 result in f3", res.Files)
2828 }
2829 f := res.Files[0]
2830 if f.FileName != "f3" || f.Language != "cpp" {
2831 t.Fatalf("got %v, want 1 match with language cpp", f)
2832 }
2833 })
2834}
2835
2836func TestLangShortcut(t *testing.T) {
2837 content := []byte("bla needle bla")
2838 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2839 Document{Name: "f2", Language: "java", Content: content},
2840 Document{Name: "f3", Language: "cpp", Content: content},
2841 )
2842
2843 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2844 &query.Language{Language: "fortran"})
2845
2846 t.Run("LineMatches", func(t *testing.T) {
2847 res := searchForTest(t, b, q)
2848 if len(res.Files) != 0 {
2849 t.Fatalf("got %v, want 0 results", res.Files)
2850 }
2851 if res.Stats.IndexBytesLoaded > 0 {
2852 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2853 }
2854 })
2855
2856 t.Run("ChunkMatches", func(t *testing.T) {
2857 res := searchForTest(t, b, q, chunkOpts)
2858 if len(res.Files) != 0 {
2859 t.Fatalf("got %v, want 0 results", res.Files)
2860 }
2861 if res.Stats.IndexBytesLoaded > 0 {
2862 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2863 }
2864 })
2865}
2866
2867func TestNoTextMatchAtoms(t *testing.T) {
2868 content := []byte("bla needle bla")
2869 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2870 Document{Name: "f1", Content: content},
2871 Document{Name: "f2", Language: "java", Content: content},
2872 Document{Name: "f3", Language: "cpp", Content: content},
2873 )
2874 q := query.NewAnd(&query.Language{Language: "java"})
2875 t.Run("LineMatches", func(t *testing.T) {
2876 res := searchForTest(t, b, q)
2877 if len(res.Files) != 1 {
2878 t.Fatalf("got %v, want 1 result in f3", res.Files)
2879 }
2880 })
2881
2882 t.Run("ChunkMatches", func(t *testing.T) {
2883 res := searchForTest(t, b, q, chunkOpts)
2884 if len(res.Files) != 1 {
2885 t.Fatalf("got %v, want 1 result in f3", res.Files)
2886 }
2887 })
2888}
2889
2890func TestNoPositiveAtoms(t *testing.T) {
2891 content := []byte("bla needle bla")
2892 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2893 Document{Name: "f1", Content: content},
2894 Document{Name: "f2", Content: content},
2895 )
2896
2897 q := query.NewAnd(
2898 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2899 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2900 t.Run("LineMatches", func(t *testing.T) {
2901 res := searchForTest(t, b, q)
2902 if len(res.Files) != 2 {
2903 t.Fatalf("got %v, want 2 results in f3", res.Files)
2904 }
2905 })
2906 t.Run("ChunkMatches", func(t *testing.T) {
2907 res := searchForTest(t, b, q, chunkOpts)
2908 if len(res.Files) != 2 {
2909 t.Fatalf("got %v, want 2 results in f3", res.Files)
2910 }
2911 })
2912}
2913
2914func TestSymbolBoundaryStart(t *testing.T) {
2915 content := []byte("start\nbla bla\nend")
2916 // ----------------012345-67890123-456
2917
2918 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2919 Document{
2920 Name: "f1",
2921 Content: content,
2922 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2923 },
2924 )
2925 q := &query.Symbol{
2926 Expr: &query.Substring{Pattern: "start"},
2927 }
2928 t.Run("LineMatches", func(t *testing.T) {
2929 res := searchForTest(t, b, q)
2930 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2931 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2932 }
2933 m := res.Files[0].LineMatches[0].LineFragments[0]
2934 if m.Offset != 0 {
2935 t.Fatalf("got offset %d want 0", m.Offset)
2936 }
2937 })
2938
2939 t.Run("ChunkMatches", func(t *testing.T) {
2940 res := searchForTest(t, b, q, chunkOpts)
2941 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2942 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2943 }
2944 m := res.Files[0].ChunkMatches[0].Ranges[0]
2945 if m.Start.ByteOffset != 0 {
2946 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2947 }
2948 })
2949}
2950
2951func TestSymbolBoundaryEnd(t *testing.T) {
2952 content := []byte("start\nbla bla\nend")
2953 // ----------------012345-67890123-456
2954
2955 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2956 Document{
2957 Name: "f1",
2958 Content: content,
2959 Symbols: []DocumentSection{{14, 17}},
2960 },
2961 )
2962 q := &query.Symbol{
2963 Expr: &query.Substring{Pattern: "end"},
2964 }
2965 t.Run("LineMatches", func(t *testing.T) {
2966 res := searchForTest(t, b, q)
2967 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2968 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2969 }
2970 m := res.Files[0].LineMatches[0].LineFragments[0]
2971 if m.Offset != 14 {
2972 t.Fatalf("got offset %d want 0", m.Offset)
2973 }
2974 })
2975
2976 t.Run("ChunkMatches", func(t *testing.T) {
2977 res := searchForTest(t, b, q, chunkOpts)
2978 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2979 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2980 }
2981 m := res.Files[0].ChunkMatches[0].Ranges[0]
2982 if m.Start.ByteOffset != 14 {
2983 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2984 }
2985 })
2986}
2987
2988func TestSymbolSubstring(t *testing.T) {
2989 content := []byte("bla\nsymblabla\nbla")
2990 // ----------------0123-4567890123-456
2991
2992 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
2993 Document{
2994 Name: "f1",
2995 Content: content,
2996 Symbols: []DocumentSection{{4, 12}},
2997 },
2998 )
2999 q := &query.Symbol{
3000 Expr: &query.Substring{Pattern: "bla"},
3001 }
3002 t.Run("LineMatches", func(t *testing.T) {
3003 res := searchForTest(t, b, q)
3004 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3005 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3006 }
3007 m := res.Files[0].LineMatches[0].LineFragments[0]
3008 if m.Offset != 7 || m.MatchLength != 3 {
3009 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
3010 }
3011 })
3012
3013 t.Run("ChunkMatches", func(t *testing.T) {
3014 res := searchForTest(t, b, q, chunkOpts)
3015 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3016 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3017 }
3018 m := res.Files[0].ChunkMatches[0].Ranges[0]
3019 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
3020 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
3021 }
3022 })
3023}
3024
3025func TestSymbolSubstringExact(t *testing.T) {
3026 content := []byte("bla\nsym\nbla\nsym\nasymb")
3027 // ----------------0123-4567-890123456-78901
3028
3029 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3030 Document{
3031 Name: "f1",
3032 Content: content,
3033 Symbols: []DocumentSection{{4, 7}},
3034 },
3035 )
3036 q := &query.Symbol{
3037 Expr: &query.Substring{Pattern: "sym"},
3038 }
3039 t.Run("LineMatches", func(t *testing.T) {
3040 res := searchForTest(t, b, q)
3041 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3042 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3043 }
3044 m := res.Files[0].LineMatches[0].LineFragments[0]
3045 if m.Offset != 4 {
3046 t.Fatalf("got offset %d, want 7", m.Offset)
3047 }
3048 })
3049
3050 t.Run("ChunkMatches", func(t *testing.T) {
3051 res := searchForTest(t, b, q, chunkOpts)
3052 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3053 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3054 }
3055 m := res.Files[0].ChunkMatches[0].Ranges[0]
3056 if m.Start.ByteOffset != 4 {
3057 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
3058 }
3059 })
3060}
3061
3062func TestSymbolRegexpExact(t *testing.T) {
3063 content := []byte("blah\nbla\nbl")
3064 // ----------------01234-5678-90
3065
3066 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3067 Document{
3068 Name: "f1",
3069 Content: content,
3070 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
3071 },
3072 )
3073 q := &query.Symbol{
3074 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
3075 }
3076 t.Run("LineMatches", func(t *testing.T) {
3077 res := searchForTest(t, b, q)
3078 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3079 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3080 }
3081 m := res.Files[0].LineMatches[0].LineFragments[0]
3082 if m.Offset != 5 {
3083 t.Fatalf("got offset %d, want 5", m.Offset)
3084 }
3085 })
3086
3087 t.Run("ChunkMatches", func(t *testing.T) {
3088 res := searchForTest(t, b, q, chunkOpts)
3089 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3090 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3091 }
3092 m := res.Files[0].ChunkMatches[0].Ranges[0]
3093 if m.Start.ByteOffset != 5 {
3094 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
3095 }
3096 })
3097}
3098
3099func TestSymbolRegexpPartial(t *testing.T) {
3100 content := []byte("abcdef")
3101 // ----------------012345
3102
3103 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3104 Document{
3105 Name: "f1",
3106 Content: content,
3107 Symbols: []DocumentSection{{0, 6}},
3108 },
3109 )
3110 q := &query.Symbol{
3111 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
3112 }
3113 t.Run("LineMatches", func(t *testing.T) {
3114 res := searchForTest(t, b, q)
3115 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3116 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3117 }
3118 m := res.Files[0].LineMatches[0].LineFragments[0]
3119 if m.Offset != 1 {
3120 t.Fatalf("got offset %d, want 1", m.Offset)
3121 }
3122 if m.MatchLength != 3 {
3123 t.Fatalf("got match length %d, want 3", m.MatchLength)
3124 }
3125 })
3126
3127 t.Run("ChunkMatches", func(t *testing.T) {
3128 res := searchForTest(t, b, q, chunkOpts)
3129 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3130 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3131 }
3132 m := res.Files[0].ChunkMatches[0].Ranges[0]
3133 if m.Start.ByteOffset != 1 {
3134 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
3135 }
3136 if m.End.ByteOffset != 4 {
3137 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
3138 }
3139 })
3140}
3141
3142func TestSymbolRegexpAll(t *testing.T) {
3143 docs := []Document{
3144 {
3145 Name: "f1",
3146 Content: []byte("Hello Zoekt"),
3147 // --------------01234567890
3148 Symbols: []DocumentSection{{0, 5}, {6, 11}},
3149 },
3150 {
3151 Name: "f2",
3152 Content: []byte("Second Zoekt Third"),
3153 // --------------012345678901234567
3154 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
3155 },
3156 }
3157
3158 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...)
3159 q := &query.Symbol{
3160 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
3161 }
3162 t.Run("LineMatches", func(t *testing.T) {
3163 res := searchForTest(t, b, q)
3164 if len(res.Files) != len(docs) {
3165 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3166 }
3167 for i, want := range docs {
3168 got := res.Files[i].LineMatches[0].LineFragments
3169 if len(got) != len(want.Symbols) {
3170 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3171 }
3172
3173 for j, sec := range want.Symbols {
3174 if sec.Start != got[j].Offset {
3175 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3176 }
3177 }
3178 }
3179 })
3180
3181 t.Run("ChunkMatches", func(t *testing.T) {
3182 res := searchForTest(t, b, q, chunkOpts)
3183 if len(res.Files) != len(docs) {
3184 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3185 }
3186 for i, want := range docs {
3187 got := res.Files[i].ChunkMatches[0].Ranges
3188 if len(got) != len(want.Symbols) {
3189 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3190 }
3191
3192 for j, sec := range want.Symbols {
3193 if sec.Start != uint32(got[j].Start.ByteOffset) {
3194 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3195 }
3196 }
3197 }
3198 })
3199}
3200
3201func TestHitIterTerminate(t *testing.T) {
3202 // contrived input: trigram frequencies forces selecting abc +
3203 // def for the distance iteration. There is no index, so this
3204 // will advance the compressedPostingIterator to beyond the
3205 // end.
3206 content := []byte("abc bcdbcd cdecde abcabc def efg")
3207 b := testIndexBuilder(t, nil,
3208 Document{
3209 Name: "f1",
3210 Content: content,
3211 },
3212 )
3213
3214 t.Run("LineMatches", func(t *testing.T) {
3215 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3216 })
3217
3218 t.Run("ChunkMatches", func(t *testing.T) {
3219 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3220 })
3221}
3222
3223func TestDistanceHitIterBailLast(t *testing.T) {
3224 content := []byte("AST AST AST UASH")
3225 b := testIndexBuilder(t, nil,
3226 Document{
3227 Name: "f1",
3228 Content: content,
3229 },
3230 )
3231 t.Run("LineMatches", func(t *testing.T) {
3232 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3233 if len(res.Files) != 0 {
3234 t.Fatalf("got %v, want no results", res.Files)
3235 }
3236 })
3237
3238 t.Run("LineMatches", func(t *testing.T) {
3239 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3240 if len(res.Files) != 0 {
3241 t.Fatalf("got %v, want no results", res.Files)
3242 }
3243 })
3244}
3245
3246func TestDocumentSectionRuneBoundary(t *testing.T) {
3247 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3248 b, err := NewIndexBuilder(nil)
3249 if err != nil {
3250 t.Fatalf("NewIndexBuilder: %v", err)
3251 }
3252
3253 for i, sec := range []DocumentSection{
3254 {2, 6},
3255 {3, 7},
3256 } {
3257 if err := b.Add(Document{
3258 Name: "f1",
3259 Content: []byte(content),
3260 Symbols: []DocumentSection{sec},
3261 }); err == nil {
3262 t.Errorf("%d: Add succeeded", i)
3263 }
3264 }
3265}
3266
3267func TestUnicodeQuery(t *testing.T) {
3268 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3269 b := testIndexBuilder(t, nil,
3270 Document{
3271 Name: "f1",
3272 Content: []byte(content),
3273 },
3274 )
3275
3276 q := &query.Substring{Pattern: content}
3277
3278 t.Run("LineMatches", func(t *testing.T) {
3279 res := searchForTest(t, b, q)
3280 if len(res.Files) != 1 {
3281 t.Fatalf("want 1 match, got %v", res.Files)
3282 }
3283
3284 f := res.Files[0]
3285 if len(f.LineMatches) != 1 {
3286 t.Fatalf("want 1 line, got %v", f.LineMatches)
3287 }
3288 l := f.LineMatches[0]
3289
3290 if len(l.LineFragments) != 1 {
3291 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3292 }
3293 fr := l.LineFragments[0]
3294 if fr.MatchLength != len(content) {
3295 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3296 }
3297 })
3298
3299 t.Run("ChunkMatches", func(t *testing.T) {
3300 res := searchForTest(t, b, q, chunkOpts)
3301 if len(res.Files) != 1 {
3302 t.Fatalf("want 1 match, got %v", res.Files)
3303 }
3304
3305 f := res.Files[0]
3306 if len(f.ChunkMatches) != 1 {
3307 t.Fatalf("want 1 line, got %v", f.LineMatches)
3308 }
3309 cm := f.ChunkMatches[0]
3310
3311 if len(cm.Ranges) != 1 {
3312 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3313 }
3314 rr := cm.Ranges[0]
3315 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3316 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3317 }
3318 })
3319}
3320
3321func TestSkipInvalidContent(t *testing.T) {
3322 for _, content := range []string{
3323 // Binary
3324 "abc def \x00 abc",
3325 } {
3326
3327 b, err := NewIndexBuilder(nil)
3328 if err != nil {
3329 t.Fatalf("NewIndexBuilder: %v", err)
3330 }
3331
3332 if err := b.Add(Document{
3333 Name: "f1",
3334 Content: []byte(content),
3335 }); err != nil {
3336 t.Fatal(err)
3337 }
3338
3339 t.Run("LineMatches", func(t *testing.T) {
3340 q := &query.Substring{Pattern: "abc def"}
3341 res := searchForTest(t, b, q)
3342 if len(res.Files) != 0 {
3343 t.Fatalf("got %v, want no results", res.Files)
3344 }
3345
3346 q = &query.Substring{Pattern: "NOT-INDEXED"}
3347 res = searchForTest(t, b, q)
3348 if len(res.Files) != 1 {
3349 t.Fatalf("got %v, want 1 result", res.Files)
3350 }
3351 })
3352
3353 t.Run("ChunkMatches", func(t *testing.T) {
3354 q := &query.Substring{Pattern: "abc def"}
3355 res := searchForTest(t, b, q, chunkOpts)
3356 if len(res.Files) != 0 {
3357 t.Fatalf("got %v, want no results", res.Files)
3358 }
3359
3360 q = &query.Substring{Pattern: "NOT-INDEXED"}
3361 res = searchForTest(t, b, q, chunkOpts)
3362 if len(res.Files) != 1 {
3363 t.Fatalf("got %v, want 1 result", res.Files)
3364 }
3365 })
3366 }
3367}
3368
3369func TestDocChecker(t *testing.T) {
3370 docChecker := DocChecker{}
3371
3372 // Test valid and invalid text
3373 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3374 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3375 t.Errorf("Check(%q): %v", text, err)
3376 }
3377 }
3378 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3379 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3380 t.Errorf("Check(%q) succeeded", text)
3381 }
3382 }
3383
3384 // Test valid and invalid text with an allowed large file
3385 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3386 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3387 t.Errorf("Check(%q): %v", text, err)
3388 }
3389 }
3390 for _, text := range []string{"zero\x00byte", "xx"} {
3391 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3392 t.Errorf("Check(%q) succeeded", text)
3393 }
3394 }
3395}
3396
3397func TestLineAnd(t *testing.T) {
3398 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3399 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3400 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3401 Document{Name: "f3", Content: []byte("banana grape")},
3402 )
3403 pattern := "(apple)(?-s:.)*?(banana)"
3404 r, _ := syntax.Parse(pattern, syntax.Perl)
3405
3406 q := query.Regexp{
3407 Regexp: r,
3408 Content: true,
3409 }
3410 t.Run("LineMatches", func(t *testing.T) {
3411 res := searchForTest(t, b, &q)
3412 wantRegexpCount := 1
3413 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3414 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3415 }
3416 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3417 t.Errorf("got %v, want 1 result", res.Files)
3418 }
3419 })
3420
3421 t.Run("ChunkMatches", func(t *testing.T) {
3422 res := searchForTest(t, b, &q, chunkOpts)
3423 wantRegexpCount := 1
3424 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3425 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3426 }
3427 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3428 t.Errorf("got %v, want 1 result", res.Files)
3429 }
3430 })
3431}
3432
3433func TestLineAndFileName(t *testing.T) {
3434 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3435 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3436 Document{Name: "f2", Content: []byte("apple banana\norange")},
3437 Document{Name: "apple banana", Content: []byte("banana grape")},
3438 )
3439 pattern := "(apple)(?-s:.)*?(banana)"
3440 r, _ := syntax.Parse(pattern, syntax.Perl)
3441
3442 q := query.Regexp{
3443 Regexp: r,
3444 FileName: true,
3445 }
3446 t.Run("LineMatches", func(t *testing.T) {
3447 res := searchForTest(t, b, &q)
3448 wantRegexpCount := 1
3449 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3450 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3451 }
3452 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3453 t.Errorf("got %v, want 1 result", res.Files)
3454 }
3455 })
3456
3457 t.Run("ChunkMatches", func(t *testing.T) {
3458 res := searchForTest(t, b, &q, chunkOpts)
3459 wantRegexpCount := 1
3460 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3461 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3462 }
3463 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3464 t.Errorf("got %v, want 1 result", res.Files)
3465 }
3466 })
3467}
3468
3469func TestMultiLineRegex(t *testing.T) {
3470 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
3471 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3472 Document{Name: "f2", Content: []byte("apple orange")},
3473 Document{Name: "f3", Content: []byte("grape apple")},
3474 )
3475 pattern := "(apple).*?[[:space:]].*?(grape)"
3476 r, _ := syntax.Parse(pattern, syntax.Perl)
3477
3478 q := query.Regexp{
3479 Regexp: r,
3480 }
3481 t.Run("LineMatches", func(t *testing.T) {
3482 res := searchForTest(t, b, &q)
3483 wantRegexpCount := 2
3484 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3485 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3486 }
3487 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3488 t.Errorf("got %v, want 1 result", res.Files)
3489 }
3490 if l := len(res.Files[0].LineMatches); l != 2 {
3491 t.Errorf("got %v, want 2 line matches", l)
3492 }
3493 })
3494
3495 t.Run("ChunkMatches", func(t *testing.T) {
3496 res := searchForTest(t, b, &q, chunkOpts)
3497 wantRegexpCount := 2
3498 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3499 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3500 }
3501 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3502 t.Errorf("got %v, want 1 result", res.Files)
3503 }
3504 if l := len(res.Files[0].ChunkMatches); l != 1 {
3505 t.Errorf("got %v, want 1 chunk matches", l)
3506 }
3507 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3508 t.Errorf("got %v, want 1 chunk ranges", l)
3509 }
3510 })
3511}
3512
3513func TestSearchTypeFileName(t *testing.T) {
3514 b := testIndexBuilder(t, &zoekt.Repository{
3515 Name: "reponame",
3516 },
3517 Document{Name: "f1", Content: []byte("bla the needle")},
3518 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3519 // -----------------------------------012345678901234567890-123456
3520 )
3521
3522 t.Run("LineMatches", func(t *testing.T) {
3523 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3524 t.Helper()
3525 fmatches := res.Files
3526 if len(fmatches) != 1 {
3527 t.Errorf("got %v, want 1 matches", len(fmatches))
3528 return
3529 }
3530 if len(fmatches[0].LineMatches) != 1 {
3531 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3532 return
3533 }
3534 var got string
3535 if fmatches[0].LineMatches[0].FileName {
3536 got = fmatches[0].FileName
3537 } else {
3538 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3539 }
3540
3541 if got != want {
3542 t.Errorf("got %s, want %s", got, want)
3543 }
3544 }
3545
3546 // Only return the later match in the second file
3547 res := searchForTest(t, b, query.NewAnd(
3548 &query.Type{
3549 Type: query.TypeFileName,
3550 Child: &query.Substring{Pattern: "needle"},
3551 },
3552 &query.Substring{Pattern: "file"}))
3553 wantSingleMatch(res, "f2:8")
3554
3555 // Only return a filename result
3556 res = searchForTest(t, b,
3557 &query.Type{
3558 Type: query.TypeFileName,
3559 Child: &query.Substring{Pattern: "file"},
3560 })
3561 wantSingleMatch(res, "f2")
3562 })
3563
3564 t.Run("ChunkMatches", func(t *testing.T) {
3565 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3566 t.Helper()
3567 fmatches := res.Files
3568 if len(fmatches) != 1 {
3569 t.Errorf("got %v, want 1 matches", len(fmatches))
3570 return
3571 }
3572 if len(fmatches[0].ChunkMatches) != 1 {
3573 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3574 return
3575 }
3576 var got string
3577 if fmatches[0].ChunkMatches[0].FileName {
3578 got = fmatches[0].FileName
3579 } else {
3580 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3581 }
3582
3583 if got != want {
3584 t.Errorf("got %s, want %s", got, want)
3585 }
3586 }
3587
3588 // Only return the later match in the second file
3589 res := searchForTest(t, b, query.NewAnd(
3590 &query.Type{
3591 Type: query.TypeFileName,
3592 Child: &query.Substring{Pattern: "needle"},
3593 },
3594 &query.Substring{Pattern: "file"}),
3595 chunkOpts,
3596 )
3597 wantSingleMatch(res, "f2:8")
3598
3599 // Only return a filename result
3600 res = searchForTest(t, b,
3601 &query.Type{
3602 Type: query.TypeFileName,
3603 Child: &query.Substring{Pattern: "file"},
3604 },
3605 chunkOpts,
3606 )
3607 wantSingleMatch(res, "f2")
3608 })
3609}
3610
3611func TestSearchTypeLanguage(t *testing.T) {
3612 b := testIndexBuilder(t, &zoekt.Repository{
3613 Name: "reponame",
3614 },
3615 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3616 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3617 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3618 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3619 )
3620
3621 t.Log(b.languageMap)
3622
3623 t.Run("LineMatches", func(t *testing.T) {
3624 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3625 t.Helper()
3626 fmatches := res.Files
3627 if len(fmatches) != 1 {
3628 t.Errorf("got %v, want 1 matches", len(fmatches))
3629 return
3630 }
3631 if len(fmatches[0].LineMatches) != 1 {
3632 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3633 return
3634 }
3635 var got string
3636 if fmatches[0].LineMatches[0].FileName {
3637 got = fmatches[0].FileName
3638 } else {
3639 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3640 }
3641
3642 if got != want {
3643 t.Errorf("got %s, want %s", got, want)
3644 }
3645 }
3646
3647 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3648 wantSingleMatch(res, "apex.cls")
3649
3650 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3651 wantSingleMatch(res, "tex.cls")
3652
3653 res = searchForTest(t, b, &query.Language{Language: "C"})
3654 wantSingleMatch(res, "hello.h")
3655
3656 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3657 wantSingleMatch(res, "be.magik")
3658
3659 // test fallback language search by pretending it's an older index version
3660 res = searchForTest(t, b, &query.Language{Language: "C++"})
3661 if len(res.Files) != 0 {
3662 t.Errorf("got %d results for C++, want 0", len(res.Files))
3663 }
3664
3665 b.featureVersion = 11 // force fallback
3666 res = searchForTest(t, b, &query.Language{Language: "C++"})
3667 wantSingleMatch(res, "hello.h")
3668 })
3669
3670 t.Run("ChunkMatches", func(t *testing.T) {
3671 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3672 t.Helper()
3673 fmatches := res.Files
3674 if len(fmatches) != 1 {
3675 t.Errorf("got %v, want 1 matches", len(fmatches))
3676 return
3677 }
3678 if len(fmatches[0].ChunkMatches) != 1 {
3679 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3680 return
3681 }
3682 var got string
3683 if fmatches[0].ChunkMatches[0].FileName {
3684 got = fmatches[0].FileName
3685 } else {
3686 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3687 }
3688
3689 if got != want {
3690 t.Errorf("got %s, want %s", got, want)
3691 }
3692 }
3693
3694 b.featureVersion = FeatureVersion // reset feature version
3695 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3696 wantSingleMatch(res, "apex.cls")
3697
3698 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3699 wantSingleMatch(res, "tex.cls")
3700
3701 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3702 wantSingleMatch(res, "hello.h")
3703
3704 // test fallback language search by pretending it's an older index version
3705 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3706 if len(res.Files) != 0 {
3707 t.Errorf("got %d results for C++, want 0", len(res.Files))
3708 }
3709
3710 b.featureVersion = 11 // force fallback
3711 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3712 wantSingleMatch(res, "hello.h")
3713 })
3714}
3715
3716func TestStats(t *testing.T) {
3717 ignored := []cmp.Option{
3718 cmpopts.EquateEmpty(),
3719 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"),
3720 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
3721 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
3722 }
3723
3724 repoListEntries := func(b *IndexBuilder) []zoekt.RepoListEntry {
3725 searcher := searcherForTest(t, b)
3726 indexdata := searcher.(*indexData)
3727 return indexdata.repoListEntry
3728 }
3729
3730 t.Run("one empty repo", func(t *testing.T) {
3731 b := testIndexBuilder(t, nil)
3732 got := repoListEntries(b)
3733 want := []zoekt.RepoListEntry{
3734 {
3735 Stats: zoekt.RepoStats{
3736 Repos: 0,
3737 Shards: 1,
3738 Documents: 0,
3739 IndexBytes: 20,
3740 ContentBytes: 0,
3741 NewLinesCount: 0,
3742 DefaultBranchNewLinesCount: 0,
3743 OtherBranchesNewLinesCount: 0,
3744 },
3745 },
3746 }
3747
3748 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3749 t.Fatalf("mismatch (-want +got):\n%s", diff)
3750 }
3751 })
3752
3753 t.Run("one simple shard", func(t *testing.T) {
3754 b := testIndexBuilder(t, nil,
3755 Document{Name: "doc 0", Content: []byte("content 0")},
3756 Document{Name: "doc 1", Content: []byte("content 1")},
3757 )
3758 got := repoListEntries(b)
3759 want := []zoekt.RepoListEntry{
3760 {
3761 Stats: zoekt.RepoStats{
3762 Repos: 0,
3763 Shards: 1,
3764 Documents: 2,
3765 IndexBytes: 224,
3766 ContentBytes: 28,
3767 NewLinesCount: 0,
3768 DefaultBranchNewLinesCount: 0,
3769 OtherBranchesNewLinesCount: 0,
3770 },
3771 },
3772 }
3773
3774 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3775 t.Fatalf("mismatch (-want +got):\n%s", diff)
3776 }
3777 })
3778
3779 t.Run("one compound shard", func(t *testing.T) {
3780 b := testIndexBuilderCompound(t,
3781 []*zoekt.Repository{
3782 {Name: "repo 0"},
3783 {Name: "repo 1"},
3784 },
3785 [][]Document{
3786 {
3787 {Name: "doc 0", Content: []byte("content 0")},
3788 {Name: "doc 1", Content: []byte("content 1")},
3789 },
3790 {
3791 {Name: "doc 2", Content: []byte("content 2")},
3792 {Name: "doc 3", Content: []byte("content 3")},
3793 },
3794 },
3795 )
3796 got := repoListEntries(b)
3797 want := []zoekt.RepoListEntry{
3798 {
3799 Stats: zoekt.RepoStats{
3800 Repos: 0,
3801 Shards: 1,
3802 Documents: 2,
3803 IndexBytes: 180,
3804 ContentBytes: 28,
3805 NewLinesCount: 0,
3806 DefaultBranchNewLinesCount: 0,
3807 OtherBranchesNewLinesCount: 0,
3808 },
3809 },
3810 {
3811 Stats: zoekt.RepoStats{
3812 Repos: 0,
3813 Shards: 1,
3814 Documents: 2,
3815 IndexBytes: 180,
3816 ContentBytes: 28,
3817 NewLinesCount: 0,
3818 DefaultBranchNewLinesCount: 0,
3819 OtherBranchesNewLinesCount: 0,
3820 },
3821 },
3822 }
3823
3824 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3825 t.Fatalf("mismatch (-want +got):\n%s", diff)
3826 }
3827 })
3828
3829 t.Run("compound shard with empty repos", func(t *testing.T) {
3830 b := testIndexBuilderCompound(t,
3831 []*zoekt.Repository{
3832 {Name: "repo 0"},
3833 {Name: "repo 1"},
3834 {Name: "repo 2"},
3835 {Name: "repo 3"},
3836 {Name: "repo 4"},
3837 },
3838 [][]Document{
3839 {{Name: "doc 0", Content: []byte("content 0")}},
3840 nil,
3841 {{Name: "doc 1", Content: []byte("content 1")}},
3842 nil,
3843 nil,
3844 },
3845 )
3846 got := repoListEntries(b)
3847
3848 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3849 Shards: 1,
3850 Documents: 0,
3851 ContentBytes: 0,
3852 }}
3853 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3854 Shards: 1,
3855 Documents: 1,
3856 ContentBytes: 14,
3857 }}
3858
3859 want := []zoekt.RepoListEntry{
3860 entryNonEmpty,
3861 entryEmpty,
3862 entryNonEmpty,
3863 entryEmpty,
3864 entryEmpty,
3865 }
3866
3867 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3868 t.Fatalf("mismatch (-want +got):\n%s", diff)
3869 }
3870 })
3871}
3872
3873// This tests the frequent pattern "\bLITERAL\b".
3874func TestWordSearch(t *testing.T) {
3875 content := []byte("needle the bla")
3876 // ----------------01234567890123
3877
3878 b := testIndexBuilder(t, nil,
3879 Document{
3880 Name: "f1",
3881 Content: content,
3882 })
3883
3884 t.Run("LineMatches", func(t *testing.T) {
3885 sres := searchForTest(t, b,
3886 &query.Regexp{
3887 Regexp: mustParseRE("\\bthe\\b"),
3888 CaseSensitive: true,
3889 Content: true,
3890 })
3891
3892 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3893 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3894 }
3895
3896 if sres.Stats.RegexpsConsidered != 0 {
3897 t.Fatal("expected regexp to be skipped")
3898 }
3899
3900 got := sres.Files[0].LineMatches[0]
3901 want := zoekt.LineMatch{
3902 LineFragments: []zoekt.LineFragmentMatch{{
3903 LineOffset: 7,
3904 Offset: 7,
3905 MatchLength: 3,
3906 }},
3907 Line: content,
3908 FileName: false,
3909 LineNumber: 1,
3910 LineStart: 0,
3911 LineEnd: 14,
3912 }
3913
3914 if !reflect.DeepEqual(got, want) {
3915 t.Errorf("got %#v, want %#v", got, want)
3916 }
3917 })
3918
3919 t.Run("ChunkMatches", func(t *testing.T) {
3920 sres := searchForTest(t, b,
3921 &query.Regexp{
3922 Regexp: mustParseRE("\\bthe\\b"),
3923 CaseSensitive: true,
3924 }, chunkOpts)
3925
3926 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3927 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3928 }
3929
3930 if sres.Stats.RegexpsConsidered != 0 {
3931 t.Fatal("expected regexp to be skipped")
3932 }
3933
3934 got := sres.Files[0].ChunkMatches[0]
3935 want := zoekt.ChunkMatch{
3936 Content: content,
3937 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3938 Ranges: []zoekt.Range{{
3939 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3940 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3941 }},
3942 }
3943
3944 if diff := cmp.Diff(want, got); diff != "" {
3945 t.Fatal(diff)
3946 }
3947 })
3948}
3949
3950// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
3951// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
3952func BenchmarkScoreChunkMatches(b *testing.B) {
3953 ctx := context.Background()
3954 var builder strings.Builder
3955 for i := 0; i < 1000; i++ {
3956 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i))
3957 }
3958
3959 searcher := searcherForTest(b, testIndexBuilder(b, nil,
3960 Document{Name: "f1", Content: []byte(builder.String())},
3961 ))
3962
3963 q := &query.Or{
3964 Children: []query.Q{
3965 &query.Substring{Pattern: "f"},
3966 &query.Substring{Pattern: "t"},
3967 }}
3968
3969 b.Run("score large ChunkMatch", func(b *testing.B) {
3970 b.ReportAllocs()
3971 b.ResetTimer()
3972
3973 for i := 0; i < b.N; i++ {
3974 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1})
3975 if err != nil {
3976 b.Fatal(err)
3977 }
3978
3979 matches := sres.Files
3980 if len(matches) == 0 {
3981 b.Fatalf("want file index, got none")
3982 }
3983 }
3984 })
3985}