// Fork of https://github.com/sourcegraph/zoekt
//
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package index
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt"
31 "github.com/sourcegraph/zoekt/query"
32)
33
34func clearScores(r *zoekt.SearchResult) {
35 for i := range r.Files {
36 r.Files[i].Score = 0.0
37 for j := range r.Files[i].LineMatches {
38 r.Files[i].LineMatches[j].Score = 0.0
39 }
40 for j := range r.Files[i].ChunkMatches {
41 r.Files[i].ChunkMatches[j].Score = 0.0
42 r.Files[i].ChunkMatches[j].BestLineMatch = 0
43 }
44 r.Files[i].Checksum = nil
45 r.Files[i].Debug = ""
46 }
47}
48
49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder {
50 tb.Helper()
51
52 b, err := NewShardBuilder(repo)
53 if err != nil {
54 tb.Fatalf("NewShardBuilder: %v", err)
55 }
56
57 for i, d := range docs {
58 if err := b.Add(d); err != nil {
59 tb.Fatalf("Add %d: %v", i, err)
60 }
61 }
62
63 return b
64}
65
66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder {
67 t.Helper()
68
69 b := newShardBuilder(0)
70 b.indexFormatVersion = NextIndexFormatVersion
71
72 if len(repos) != len(docs) {
73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
74 }
75
76 for i, repo := range repos {
77 if err := b.setRepository(repo); err != nil {
78 t.Fatal(err)
79 }
80 for j, d := range docs[i] {
81 if err := b.Add(d); err != nil {
82 t.Fatalf("Add %d %d: %v", i, j, err)
83 }
84 }
85 }
86
87 return b
88}
89
90func TestBoundary(t *testing.T) {
91 b := testShardBuilder(t, nil,
92 Document{Name: "f1", Content: []byte("x the")},
93 Document{Name: "f1", Content: []byte("reader")})
94 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
95 if len(res.Files) > 0 {
96 t.Fatalf("got %v, want no matches", res.Files)
97 }
98}
99
100func TestDocSectionInvalid(t *testing.T) {
101 b, err := NewShardBuilder(nil)
102 if err != nil {
103 t.Fatalf("NewShardBuilder: %v", err)
104 }
105 doc := Document{
106 Name: "f1",
107 Content: []byte("01234567890123"),
108 Symbols: []DocumentSection{{5, 8}, {7, 9}},
109 }
110
111 if err := b.Add(doc); err == nil {
112 t.Errorf("overlapping doc sections should fail")
113 }
114
115 doc = Document{
116 Name: "f1",
117 Content: []byte("01234567890123"),
118 Symbols: []DocumentSection{{0, 20}},
119 }
120
121 if err := b.Add(doc); err == nil {
122 t.Errorf("doc sections beyond EOF should fail")
123 }
124}
125
126func TestBasic(t *testing.T) {
127 b := testShardBuilder(t, nil,
128 Document{
129 Name: "f2",
130 Content: []byte("to carry water in the no later bla"),
131 // --------------0123456789012345678901234567890123
132 })
133
134 t.Run("LineMatch", func(t *testing.T) {
135 res := searchForTest(t, b, &query.Substring{
136 Pattern: "water",
137 CaseSensitive: true,
138 })
139 fmatches := res.Files
140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
141 t.Fatalf("got %v, want 1 matches", fmatches)
142 }
143
144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
145 want := "f2:9"
146 if got != want {
147 t.Errorf("1: got %s, want %s", got, want)
148 }
149 })
150
151 t.Run("ChunkMatch", func(t *testing.T) {
152 res := searchForTest(t, b, &query.Substring{
153 Pattern: "water",
154 CaseSensitive: true,
155 }, chunkOpts)
156 fmatches := res.Files
157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
158 t.Fatalf("got %v, want 1 matches", fmatches)
159 }
160
161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
162 want := "f2:9"
163 if got != want {
164 t.Errorf("1: got %s, want %s", got, want)
165 }
166 })
167}
168
169func TestEmptyIndex(t *testing.T) {
170 b := testShardBuilder(t, nil)
171 searcher := searcherForTest(t, b)
172
173 var opts zoekt.SearchOptions
174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
175 t.Fatalf("Search: %v", err)
176 }
177
178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
179 t.Fatalf("List: %v", err)
180 }
181
182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
183 t.Fatalf("Search: %v", err)
184 }
185}
186
// memSeeker serves reads from an in-memory byte slice. The tests use it to
// hand a freshly written shard to NewSearcher without going through a file.
type memSeeker struct {
	data []byte
}

// Name returns a fixed placeholder name.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data that start at byte offset off. The result
// aliases the underlying slice rather than copying it. A range that extends
// past the end of data yields an error instead of a panic.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Add in 64 bits: off+sz can wrap around in uint32, and a plain slice
	// expression would also accept an end beyond len(data) up to its capacity.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d, %d) out of range for %d bytes", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
203
204func TestNewlines(t *testing.T) {
205 b := testShardBuilder(t, nil,
206 // -----------------------------------------012345-678901-234
207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
208
209 t.Run("LineMatches", func(t *testing.T) {
210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
211
212 matches := sres.Files
213 want := []zoekt.FileMatch{{
214 FileName: "filename",
215 LineMatches: []zoekt.LineMatch{{
216 LineFragments: []zoekt.LineFragmentMatch{{
217 Offset: 8,
218 LineOffset: 2,
219 MatchLength: 3,
220 }},
221 Line: []byte("line2\n"),
222 LineStart: 6,
223 LineEnd: 12,
224 LineNumber: 2,
225 }},
226 }}
227
228 if diff := cmp.Diff(matches, want); diff != "" {
229 t.Fatal(diff)
230 }
231 })
232
233 t.Run("ChunkMatches", func(t *testing.T) {
234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
235
236 matches := sres.Files
237 want := []zoekt.FileMatch{{
238 FileName: "filename",
239 ChunkMatches: []zoekt.ChunkMatch{{
240 Content: []byte("line2\n"),
241 ContentStart: zoekt.Location{
242 ByteOffset: 6,
243 LineNumber: 2,
244 Column: 1,
245 },
246 Ranges: []zoekt.Range{{
247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3},
248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6},
249 }},
250 }},
251 }}
252
253 if diff := cmp.Diff(want, matches); diff != "" {
254 t.Fatal(diff)
255 }
256 })
257}
258
259// A result spanning multiple lines should have LineMatches that only cover
260// single lines.
261func TestQueryNewlines(t *testing.T) {
262 text := "line1\nline2\nbla"
263 b := testShardBuilder(t, nil,
264 Document{Name: "filename", Content: []byte(text)})
265
266 t.Run("LineMatches", func(t *testing.T) {
267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
268 matches := sres.Files
269 if len(matches) != 1 {
270 t.Fatalf("got %d file matches, want exactly one", len(matches))
271 }
272 m := matches[0]
273 if len(m.LineMatches) != 2 {
274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
275 }
276 })
277
278 t.Run("ChunkMatches", func(t *testing.T) {
279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
280 matches := sres.Files
281 if len(matches) != 1 {
282 t.Fatalf("got %d file matches, want exactly one", len(matches))
283 }
284 m := matches[0]
285 if len(m.ChunkMatches) != 1 {
286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
287 }
288 })
289}
290
291var chunkOpts = zoekt.SearchOptions{ChunkMatches: true}
292
293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult {
294 searcher := searcherForTest(t, b)
295 var opts zoekt.SearchOptions
296 if len(o) > 0 {
297 opts = o[0]
298 }
299 res, err := searcher.Search(context.Background(), q, &opts)
300 if err != nil {
301 t.Fatalf("Search(%s): %v", q, err)
302 }
303 clearScores(res)
304 return res
305}
306
307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher {
308 var buf bytes.Buffer
309 if err := b.Write(&buf); err != nil {
310 t.Fatal(err)
311 }
312 f := &memSeeker{buf.Bytes()}
313
314 searcher, err := NewSearcher(f)
315 if err != nil {
316 t.Fatalf("NewSearcher: %v", err)
317 }
318
319 return searcher
320}
321
322func TestCaseFold(t *testing.T) {
323 b := testShardBuilder(t, nil,
324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
325 // -----------------------------------012345678901234
326 )
327 t.Run("LineMatches", func(t *testing.T) {
328 sres := searchForTest(t, b, &query.Substring{
329 Pattern: "bananas",
330 CaseSensitive: true,
331 })
332 matches := sres.Files
333 if len(matches) != 0 {
334 t.Errorf("foldcase: got %#v, want 0 matches", matches)
335 }
336
337 sres = searchForTest(t, b,
338 &query.Substring{
339 Pattern: "BaNaNAS",
340 CaseSensitive: true,
341 })
342 matches = sres.Files
343 if len(matches) != 1 {
344 t.Errorf("no foldcase: got %v, want 1 matches", matches)
345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
346 t.Errorf("foldcase: got %v, want offsets 7", matches)
347 }
348 })
349
350 t.Run("ChunkMatches", func(t *testing.T) {
351 sres := searchForTest(t, b, &query.Substring{
352 Pattern: "bananas",
353 CaseSensitive: true,
354 }, chunkOpts)
355 matches := sres.Files
356 if len(matches) != 0 {
357 t.Errorf("foldcase: got %#v, want 0 matches", matches)
358 }
359
360 sres = searchForTest(t, b,
361 &query.Substring{
362 Pattern: "BaNaNAS",
363 CaseSensitive: true,
364 })
365 matches = sres.Files
366 if len(matches) != 1 {
367 t.Errorf("no foldcase: got %v, want 1 matches", matches)
368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
369 t.Errorf("foldcase: got %v, want offsets 7", matches)
370 }
371 })
372}
373
374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
375// chars. Those are then set as symbols.
376func wordsAsSymbols(doc Document) Document {
377 re := regexp.MustCompile(`\b\w{2,}\b`)
378 var symbols []DocumentSection
379 var symbolsMetadata []*zoekt.Symbol
380 for _, match := range re.FindAllIndex(doc.Content, -1) {
381 symbols = append(symbols, DocumentSection{
382 Start: uint32(match[0]),
383 End: uint32(match[1]),
384 })
385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"})
386 }
387 doc.Symbols = symbols
388 doc.SymbolsMetaData = symbolsMetadata
389 return doc
390}
391
392func TestSearchStats(t *testing.T) {
393 ctx := context.Background()
394 searcher := searcherForTest(t, testShardBuilder(t, nil,
395 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
396 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
397 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
398 // --------------------------------------------------0123456789012345
399 ))
400
401 andQuery := query.NewAnd(
402 &query.Substring{
403 Pattern: "banana",
404 },
405 &query.Substring{
406 Pattern: "apple",
407 },
408 )
409
410 t.Run("LineMatches", func(t *testing.T) {
411 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{})
412 if err != nil {
413 t.Fatal(err)
414 }
415 matches := sres.Files
416 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
417 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
418 }
419
420 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
421 t.Fatalf("got %#v, want offsets 2,9", matches)
422 }
423 })
424 t.Run("ChunkMatches", func(t *testing.T) {
425 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
426 if err != nil {
427 t.Fatal(err)
428 }
429 matches := sres.Files
430 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
431 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
432 }
433
434 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
435 t.Fatalf("got %#v, want offsets 2,9", matches)
436 }
437 })
438 t.Run("Stats", func(t *testing.T) {
439 cases := []struct {
440 Name string
441 Q query.Q
442 Want zoekt.Stats
443 }{{
444 Name: "and-query",
445 Q: andQuery,
446 Want: zoekt.Stats{
447 FilesLoaded: 1,
448 ContentBytesLoaded: 22,
449 IndexBytesLoaded: 10,
450 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
451 NgramLookups: 104,
452 MatchCount: 2,
453 FileCount: 1,
454 FilesConsidered: 2,
455 ShardsScanned: 1,
456 },
457 }, {
458 Name: "one-trigram",
459 Q: &query.Substring{
460 Pattern: "a y",
461 Content: true,
462 CaseSensitive: true,
463 },
464 Want: zoekt.Stats{
465 ContentBytesLoaded: 14,
466 IndexBytesLoaded: 1,
467 FileCount: 1,
468 FilesConsidered: 1,
469 FilesLoaded: 1,
470 ShardsScanned: 1,
471 MatchCount: 1,
472 NgramMatches: 1,
473 NgramLookups: 2, // once to lookup frequency then again to access posting list.
474 },
475 }, {
476 Name: "one-trigram-case-insensitive",
477 Q: &query.Substring{
478 Pattern: "a y",
479 Content: true,
480 },
481 Want: zoekt.Stats{
482 ContentBytesLoaded: 14,
483 IndexBytesLoaded: 1,
484 FileCount: 1,
485 FilesConsidered: 1,
486 FilesLoaded: 1,
487 ShardsScanned: 1,
488 MatchCount: 1,
489 NgramMatches: 1,
490 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
491 },
492 }, {
493 Name: "one-trigram-pruned",
494 Q: &query.Substring{
495 Pattern: "foo",
496 Content: true,
497 CaseSensitive: true,
498 },
499 Want: zoekt.Stats{
500 ShardsSkippedFilter: 1,
501 NgramLookups: 1, // only had to lookup once
502 },
503 }, {
504 Name: "one-trigram-branch-pruned",
505 Q: query.NewAnd(
506 &query.Substring{
507 Pattern: "foo",
508 Content: true,
509 CaseSensitive: true,
510 },
511 &query.Substring{
512 Pattern: "a y",
513 Content: true,
514 CaseSensitive: true,
515 },
516 ),
517 Want: zoekt.Stats{
518 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
519 ShardsSkippedFilter: 1,
520 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
521 },
522 }, {
523 Name: "symbol-substr-nomatch",
524 Q: &query.Symbol{Expr: &query.Substring{
525 Pattern: "banana apple",
526 Content: true,
527 CaseSensitive: true,
528 }},
529 Want: zoekt.Stats{
530 IndexBytesLoaded: 3,
531 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
532 MatchCount: 0, // even though there is a match it doesn't align with a symbol
533 ShardsScanned: 1,
534 NgramMatches: 1,
535 NgramLookups: 12,
536 },
537 }, {
538 Name: "symbol-substr",
539 Q: &query.Symbol{Expr: &query.Substring{
540 Pattern: "apple",
541 Content: true,
542 CaseSensitive: true,
543 }},
544 Want: zoekt.Stats{
545 ContentBytesLoaded: 35,
546 IndexBytesLoaded: 4,
547 FileCount: 2,
548 FilesConsidered: 2, // must be 2 to ensure we used the index
549 FilesLoaded: 2,
550 MatchCount: 2, // apple symbols is in two files
551 ShardsScanned: 1,
552 NgramMatches: 2,
553 NgramLookups: 5,
554 },
555 }, {
556 Name: "symbol-regexp-nomatch",
557 Q: &query.Symbol{Expr: &query.Regexp{
558 Regexp: mustParseRE("^apple.banana$"),
559 Content: true,
560 CaseSensitive: true,
561 }},
562 Want: zoekt.Stats{
563 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
564 IndexBytesLoaded: 10,
565 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
566 FilesLoaded: 2,
567 MatchCount: 0, // even though there is a match it doesn't align with a symbol
568 ShardsScanned: 1,
569 NgramMatches: 3,
570 NgramLookups: 11,
571 },
572 }, {
573 Name: "symbol-regexp",
574 Q: &query.Symbol{Expr: &query.Regexp{
575 Regexp: mustParseRE("^app.e$"),
576 Content: true,
577 CaseSensitive: true,
578 }},
579 Want: zoekt.Stats{
580 ContentBytesLoaded: 35,
581 IndexBytesLoaded: 2,
582 FileCount: 2,
583 FilesConsidered: 2, // must be 2 to ensure we used the index
584 FilesLoaded: 2,
585 MatchCount: 2, // apple symbols is in two files
586 ShardsScanned: 1,
587 NgramMatches: 2,
588 NgramLookups: 2,
589 },
590 }}
591
592 for _, tc := range cases {
593 t.Run(tc.Name, func(t *testing.T) {
594 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
595 if err != nil {
596 t.Fatal(err)
597 }
598 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
599 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
600 }
601 })
602 }
603 })
604}
605
606func TestAndNegateSearch(t *testing.T) {
607 b := testShardBuilder(t, nil,
608 Document{Name: "f1", Content: []byte("x banana y")},
609 // -----------------------------------0123456789
610 Document{Name: "f4", Content: []byte("x banana apple y")})
611
612 t.Run("LineMatches", func(t *testing.T) {
613 sres := searchForTest(t, b, query.NewAnd(
614 &query.Substring{
615 Pattern: "banana",
616 },
617 &query.Not{Child: &query.Substring{
618 Pattern: "apple",
619 }}))
620
621 matches := sres.Files
622
623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
624 t.Fatalf("got %v, want 1 match", matches)
625 }
626 if matches[0].FileName != "f1" {
627 t.Fatalf("got match %#v, want FileName: f1", matches[0])
628 }
629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
630 t.Fatalf("got %v, want offset 2", matches)
631 }
632 })
633
634 t.Run("ChunkMatches", func(t *testing.T) {
635 sres := searchForTest(t, b,
636 query.NewAnd(
637 &query.Substring{
638 Pattern: "banana",
639 },
640 &query.Not{Child: &query.Substring{
641 Pattern: "apple",
642 }},
643 ),
644 chunkOpts,
645 )
646
647 matches := sres.Files
648
649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
650 t.Fatalf("got %v, want 1 match", matches)
651 }
652 if matches[0].FileName != "f1" {
653 t.Fatalf("got match %#v, want FileName: f1", matches[0])
654 }
655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
656 t.Fatalf("got %v, want offset 2", matches)
657 }
658 })
659}
660
661func TestNegativeMatchesOnlyShortcut(t *testing.T) {
662 b := testShardBuilder(t, nil,
663 Document{Name: "f1", Content: []byte("x banana y")},
664 Document{Name: "f2", Content: []byte("x appelmoes y")},
665 Document{Name: "f3", Content: []byte("x appelmoes y")},
666 Document{Name: "f3", Content: []byte("x appelmoes y")})
667
668 t.Run("LineMatches", func(t *testing.T) {
669 sres := searchForTest(t, b, query.NewAnd(
670 &query.Substring{
671 Pattern: "banana",
672 },
673 &query.Not{Child: &query.Substring{
674 Pattern: "appel",
675 }}))
676
677 if sres.Stats.FilesConsidered != 1 {
678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
679 }
680 })
681
682 t.Run("ChunkMatches", func(t *testing.T) {
683 sres := searchForTest(t, b, query.NewAnd(
684 &query.Substring{
685 Pattern: "banana",
686 },
687 &query.Not{Child: &query.Substring{
688 Pattern: "appel",
689 }}), chunkOpts)
690
691 if sres.Stats.FilesConsidered != 1 {
692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
693 }
694 })
695}
696
697func TestFileSearch(t *testing.T) {
698 b := testShardBuilder(t, nil,
699 Document{Name: "banzana", Content: []byte("x orange y")},
700 // -------------0123456
701 Document{Name: "banana", Content: []byte("x apple y")},
702 // -------------012345
703 )
704
705 t.Run("LineMatches", func(t *testing.T) {
706 sres := searchForTest(t, b, &query.Substring{
707 Pattern: "anan",
708 FileName: true,
709 })
710
711 matches := sres.Files
712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
713 t.Fatalf("got %v, want 1 match", matches)
714 }
715
716 got := matches[0].LineMatches[0]
717 want := zoekt.LineMatch{
718 Line: []byte("banana"),
719 LineFragments: []zoekt.LineFragmentMatch{{
720 Offset: 1,
721 LineOffset: 1,
722 MatchLength: 4,
723 }},
724 FileName: true,
725 }
726
727 if !reflect.DeepEqual(got, want) {
728 t.Errorf("got %#v, want %#v", got, want)
729 }
730 })
731
732 t.Run("ChunkMatches", func(t *testing.T) {
733 sres := searchForTest(t, b, &query.Substring{
734 Pattern: "anan",
735 FileName: true,
736 }, chunkOpts)
737
738 matches := sres.Files
739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
740 t.Fatalf("got %v, want 1 match", matches)
741 }
742
743 got := matches[0].ChunkMatches[0]
744 want := zoekt.ChunkMatch{
745 Content: []byte("banana"),
746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
747 Ranges: []zoekt.Range{{
748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2},
749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6},
750 }},
751 FileName: true,
752 }
753
754 if diff := cmp.Diff(want, got); diff != "" {
755 t.Fatal(diff)
756 }
757 })
758
759 t.Run("FileNameSet", func(t *testing.T) {
760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
761
762 matches := sres.Files
763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
764 t.Fatalf("got %v, want 1 match", matches)
765 }
766
767 got := matches[0].ChunkMatches[0]
768 want := zoekt.ChunkMatch{
769 Content: []byte("banana"),
770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
771 Ranges: []zoekt.Range{{
772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7},
774 }},
775 FileName: true,
776 }
777
778 if diff := cmp.Diff(want, got); diff != "" {
779 t.Fatal(diff)
780 }
781 })
782}
783
784func TestFileCase(t *testing.T) {
785 b := testShardBuilder(t, nil,
786 Document{Name: "BANANA", Content: []byte("x orange y")})
787
788 t.Run("LineMatches", func(t *testing.T) {
789 sres := searchForTest(t, b, &query.Substring{
790 Pattern: "banana",
791 FileName: true,
792 })
793
794 matches := sres.Files
795 if len(matches) != 1 || matches[0].FileName != "BANANA" {
796 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
797 }
798 })
799
800 t.Run("ChunkMatches", func(t *testing.T) {
801 sres := searchForTest(t, b, &query.Substring{
802 Pattern: "banana",
803 FileName: true,
804 }, chunkOpts)
805
806 matches := sres.Files
807 if len(matches) != 1 || matches[0].FileName != "BANANA" {
808 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
809 }
810 })
811}
812
813func TestFileRegexpSearchBruteForce(t *testing.T) {
814 b := testShardBuilder(t, nil,
815 Document{Name: "banzana", Content: []byte("x orange y")},
816 Document{Name: "banana", Content: []byte("x apple y")},
817 )
818 t.Run("LineMatches", func(t *testing.T) {
819 sres := searchForTest(t, b, &query.Regexp{
820 Regexp: mustParseRE("[qn][zx]"),
821 FileName: true,
822 })
823
824 matches := sres.Files
825 if len(matches) != 1 || matches[0].FileName != "banzana" {
826 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
827 }
828 })
829 t.Run("LineMatches", func(t *testing.T) {
830 sres := searchForTest(t, b, &query.Regexp{
831 Regexp: mustParseRE("[qn][zx]"),
832 FileName: true,
833 }, chunkOpts)
834
835 matches := sres.Files
836 if len(matches) != 1 || matches[0].FileName != "banzana" {
837 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
838 }
839 })
840}
841
842func TestFileRegexpSearchShortString(t *testing.T) {
843 b := testShardBuilder(t, nil,
844 Document{Name: "banana.py", Content: []byte("x orange y")})
845
846 t.Run("LineMatches", func(t *testing.T) {
847 sres := searchForTest(t, b, &query.Regexp{
848 Regexp: mustParseRE("ana.py"),
849 FileName: true,
850 })
851
852 matches := sres.Files
853 if len(matches) != 1 || matches[0].FileName != "banana.py" {
854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
855 }
856 })
857
858 t.Run("ChunkMatches", func(t *testing.T) {
859 sres := searchForTest(t, b, &query.Regexp{
860 Regexp: mustParseRE("ana.py"),
861 FileName: true,
862 }, chunkOpts)
863
864 matches := sres.Files
865 if len(matches) != 1 || matches[0].FileName != "banana.py" {
866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
867 }
868 })
869}
870
871func TestFileSubstringSearchBruteForce(t *testing.T) {
872 b := testShardBuilder(t, nil,
873 Document{Name: "BANZANA", Content: []byte("x orange y")},
874 Document{Name: "banana", Content: []byte("x apple y")})
875
876 q := &query.Substring{
877 Pattern: "z",
878 FileName: true,
879 }
880
881 t.Run("LineMatches", func(t *testing.T) {
882 res := searchForTest(t, b, q)
883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
885 }
886 })
887
888 t.Run("ChunkMatches", func(t *testing.T) {
889 res := searchForTest(t, b, q, chunkOpts)
890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
892 }
893 })
894}
895
896func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
897 b := testShardBuilder(t, nil,
898 Document{Name: "BANZANA", Content: []byte("x orange y")},
899 Document{Name: "bananaq", Content: []byte("x apple y")})
900
901 q := &query.Substring{
902 Pattern: "q",
903 FileName: true,
904 }
905 t.Run("LineMatches", func(t *testing.T) {
906 res := searchForTest(t, b, q)
907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
908 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
909 }
910 })
911
912 t.Run("LineMatches", func(t *testing.T) {
913 res := searchForTest(t, b, q, chunkOpts)
914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
915 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
916 }
917 })
918}
919
920func TestSearchMatchAll(t *testing.T) {
921 b := testShardBuilder(t, nil,
922 Document{Name: "banzana", Content: []byte("x orange y")},
923 Document{Name: "banana", Content: []byte("x apple y")})
924
925 t.Run("LineMatches", func(t *testing.T) {
926 sres := searchForTest(t, b, &query.Const{Value: true})
927 matches := sres.Files
928 if len(matches) != 2 {
929 t.Fatalf("got %v, want 2 matches", matches)
930 }
931 })
932
933 t.Run("ChunkMatches", func(t *testing.T) {
934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
935 matches := sres.Files
936 if len(matches) != 2 {
937 t.Fatalf("got %v, want 2 matches", matches)
938 }
939 })
940}
941
942func TestSearchNewline(t *testing.T) {
943 b := testShardBuilder(t, nil,
944 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
945
946 t.Run("LineMatches", func(t *testing.T) {
947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
948
949 // Just check that we don't crash.
950
951 matches := sres.Files
952 if len(matches) != 1 {
953 t.Fatalf("got %v, want 1 matches", matches)
954 }
955 })
956
957 t.Run("ChunkMatches", func(t *testing.T) {
958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
959
960 // Just check that we don't crash.
961
962 matches := sres.Files
963 if len(matches) != 1 {
964 t.Fatalf("got %v, want 1 matches", matches)
965 }
966 })
967}
968
969func TestSearchMatchAllRegexp(t *testing.T) {
970 b := testShardBuilder(t, nil,
971 Document{Name: "banzana", Content: []byte("abcd")},
972 Document{Name: "banana", Content: []byte("pqrs")})
973
974 t.Run("LineMatches", func(t *testing.T) {
975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
976
977 matches := sres.Files
978 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
979 t.Fatalf("got %v, want 2 matches", matches)
980 }
981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
982 t.Fatalf("want 4 chars in every file, got %#v", matches)
983 }
984 })
985
986 t.Run("ChunkMatches", func(t *testing.T) {
987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
988
989 matches := sres.Files
990 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
991 t.Fatalf("got %v, want 2 matches", matches)
992 }
993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
994 t.Fatalf("want 4 chars in every file, got %#v", matches)
995 }
996 })
997}
998
999func TestSearchBM25MatchScores(t *testing.T) {
1000 ctx := context.Background()
1001 searcher := searcherForTest(t, testShardBuilder(t, nil,
1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")},
1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")},
1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}),
1005 ))
1006
1007 t.Run("LineMatches", func(t *testing.T) {
1008 q := &query.Substring{Pattern: "two"}
1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true})
1010 if err != nil {
1011 t.Fatal(err)
1012 }
1013 matches := sres.Files
1014 if len(matches) != 1 {
1015 t.Fatalf("want 1 file index, got %d", len(matches))
1016 }
1017
1018 if len(matches[0].LineMatches) != 2 {
1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1020 }
1021
1022 if matches[0].LineMatches[0].LineNumber != 4 {
1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber)
1024 }
1025 })
1026
1027 t.Run("ChunkMatches", func(t *testing.T) {
1028 q := &query.Substring{Pattern: "five"}
1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1030 if err != nil {
1031 t.Fatal(err)
1032 }
1033
1034 matches := sres.Files
1035 if len(matches) != 1 {
1036 t.Fatalf("want 1 file index, got %d", len(matches))
1037 }
1038
1039 if len(matches[0].ChunkMatches) != 2 {
1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1041 }
1042
1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 {
1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch)
1045 }
1046 })
1047
1048 t.Run("ChunkMatches with symbols", func(t *testing.T) {
1049 q := &query.Or{
1050 Children: []query.Q{
1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}},
1052 &query.Substring{Pattern: "five"},
1053 },
1054 }
1055
1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1057 if err != nil {
1058 t.Fatal(err)
1059 }
1060
1061 matches := sres.Files
1062 if len(matches) != 2 {
1063 t.Fatalf("want 2 file index, got %d", len(matches))
1064 }
1065
1066 foundSymbolInfo := false
1067 for _, m := range matches {
1068 for _, cm := range m.ChunkMatches {
1069 if len(cm.SymbolInfo) > 0 {
1070 foundSymbolInfo = true
1071 }
1072 }
1073 }
1074
1075 if !foundSymbolInfo {
1076 t.Fatalf("want symbol info, got none")
1077 }
1078 })
1079}
1080
1081func TestFileRestriction(t *testing.T) {
1082 b := testShardBuilder(t, nil,
1083 Document{Name: "banana1", Content: []byte("x orange y")},
1084 Document{Name: "banana2", Content: []byte("x apple y")},
1085 Document{Name: "orange", Content: []byte("x apple z")})
1086
1087 t.Run("LineMatches", func(t *testing.T) {
1088 sres := searchForTest(t, b, query.NewAnd(
1089 &query.Substring{
1090 Pattern: "banana",
1091 FileName: true,
1092 },
1093 &query.Substring{
1094 Pattern: "apple",
1095 }))
1096
1097 matches := sres.Files
1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1099 t.Fatalf("got %v, want 1 match", matches)
1100 }
1101
1102 match := matches[0].LineMatches[0]
1103 got := string(match.Line)
1104 want := "x apple y"
1105 if got != want {
1106 t.Errorf("got match %#v, want line %q", match, want)
1107 }
1108 })
1109
1110 t.Run("ChunkMatches", func(t *testing.T) {
1111 sres := searchForTest(t, b, query.NewAnd(
1112 &query.Substring{
1113 Pattern: "banana",
1114 FileName: true,
1115 },
1116 &query.Substring{
1117 Pattern: "apple",
1118 }), chunkOpts)
1119
1120 matches := sres.Files
1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1122 t.Fatalf("got %v, want 1 match", matches)
1123 }
1124
1125 match := matches[0].ChunkMatches[0]
1126 got := string(match.Content)
1127 want := "x apple y"
1128 if got != want {
1129 t.Errorf("got match %#v, want line %q", match, want)
1130 }
1131 })
1132}
1133
1134func TestFileNameBoundary(t *testing.T) {
1135 b := testShardBuilder(t, nil,
1136 Document{Name: "banana2", Content: []byte("x apple y")},
1137 Document{Name: "helpers.go", Content: []byte("x apple y")},
1138 Document{Name: "foo", Content: []byte("x apple y")})
1139
1140 t.Run("LineMatches", func(t *testing.T) {
1141 sres := searchForTest(t, b, &query.Substring{
1142 Pattern: "helpers.go",
1143 FileName: true,
1144 })
1145
1146 matches := sres.Files
1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1148 t.Fatalf("got %v, want 1 match", matches)
1149 }
1150 })
1151
1152 t.Run("ChunkMatches", func(t *testing.T) {
1153 sres := searchForTest(t, b, &query.Substring{
1154 Pattern: "helpers.go",
1155 FileName: true,
1156 }, chunkOpts)
1157
1158 matches := sres.Files
1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1160 t.Fatalf("got %v, want 1 match", matches)
1161 }
1162 })
1163}
1164
1165func TestDocumentOrder(t *testing.T) {
1166 var docs []Document
1167 for i := range 3 {
1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1169 }
1170
1171 b := testShardBuilder(t, nil, docs...)
1172
1173 t.Run("LineMatches", func(t *testing.T) {
1174 sres := searchForTest(t, b, query.NewAnd(
1175 &query.Substring{
1176 Pattern: "needle",
1177 }))
1178
1179 want := []string{"f0", "f1", "f2"}
1180 var got []string
1181 for _, f := range sres.Files {
1182 got = append(got, f.FileName)
1183 }
1184 if !reflect.DeepEqual(got, want) {
1185 t.Fatalf("got %v, want %v", got, want)
1186 }
1187 })
1188
1189 t.Run("ChunkMatches", func(t *testing.T) {
1190 sres := searchForTest(t, b,
1191 query.NewAnd(&query.Substring{
1192 Pattern: "needle",
1193 }),
1194 chunkOpts,
1195 )
1196
1197 want := []string{"f0", "f1", "f2"}
1198 var got []string
1199 for _, f := range sres.Files {
1200 got = append(got, f.FileName)
1201 }
1202 if !reflect.DeepEqual(got, want) {
1203 t.Fatalf("got %v, want %v", got, want)
1204 }
1205 })
1206}
1207
1208func TestBranchMask(t *testing.T) {
1209 b := testShardBuilder(t, &zoekt.Repository{
1210 Branches: []zoekt.RepositoryBranch{
1211 {"master", "v-master"},
1212 {"stable", "v-stable"},
1213 {"bonzai", "v-bonzai"},
1214 },
1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1219 )
1220
1221 t.Run("LineMatches", func(t *testing.T) {
1222 sres := searchForTest(t, b, query.NewAnd(
1223 &query.Substring{
1224 Pattern: "needle",
1225 },
1226 &query.Branch{
1227 Pattern: "table",
1228 }))
1229
1230 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1231 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1232 }
1233
1234 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1235 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1236 }
1237 })
1238
1239 t.Run("ChunkMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b, query.NewAnd(
1241 &query.Substring{
1242 Pattern: "needle",
1243 },
1244 &query.Branch{
1245 Pattern: "table",
1246 }),
1247 chunkOpts,
1248 )
1249
1250 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1251 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1252 }
1253
1254 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1255 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1256 }
1257 })
1258}
1259
1260func TestBranchLimit(t *testing.T) {
1261 for limit := 64; limit <= 65; limit++ {
1262 r := &zoekt.Repository{}
1263 for i := range limit {
1264 s := fmt.Sprintf("b%d", i)
1265 r.Branches = append(r.Branches, zoekt.RepositoryBranch{
1266 s, "v-" + s,
1267 })
1268 }
1269 _, err := NewShardBuilder(r)
1270 if limit == 64 && err != nil {
1271 t.Fatalf("NewShardBuilder: %v", err)
1272 } else if limit == 65 && err == nil {
1273 t.Fatalf("NewShardBuilder succeeded")
1274 }
1275 }
1276}
1277
1278func TestBranchReport(t *testing.T) {
1279 branches := []string{"stable", "master"}
1280 b := testShardBuilder(t, &zoekt.Repository{
1281 Branches: []zoekt.RepositoryBranch{
1282 {"stable", "vs"},
1283 {"master", "vm"},
1284 },
1285 },
1286 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1287
1288 t.Run("LineMatches", func(t *testing.T) {
1289 sres := searchForTest(t, b, &query.Substring{
1290 Pattern: "needle",
1291 })
1292 if len(sres.Files) != 1 {
1293 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1294 }
1295
1296 f := sres.Files[0]
1297 if !reflect.DeepEqual(f.Branches, branches) {
1298 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1299 }
1300 })
1301
1302 t.Run("ChunkMatches", func(t *testing.T) {
1303 sres := searchForTest(t, b, &query.Substring{
1304 Pattern: "needle",
1305 }, chunkOpts)
1306 if len(sres.Files) != 1 {
1307 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1308 }
1309
1310 f := sres.Files[0]
1311 if !reflect.DeepEqual(f.Branches, branches) {
1312 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1313 }
1314 })
1315}
1316
1317func TestBranchVersions(t *testing.T) {
1318 b := testShardBuilder(t, &zoekt.Repository{
1319 Branches: []zoekt.RepositoryBranch{
1320 {"stable", "v-stable"},
1321 {"master", "v-master"},
1322 },
1323 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1324
1325 t.Run("LineMatches", func(t *testing.T) {
1326 sres := searchForTest(t, b, &query.Substring{
1327 Pattern: "needle",
1328 })
1329 if len(sres.Files) != 1 {
1330 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1331 }
1332
1333 f := sres.Files[0]
1334 if f.Version != "v-master" {
1335 t.Fatalf("got file %#v, want version 'v-master'", f)
1336 }
1337 })
1338
1339 t.Run("ChunkMatches", func(t *testing.T) {
1340 sres := searchForTest(t, b, &query.Substring{
1341 Pattern: "needle",
1342 }, chunkOpts)
1343 if len(sres.Files) != 1 {
1344 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1345 }
1346
1347 f := sres.Files[0]
1348 if f.Version != "v-master" {
1349 t.Fatalf("got file %#v, want version 'v-master'", f)
1350 }
1351 })
1352}
1353
// mustParseRE parses s with the Perl syntax flags and returns the resulting
// syntax tree. It panics with the parse error if s is not a valid expression.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}

	panic(err)
}
1362
1363func TestRegexp(t *testing.T) {
1364 content := []byte("needle the bla")
1365 // ----------------01234567890123
1366
1367 b := testShardBuilder(t, nil,
1368 Document{
1369 Name: "f1",
1370 Content: content,
1371 })
1372
1373 t.Run("LineMatches", func(t *testing.T) {
1374 sres := searchForTest(t, b,
1375 &query.Regexp{
1376 Regexp: mustParseRE("dle.*bla"),
1377 })
1378
1379 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1380 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1381 }
1382
1383 got := sres.Files[0].LineMatches[0]
1384 want := zoekt.LineMatch{
1385 LineFragments: []zoekt.LineFragmentMatch{{
1386 LineOffset: 3,
1387 Offset: 3,
1388 MatchLength: 11,
1389 }},
1390 Line: content,
1391 FileName: false,
1392 LineNumber: 1,
1393 LineStart: 0,
1394 LineEnd: 14,
1395 }
1396
1397 if !reflect.DeepEqual(got, want) {
1398 t.Errorf("got %#v, want %#v", got, want)
1399 }
1400 })
1401
1402 t.Run("ChunkMatches", func(t *testing.T) {
1403 sres := searchForTest(t, b,
1404 &query.Regexp{
1405 Regexp: mustParseRE("dle.*bla"),
1406 }, chunkOpts)
1407
1408 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1409 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1410 }
1411
1412 got := sres.Files[0].ChunkMatches[0]
1413 want := zoekt.ChunkMatch{
1414 Content: content,
1415 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1416 Ranges: []zoekt.Range{{
1417 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1418 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1419 }},
1420 }
1421
1422 if diff := cmp.Diff(want, got); diff != "" {
1423 t.Fatal(diff)
1424 }
1425 })
1426}
1427
1428func TestRegexpFile(t *testing.T) {
1429 content := []byte("needle the bla")
1430
1431 name := "let's play: find the mussel"
1432 b := testShardBuilder(t, nil,
1433 Document{Name: name, Content: content},
1434 Document{Name: "play.txt", Content: content})
1435
1436 t.Run("LineMatches", func(t *testing.T) {
1437 sres := searchForTest(t, b,
1438 &query.Regexp{
1439 Regexp: mustParseRE("play.*mussel"),
1440 FileName: true,
1441 })
1442
1443 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1444 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1445 }
1446
1447 if sres.Files[0].FileName != name {
1448 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1449 }
1450 })
1451
1452 t.Run("ChunkMatches", func(t *testing.T) {
1453 sres := searchForTest(t, b,
1454 &query.Regexp{
1455 Regexp: mustParseRE("play.*mussel"),
1456 FileName: true,
1457 }, chunkOpts)
1458
1459 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1460 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1461 }
1462
1463 if sres.Files[0].FileName != name {
1464 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1465 }
1466 })
1467}
1468
1469func TestRegexpOrder(t *testing.T) {
1470 content := []byte("bla the needle")
1471 // ----------------01234567890123
1472
1473 b := testShardBuilder(t, nil,
1474 Document{Name: "f1", Content: content})
1475
1476 t.Run("LineMatches", func(t *testing.T) {
1477 sres := searchForTest(t, b,
1478 &query.Regexp{
1479 Regexp: mustParseRE("dle.*bla"),
1480 })
1481
1482 if len(sres.Files) != 0 {
1483 t.Fatalf("got %v, want 0 matches", sres.Files)
1484 }
1485 })
1486
1487 t.Run("ChunkMatches", func(t *testing.T) {
1488 sres := searchForTest(t, b,
1489 &query.Regexp{
1490 Regexp: mustParseRE("dle.*bla"),
1491 })
1492
1493 if len(sres.Files) != 0 {
1494 t.Fatalf("got %v, want 0 matches", sres.Files)
1495 }
1496 })
1497}
1498
1499func TestRepoName(t *testing.T) {
1500 content := []byte("bla the needle")
1501 // ----------------01234567890123
1502
1503 b := testShardBuilder(t, &zoekt.Repository{Name: "bla"},
1504 Document{Name: "f1", Content: content})
1505
1506 t.Run("LineMatches", func(t *testing.T) {
1507 sres := searchForTest(t, b,
1508 query.NewAnd(
1509 &query.Substring{Pattern: "needle"},
1510 &query.Repo{Regexp: regexp.MustCompile("foo")},
1511 ))
1512
1513 if len(sres.Files) != 0 {
1514 t.Fatalf("got %v, want 0 matches", sres.Files)
1515 }
1516
1517 if sres.Stats.FilesConsidered > 0 {
1518 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1519 }
1520
1521 sres = searchForTest(t, b,
1522 query.NewAnd(
1523 &query.Substring{Pattern: "needle"},
1524 &query.Repo{Regexp: regexp.MustCompile("bla")},
1525 ))
1526 if len(sres.Files) != 1 {
1527 t.Fatalf("got %v, want 1 match", sres.Files)
1528 }
1529 })
1530
1531 t.Run("ChunkMatches", func(t *testing.T) {
1532 sres := searchForTest(t, b,
1533 query.NewAnd(
1534 &query.Substring{Pattern: "needle"},
1535 &query.Repo{Regexp: regexp.MustCompile("foo")},
1536 ),
1537 chunkOpts,
1538 )
1539
1540 if len(sres.Files) != 0 {
1541 t.Fatalf("got %v, want 0 matches", sres.Files)
1542 }
1543
1544 if sres.Stats.FilesConsidered > 0 {
1545 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1546 }
1547
1548 sres = searchForTest(t, b,
1549 query.NewAnd(
1550 &query.Substring{Pattern: "needle"},
1551 &query.Repo{Regexp: regexp.MustCompile("bla")},
1552 ))
1553 if len(sres.Files) != 1 {
1554 t.Fatalf("got %v, want 1 match", sres.Files)
1555 }
1556 })
1557}
1558
1559func TestMergeMatches(t *testing.T) {
1560 t.Run("LineMatches, adjacent matches", func(t *testing.T) {
1561 b := testShardBuilder(t, nil,
1562 Document{Name: "f1", Content: []byte("blablabla")})
1563 sres := searchForTest(t, b,
1564 &query.Substring{Pattern: "bla"})
1565
1566 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1567 t.Fatalf("got %v, want 1 match", sres.Files)
1568 }
1569
1570 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
1571 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
1572 }
1573 })
1574
1575 t.Run("LineMatches, overlapping matches", func(t *testing.T) {
1576 b := testShardBuilder(t, nil,
1577 Document{Name: "f1", Content: []byte("hellogoodbye")})
1578 sres := searchForTest(t, b,
1579 &query.And{Children: []query.Q{
1580 &query.Substring{Pattern: "hello"},
1581 &query.Substring{Pattern: "logood"},
1582 }})
1583
1584 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1585 t.Fatalf("got %v, want 1 match", sres.Files)
1586 }
1587
1588 lineFragments := sres.Files[0].LineMatches[0].LineFragments
1589 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
1590 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
1591 }
1592 })
1593
1594 t.Run("ChunkMatches, no overlap", func(t *testing.T) {
1595 b := testShardBuilder(t, nil,
1596 Document{Name: "f1", Content: []byte("blablabla")})
1597
1598 sres := searchForTest(t, b,
1599 &query.Substring{Pattern: "bla"},
1600 chunkOpts,
1601 )
1602 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1603 t.Fatalf("got %v, want 1 match", sres.Files)
1604 }
1605
1606 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
1607 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
1608 }
1609 })
1610
1611 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
1612 b := testShardBuilder(t, nil,
1613 Document{Name: "f1", Content: []byte("hellogoodbye")})
1614 sres := searchForTest(t, b,
1615 &query.And{Children: []query.Q{
1616 &query.Substring{Pattern: "hello"},
1617 &query.Substring{Pattern: "logood"},
1618 }}, chunkOpts)
1619
1620 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1621 t.Fatalf("got %v, want 1 chunk match", sres.Files)
1622 }
1623
1624 ranges := sres.Files[0].ChunkMatches[0].Ranges
1625 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
1626 t.Fatalf("got %v, want single chunk range 'hello'", ranges)
1627 }
1628 })
1629}
1630
1631func TestRepoURL(t *testing.T) {
1632 content := []byte("blablabla")
1633 b := testShardBuilder(t, &zoekt.Repository{
1634 Name: "name",
1635 URL: "URL",
1636 CommitURLTemplate: "commit",
1637 FileURLTemplate: "file-url",
1638 LineFragmentTemplate: "fragment",
1639 }, Document{Name: "f1", Content: content})
1640
1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1642
1643 if sres.RepoURLs["name"] != "file-url" {
1644 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1645 }
1646 if sres.LineFragments["name"] != "fragment" {
1647 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1648 }
1649}
1650
1651func TestRegexpCaseSensitive(t *testing.T) {
1652 content := []byte("bla\nfunc unmarshalGitiles\n")
1653 b := testShardBuilder(t, nil, Document{
1654 Name: "f1",
1655 Content: content,
1656 })
1657
1658 t.Run("LineMatches", func(t *testing.T) {
1659 res := searchForTest(t, b,
1660 &query.Regexp{
1661 Regexp: mustParseRE("func.*Gitiles"),
1662 CaseSensitive: true,
1663 })
1664
1665 if len(res.Files) != 1 {
1666 t.Fatalf("got %v, want one index", res.Files)
1667 }
1668 })
1669
1670 t.Run("ChunkMatches", func(t *testing.T) {
1671 res := searchForTest(t, b,
1672 &query.Regexp{
1673 Regexp: mustParseRE("func.*Gitiles"),
1674 CaseSensitive: true,
1675 },
1676 chunkOpts,
1677 )
1678
1679 if len(res.Files) != 1 {
1680 t.Fatalf("got %v, want one index", res.Files)
1681 }
1682 })
1683}
1684
1685func TestRegexpCaseFolding(t *testing.T) {
1686 content := []byte("bla\nfunc unmarshalGitiles\n")
1687
1688 b := testShardBuilder(t, nil,
1689 Document{Name: "f1", Content: content})
1690 res := searchForTest(t, b,
1691 &query.Regexp{
1692 Regexp: mustParseRE("func.*GITILES"),
1693 CaseSensitive: false,
1694 })
1695
1696 if len(res.Files) != 1 {
1697 t.Fatalf("got %v, want one index", res.Files)
1698 }
1699}
1700
1701func TestCaseRegexp(t *testing.T) {
1702 content := []byte("BLABLABLA")
1703 b := testShardBuilder(t, nil,
1704 Document{Name: "f1", Content: content})
1705
1706 t.Run("LineMatches", func(t *testing.T) {
1707 res := searchForTest(t, b,
1708 &query.Regexp{
1709 Regexp: mustParseRE("[xb][xl][xa]"),
1710 CaseSensitive: true,
1711 })
1712
1713 if len(res.Files) > 0 {
1714 t.Fatalf("got %v, want no matches", res.Files)
1715 }
1716 })
1717
1718 t.Run("ChunkMatches", func(t *testing.T) {
1719 res := searchForTest(t, b,
1720 &query.Regexp{
1721 Regexp: mustParseRE("[xb][xl][xa]"),
1722 CaseSensitive: true,
1723 },
1724 chunkOpts,
1725 )
1726
1727 if len(res.Files) > 0 {
1728 t.Fatalf("got %v, want no matches", res.Files)
1729 }
1730 })
1731}
1732
1733func TestNegativeRegexp(t *testing.T) {
1734 content := []byte("BLABLABLA needle bla")
1735 b := testShardBuilder(t, nil,
1736 Document{Name: "f1", Content: content})
1737
1738 t.Run("LineMatches", func(t *testing.T) {
1739 res := searchForTest(t, b,
1740 query.NewAnd(
1741 &query.Substring{
1742 Pattern: "needle",
1743 },
1744 &query.Not{
1745 Child: &query.Regexp{
1746 Regexp: mustParseRE(".cs"),
1747 },
1748 }))
1749
1750 if len(res.Files) != 1 {
1751 t.Fatalf("got %v, want 1 match", res.Files)
1752 }
1753 })
1754
1755 t.Run("ChunkMatches", func(t *testing.T) {
1756 res := searchForTest(t, b,
1757 query.NewAnd(
1758 &query.Substring{
1759 Pattern: "needle",
1760 },
1761 &query.Not{
1762 Child: &query.Regexp{
1763 Regexp: mustParseRE(".cs"),
1764 },
1765 },
1766 ),
1767 chunkOpts)
1768
1769 if len(res.Files) != 1 {
1770 t.Fatalf("got %v, want 1 match", res.Files)
1771 }
1772 })
1773}
1774
1775func TestSymbolRank(t *testing.T) {
1776 t.Skip()
1777
1778 content := []byte("func bla() blubxxxxx")
1779 // ----------------01234567890123456789
1780 b := testShardBuilder(t, nil,
1781 Document{
1782 Name: "f1",
1783 Content: content,
1784 }, Document{
1785 Name: "f2",
1786 Content: content,
1787 Symbols: []DocumentSection{{5, 8}},
1788 }, Document{
1789 Name: "f3",
1790 Content: content,
1791 })
1792
1793 t.Run("LineMatches", func(t *testing.T) {
1794 res := searchForTest(t, b,
1795 &query.Substring{
1796 CaseSensitive: false,
1797 Pattern: "bla",
1798 })
1799
1800 if len(res.Files) != 3 {
1801 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1802 }
1803 if res.Files[0].FileName != "f2" {
1804 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1805 }
1806 })
1807
1808 t.Run("ChunkMatches", func(t *testing.T) {
1809 res := searchForTest(t, b,
1810 &query.Substring{
1811 CaseSensitive: false,
1812 Pattern: "bla",
1813 }, chunkOpts)
1814
1815 if len(res.Files) != 3 {
1816 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1817 }
1818 if res.Files[0].FileName != "f2" {
1819 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1820 }
1821 })
1822}
1823
1824func TestSymbolRankRegexpUTF8(t *testing.T) {
1825 t.Skip()
1826
1827 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1828 content := []byte(prefix +
1829 "func bla() blub")
1830 // ------012345678901234
1831 b := testShardBuilder(t, nil,
1832 Document{
1833 Name: "f1",
1834 Content: content,
1835 }, Document{
1836 Name: "f2",
1837 Content: content,
1838 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1839 }, Document{
1840 Name: "f3",
1841 Content: content,
1842 })
1843
1844 t.Run("LineMatches", func(t *testing.T) {
1845 res := searchForTest(t, b,
1846 &query.Regexp{
1847 Regexp: mustParseRE("b.a"),
1848 })
1849
1850 if len(res.Files) != 3 {
1851 t.Fatalf("got %#v, want 3 files", res.Files)
1852 }
1853 if res.Files[0].FileName != "f2" {
1854 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1855 }
1856 })
1857
1858 t.Run("ChunjkMatches", func(t *testing.T) {
1859 res := searchForTest(t, b,
1860 &query.Regexp{
1861 Regexp: mustParseRE("b.a"),
1862 }, chunkOpts)
1863
1864 if len(res.Files) != 3 {
1865 t.Fatalf("got %#v, want 3 files", res.Files)
1866 }
1867 if res.Files[0].FileName != "f2" {
1868 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1869 }
1870 })
1871}
1872
1873func TestPartialSymbolRank(t *testing.T) {
1874 t.Skip()
1875
1876 content := []byte("func bla() blub")
1877 // ----------------012345678901234
1878
1879 b := testShardBuilder(t, nil,
1880 Document{
1881 Name: "f1",
1882 Content: content,
1883 Symbols: []DocumentSection{{4, 9}},
1884 }, Document{
1885 Name: "f2",
1886 Content: content,
1887 Symbols: []DocumentSection{{4, 8}},
1888 }, Document{
1889 Name: "f3",
1890 Content: content,
1891 Symbols: []DocumentSection{{4, 9}},
1892 })
1893
1894 t.Run("LineMatches", func(t *testing.T) {
1895 res := searchForTest(t, b,
1896 &query.Substring{
1897 Pattern: "bla",
1898 })
1899
1900 if len(res.Files) != 3 {
1901 t.Fatalf("got %#v, want 3 files", res.Files)
1902 }
1903 if res.Files[0].FileName != "f2" {
1904 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1905 }
1906 })
1907
1908 t.Run("ChunkMatches", func(t *testing.T) {
1909 res := searchForTest(t, b,
1910 &query.Substring{
1911 Pattern: "bla",
1912 }, chunkOpts)
1913
1914 if len(res.Files) != 3 {
1915 t.Fatalf("got %#v, want 3 files", res.Files)
1916 }
1917 if res.Files[0].FileName != "f2" {
1918 t.Errorf("got %#v, want 'f2' as top index", res.Files[0])
1919 }
1920 })
1921}
1922
1923func TestNegativeRepo(t *testing.T) {
1924 content := []byte("bla the needle")
1925 // ----------------01234567890123
1926 b := testShardBuilder(t, &zoekt.Repository{
1927 Name: "bla",
1928 }, Document{Name: "f1", Content: content})
1929
1930 t.Run("LineMatches", func(t *testing.T) {
1931 sres := searchForTest(t, b,
1932 query.NewAnd(
1933 &query.Substring{Pattern: "needle"},
1934 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1935 ))
1936
1937 if len(sres.Files) != 0 {
1938 t.Fatalf("got %v, want 0 matches", sres.Files)
1939 }
1940 })
1941
1942 t.Run("ChunkMatches", func(t *testing.T) {
1943 sres := searchForTest(t, b,
1944 query.NewAnd(
1945 &query.Substring{Pattern: "needle"},
1946 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1947 ), chunkOpts)
1948
1949 if len(sres.Files) != 0 {
1950 t.Fatalf("got %v, want 0 matches", sres.Files)
1951 }
1952 })
1953}
1954
1955func TestListRepos(t *testing.T) {
1956 content := []byte("bla the needle\n")
1957 // ----------------012345678901234-
1958
1959 t.Run("default and minimal fallback", func(t *testing.T) {
1960 repo := &zoekt.Repository{
1961 Name: "reponame",
1962 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1963 }
1964 b := testShardBuilder(t, repo,
1965 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1966 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1967 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1968 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1969
1970 searcher := searcherForTest(t, b)
1971
1972 for _, opts := range []*zoekt.ListOptions{
1973 nil,
1974 {},
1975 {Field: zoekt.RepoListFieldRepos},
1976 {Field: zoekt.RepoListFieldReposMap},
1977 } {
1978 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1979 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1980
1981 res, err := searcher.List(context.Background(), q, opts)
1982 if err != nil {
1983 t.Fatalf("List(%v): %v", q, err)
1984 }
1985
1986 want := &zoekt.RepoList{
1987 Repos: []*zoekt.RepoListEntry{{
1988 Repository: *repo,
1989 Stats: zoekt.RepoStats{
1990 Documents: 4,
1991 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1992 Shards: 1,
1993
1994 NewLinesCount: 4,
1995 DefaultBranchNewLinesCount: 2,
1996 OtherBranchesNewLinesCount: 3,
1997 },
1998 }},
1999 Stats: zoekt.RepoStats{
2000 Repos: 1,
2001 Documents: 4,
2002 ContentBytes: 68,
2003 Shards: 1,
2004
2005 NewLinesCount: 4,
2006 DefaultBranchNewLinesCount: 2,
2007 OtherBranchesNewLinesCount: 3,
2008 },
2009 }
2010 ignored := []cmp.Option{
2011 cmpopts.EquateEmpty(),
2012 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
2013 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
2014 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
2015 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
2016 }
2017 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2018 t.Fatalf("mismatch (-want +got):\n%s", diff)
2019 }
2020
2021 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2022 res, err = searcher.List(context.Background(), q, nil)
2023 if err != nil {
2024 t.Fatalf("List(%v): %v", q, err)
2025 }
2026 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2027 t.Fatalf("got %v, want 0 matches", res)
2028 }
2029 })
2030 }
2031 })
2032
2033 t.Run("minimal", func(t *testing.T) {
2034 repo := &zoekt.Repository{
2035 ID: 1234,
2036 Name: "reponame",
2037 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
2038 RawConfig: map[string]string{"repoid": "1234"},
2039 }
2040 b := testShardBuilder(t, repo,
2041 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
2042 Document{Name: "f2", Content: content, Branches: []string{"main"}},
2043 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
2044 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
2045
2046 searcher := searcherForTest(t, b)
2047
2048 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
2049 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2050 if err != nil {
2051 t.Fatalf("List(%v): %v", q, err)
2052 }
2053
2054 want := &zoekt.RepoList{
2055 ReposMap: zoekt.ReposMap{
2056 repo.ID: {
2057 HasSymbols: repo.HasSymbols,
2058 Branches: repo.Branches,
2059 },
2060 },
2061 Stats: zoekt.RepoStats{
2062 Repos: 1,
2063 Shards: 1,
2064 Documents: 4,
2065 IndexBytes: 412,
2066 ContentBytes: 68,
2067 NewLinesCount: 4,
2068 DefaultBranchNewLinesCount: 2,
2069 OtherBranchesNewLinesCount: 3,
2070 },
2071 }
2072
2073 ignored := []cmp.Option{
2074 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
2075 }
2076 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2077 t.Fatalf("mismatch (-want +got):\n%s", diff)
2078 }
2079
2080 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2081 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap})
2082 if err != nil {
2083 t.Fatalf("List(%v): %v", q, err)
2084 }
2085 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2086 t.Fatalf("got %v, want 0 matches", res)
2087 }
2088 })
2089}
2090
2091func TestListReposByContent(t *testing.T) {
2092 content := []byte("bla the needle")
2093
2094 b := testShardBuilder(t, &zoekt.Repository{
2095 Name: "reponame",
2096 },
2097 Document{Name: "f1", Content: content},
2098 Document{Name: "f2", Content: content})
2099
2100 searcher := searcherForTest(t, b)
2101 q := &query.Substring{Pattern: "needle"}
2102 res, err := searcher.List(context.Background(), q, nil)
2103 if err != nil {
2104 t.Fatalf("List(%v): %v", q, err)
2105 }
2106 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
2107 t.Fatalf("got %v, want 1 matches", res)
2108 }
2109 if got := res.Repos[0].Stats.Shards; got != 1 {
2110 t.Fatalf("got %d, want 1 shard", got)
2111 }
2112 q = &query.Substring{Pattern: "foo"}
2113 res, err = searcher.List(context.Background(), q, nil)
2114 if err != nil {
2115 t.Fatalf("List(%v): %v", q, err)
2116 }
2117 if len(res.Repos) != 0 {
2118 t.Fatalf("got %v, want 0 matches", res)
2119 }
2120}
2121
2122func TestMetadata(t *testing.T) {
2123 content := []byte("bla the needle")
2124
2125 b := testShardBuilder(t, &zoekt.Repository{
2126 Name: "reponame",
2127 }, Document{Name: "f1", Content: content},
2128 Document{Name: "f2", Content: content})
2129
2130 var buf bytes.Buffer
2131 if err := b.Write(&buf); err != nil {
2132 t.Fatal(err)
2133 }
2134 f := &memSeeker{buf.Bytes()}
2135
2136 rd, _, err := ReadMetadata(f)
2137 if err != nil {
2138 t.Fatalf("ReadMetadata: %v", err)
2139 }
2140
2141 if got, want := rd[0].Name, "reponame"; got != want {
2142 t.Fatalf("got %q want %q", got, want)
2143 }
2144}
2145
2146func TestRepoWithMetadata(t *testing.T) {
2147 sb := newShardBuilder(0)
2148 sb.repoList = []zoekt.Repository{
2149 {
2150 Name: "repo1",
2151 Metadata: map[string]string{"language": "go", "custom_key": "value"},
2152 },
2153 }
2154
2155 var buf bytes.Buffer
2156 if err := sb.Write(&buf); err != nil {
2157 t.Fatalf("failed to write shard: %v", err)
2158 }
2159
2160 // Simulate reading the shard back
2161 f := &memSeeker{buf.Bytes()}
2162 repoMetaData, _, err := ReadMetadata(f)
2163 if err != nil {
2164 t.Fatalf("failed to read metadata: %v", err)
2165 }
2166
2167 // Verify the metadata
2168 if len(repoMetaData) != 1 {
2169 t.Fatalf("expected 1 repository, got %d", len(repoMetaData))
2170 }
2171 if got, want := repoMetaData[0].Metadata["language"], "go"; got != want {
2172 t.Errorf("expected metadata 'language' to be %q, got %q", want, got)
2173 }
2174 if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want {
2175 t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got)
2176 }
2177}
2178
2179func TestOr(t *testing.T) {
2180 b := testShardBuilder(t, nil,
2181 Document{Name: "f1", Content: []byte("needle")},
2182 Document{Name: "f2", Content: []byte("banana")})
2183 t.Run("LineMatches", func(t *testing.T) {
2184 sres := searchForTest(t, b, query.NewOr(
2185 &query.Substring{Pattern: "needle"},
2186 &query.Substring{Pattern: "banana"}))
2187
2188 if len(sres.Files) != 2 {
2189 t.Fatalf("got %v, want 2 files", sres.Files)
2190 }
2191 })
2192
2193 t.Run("ChunkMatches", func(t *testing.T) {
2194 sres := searchForTest(t, b, query.NewOr(
2195 &query.Substring{Pattern: "needle"},
2196 &query.Substring{Pattern: "banana"}))
2197
2198 if len(sres.Files) != 2 {
2199 t.Fatalf("got %v, want 2 files", sres.Files)
2200 }
2201 })
2202}
2203
2204func TestFrequency(t *testing.T) {
2205 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2206
2207 b := testShardBuilder(t, nil,
2208 Document{
2209 Name: "f1",
2210 Content: content,
2211 })
2212
2213 t.Run("LineMatches", func(t *testing.T) {
2214 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2215 if len(sres.Files) != 0 {
2216 t.Errorf("got %v, wanted 0 matches", sres.Files)
2217 }
2218 })
2219
2220 t.Run("ChunkMatches", func(t *testing.T) {
2221 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2222 if len(sres.Files) != 0 {
2223 t.Errorf("got %v, wanted 0 matches", sres.Files)
2224 }
2225 })
2226}
2227
2228func TestMatchNewline(t *testing.T) {
2229 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2230 if err != nil {
2231 t.Fatalf("syntax.Parse: %v", err)
2232 }
2233
2234 content := []byte("pqr\nalex")
2235
2236 b := testShardBuilder(t, nil,
2237 Document{
2238 Name: "f1",
2239 Content: content,
2240 })
2241
2242 t.Run("LineMatches", func(t *testing.T) {
2243 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2244 if len(sres.Files) != 1 {
2245 t.Errorf("got %v, wanted 1 matches", sres.Files)
2246 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2247 t.Errorf("got match line %q, want %q", l, content)
2248 }
2249 })
2250
2251 t.Run("ChunkMatches", func(t *testing.T) {
2252 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2253 if len(sres.Files) != 1 {
2254 t.Errorf("got %v, wanted 1 matches", sres.Files)
2255 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2256 t.Errorf("got match line %q, want %q", c, content)
2257 }
2258 })
2259}
2260
2261func TestSubRepo(t *testing.T) {
2262 subRepos := map[string]*zoekt.Repository{
2263 "sub": {
2264 Name: "sub-name",
2265 LineFragmentTemplate: "sub-line",
2266 },
2267 }
2268
2269 content := []byte("pqr\nalex")
2270
2271 b := testShardBuilder(t, &zoekt.Repository{
2272 SubRepoMap: subRepos,
2273 }, Document{
2274 Name: "sub/f1",
2275 Content: content,
2276 SubRepositoryPath: "sub",
2277 })
2278
2279 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2280 if len(sres.Files) != 1 {
2281 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2282 }
2283
2284 f := sres.Files[0]
2285 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2286 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2287 }
2288
2289 if sres.LineFragments["sub-name"] != "sub-line" {
2290 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2291 }
2292}
2293
2294func TestSearchEither(t *testing.T) {
2295 b := testShardBuilder(t, nil,
2296 Document{Name: "f1", Content: []byte("bla needle bla")},
2297 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2298
2299 t.Run("LineMatches", func(t *testing.T) {
2300 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2301 if len(sres.Files) != 2 {
2302 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2303 }
2304
2305 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2306 if len(sres.Files) != 1 {
2307 t.Fatalf("got %v, wanted 1 index", sres.Files)
2308 }
2309
2310 if got, want := sres.Files[0].FileName, "f1"; got != want {
2311 t.Errorf("got %q, want %q", got, want)
2312 }
2313 })
2314
2315 t.Run("ChunkMatches", func(t *testing.T) {
2316 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2317 if len(sres.Files) != 2 {
2318 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2319 }
2320
2321 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2322 if len(sres.Files) != 1 {
2323 t.Fatalf("got %v, wanted 1 index", sres.Files)
2324 }
2325
2326 if got, want := sres.Files[0].FileName, "f1"; got != want {
2327 t.Errorf("got %q, want %q", got, want)
2328 }
2329 })
2330}
2331
2332func TestUnicodeExactMatch(t *testing.T) {
2333 needle := "néédlÉ"
2334 content := []byte("blá blá " + needle + " blâ")
2335
2336 b := testShardBuilder(t, nil,
2337 Document{Name: "f1", Content: content})
2338
2339 t.Run("LineMatches", func(t *testing.T) {
2340 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2341 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2342 }
2343 })
2344
2345 t.Run("ChunkMatches", func(t *testing.T) {
2346 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2347 if len(res.Files) != 1 {
2348 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
2349 }
2350 })
2351}
2352
2353func TestUnicodeCoverContent(t *testing.T) {
2354 needle := "néédlÉ"
2355 content := []byte("blá blá " + needle + " blâ")
2356
2357 b := testShardBuilder(t, nil,
2358 Document{Name: "f1", Content: content})
2359
2360 t.Run("LineMatches", func(t *testing.T) {
2361 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2362 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2363 }
2364
2365 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2366 if len(res.Files) != 1 {
2367 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2368 }
2369
2370 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2371 t.Errorf("got %d want %d", got, want)
2372 }
2373 })
2374
2375 t.Run("ChunkMatches", func(t *testing.T) {
2376 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2377 if len(res.Files) != 0 {
2378 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
2379 }
2380
2381 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2382 if len(res.Files) != 1 {
2383 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
2384 }
2385
2386 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2387 want := uint32(strings.Index(string(content), needle))
2388 if got != want {
2389 t.Errorf("got %d want %d", got, want)
2390 }
2391 })
2392}
2393
2394func TestUnicodeNonCoverContent(t *testing.T) {
2395 needle := "nééáádlÉ"
2396 content := []byte("blá blá " + needle + " blâ")
2397
2398 b := testShardBuilder(t, nil,
2399 Document{Name: "f1", Content: content})
2400
2401 t.Run("LineMatches", func(t *testing.T) {
2402 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2403 if len(res.Files) != 1 {
2404 t.Fatalf("got %v, wanted 1 index", res.Files)
2405 }
2406
2407 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2408 t.Errorf("got %d want %d", got, want)
2409 }
2410 })
2411
2412 t.Run("ChunkMatches", func(t *testing.T) {
2413 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2414 if len(res.Files) != 1 {
2415 t.Fatalf("got %v, wanted 1 index", res.Files)
2416 }
2417
2418 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2419 want := uint32(strings.Index(string(content), needle))
2420 if got != want {
2421 t.Errorf("got %d want %d", got, want)
2422 }
2423 })
2424}
2425
// kelvinCodePoint is the code point U+212A (KELVIN SIGN). The tests in this
// file use it as the counterpart of ASCII 'k': the two have UTF-8 encodings
// of different lengths (3 bytes versus 1 byte).
const kelvinCodePoint = 8490
2427
2428func TestUnicodeVariableLength(t *testing.T) {
2429 lower := 'k'
2430 upper := rune(kelvinCodePoint)
2431
2432 needle := "nee" + string([]rune{lower}) + "eed"
2433 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2434 " ee" + string([]rune{lower}) + "ee" +
2435 " ee" + string([]rune{upper}) + "ee")
2436
2437 t.Run("LineMatches", func(t *testing.T) {
2438 b := testShardBuilder(t, nil,
2439 Document{Name: "f1", Content: corpus})
2440
2441 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2442 if len(res.Files) != 1 {
2443 t.Fatalf("got %v, wanted 1 index", res.Files)
2444 }
2445 })
2446
2447 t.Run("ChunkMatches", func(t *testing.T) {
2448 b := testShardBuilder(t, nil,
2449 Document{Name: "f1", Content: corpus})
2450
2451 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2452 if len(res.Files) != 1 {
2453 t.Fatalf("got %v, wanted 1 index", res.Files)
2454 }
2455 })
2456}
2457
2458func TestUnicodeFileStartOffsets(t *testing.T) {
2459 unicode := "世界"
2460 wat := "waaaaaat"
2461 b := testShardBuilder(t, nil,
2462 Document{
2463 Name: "f1",
2464 Content: []byte(unicode),
2465 },
2466 Document{
2467 Name: "f2",
2468 Content: []byte(wat),
2469 },
2470 )
2471 q := &query.Substring{Pattern: wat, Content: true}
2472 res := searchForTest(t, b, q)
2473 if len(res.Files) != 1 {
2474 t.Fatalf("got %v, wanted 1 index", res.Files)
2475 }
2476}
2477
2478func TestLongFileUTF8(t *testing.T) {
2479 needle := "neeedle"
2480
2481 // 6 bytes.
2482 unicode := "世界"
2483 content := []byte(strings.Repeat(unicode, 100) + needle)
2484 b := testShardBuilder(t, nil,
2485 Document{
2486 Name: "f1",
2487 Content: []byte(strings.Repeat("a", 50)),
2488 },
2489 Document{
2490 Name: "f2",
2491 Content: content,
2492 })
2493
2494 t.Run("LineMatches", func(t *testing.T) {
2495 q := &query.Substring{Pattern: needle, Content: true}
2496 res := searchForTest(t, b, q)
2497 if len(res.Files) != 1 {
2498 t.Errorf("got %v, want 1 result", res)
2499 }
2500 })
2501
2502 t.Run("ChunkMatches", func(t *testing.T) {
2503 q := &query.Substring{Pattern: needle, Content: true}
2504 res := searchForTest(t, b, q, chunkOpts)
2505 if len(res.Files) != 1 {
2506 t.Errorf("got %v, want 1 result", res)
2507 }
2508 })
2509}
2510
2511func TestEstimateDocCount(t *testing.T) {
2512 content := []byte("bla needle bla")
2513 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2514 Document{Name: "f1", Content: content},
2515 Document{Name: "f2", Content: content},
2516 )
2517
2518 t.Run("LineMatches", func(t *testing.T) {
2519 if sres := searchForTest(t, b,
2520 query.NewAnd(
2521 &query.Substring{Pattern: "needle"},
2522 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2523 ), zoekt.SearchOptions{
2524 EstimateDocCount: true,
2525 }); sres.Stats.ShardFilesConsidered != 2 {
2526 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2527 }
2528 if sres := searchForTest(t, b,
2529 query.NewAnd(
2530 &query.Substring{Pattern: "needle"},
2531 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2532 ), zoekt.SearchOptions{
2533 EstimateDocCount: true,
2534 }); sres.Stats.ShardFilesConsidered != 0 {
2535 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2536 }
2537 })
2538
2539 t.Run("ChunkMatches", func(t *testing.T) {
2540 if sres := searchForTest(t, b,
2541 query.NewAnd(
2542 &query.Substring{Pattern: "needle"},
2543 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2544 ), zoekt.SearchOptions{
2545 EstimateDocCount: true,
2546 ChunkMatches: true,
2547 }); sres.Stats.ShardFilesConsidered != 2 {
2548 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2549 }
2550 if sres := searchForTest(t, b,
2551 query.NewAnd(
2552 &query.Substring{Pattern: "needle"},
2553 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2554 ), zoekt.SearchOptions{
2555 EstimateDocCount: true,
2556 ChunkMatches: true,
2557 }); sres.Stats.ShardFilesConsidered != 0 {
2558 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2559 }
2560 })
2561}
2562
2563func TestUTF8CorrectCorpus(t *testing.T) {
2564 needle := "neeedle"
2565
2566 // 6 bytes.
2567 unicode := "世界"
2568 b := testShardBuilder(t, nil,
2569 Document{
2570 Name: "f1",
2571 Content: []byte(strings.Repeat(unicode, 100)),
2572 },
2573 Document{
2574 Name: "xxxxxneeedle",
2575 Content: []byte("hello"),
2576 })
2577
2578 t.Run("LineMatches", func(t *testing.T) {
2579 q := &query.Substring{Pattern: needle, FileName: true}
2580 res := searchForTest(t, b, q)
2581 if len(res.Files) != 1 {
2582 t.Errorf("got %v, want 1 result", res)
2583 }
2584 })
2585
2586 t.Run("ChunkMatches", func(t *testing.T) {
2587 q := &query.Substring{Pattern: needle, FileName: true}
2588 res := searchForTest(t, b, q, chunkOpts)
2589 if len(res.Files) != 1 {
2590 t.Errorf("got %v, want 1 result", res)
2591 }
2592 })
2593}
2594
2595func TestBuilderStats(t *testing.T) {
2596 b := testShardBuilder(t, nil,
2597 Document{
2598 Name: "f1",
2599 Content: []byte(strings.Repeat("abcd", 1024)),
2600 })
2601 var buf bytes.Buffer
2602 if err := b.Write(&buf); err != nil {
2603 t.Fatal(err)
2604 }
2605
2606 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2607 t.Errorf("got %d, want %d", got, want)
2608 }
2609}
2610
2611func TestIOStats(t *testing.T) {
2612 b := testShardBuilder(t, nil,
2613 Document{
2614 Name: "f1",
2615 Content: []byte(strings.Repeat("abcd", 1024)),
2616 })
2617
2618 t.Run("LineMatches", func(t *testing.T) {
2619 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2620 res := searchForTest(t, b, q)
2621
2622 // 4096 (content) + 2 (overhead: newlines or doc sections)
2623 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2624 t.Errorf("got content I/O %d, want %d", got, want)
2625 }
2626
2627 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2628 // delta encoded.
2629 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2630 t.Errorf("got index I/O %d, want %d", got, want)
2631 }
2632 })
2633
2634 t.Run("ChunkMatches", func(t *testing.T) {
2635 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2636 res := searchForTest(t, b, q, chunkOpts)
2637
2638 // 4096 (content) + 2 (overhead: newlines or doc sections)
2639 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2640 t.Errorf("got content I/O %d, want %d", got, want)
2641 }
2642
2643 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2644 // delta encoded.
2645 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2646 t.Errorf("got index I/O %d, want %d", got, want)
2647 }
2648 })
2649
2650 t.Run("LineMatches with BM25", func(t *testing.T) {
2651 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2652 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true})
2653
2654 // 4096 (content) + 2 (overhead: newlines or doc sections)
2655 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2656 t.Errorf("got content I/O %d, want %d", got, want)
2657 }
2658
2659 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2660 // delta encoded.
2661 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2662 t.Errorf("got index I/O %d, want %d", got, want)
2663 }
2664 })
2665
2666 t.Run("ChunkMatches with BM25", func(t *testing.T) {
2667 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2668 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true})
2669
2670 // 4096 (content) + 2 (overhead: newlines or doc sections)
2671 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2672 t.Errorf("got content I/O %d, want %d", got, want)
2673 }
2674
2675 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2676 // delta encoded.
2677 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2678 t.Errorf("got index I/O %d, want %d", got, want)
2679 }
2680 })
2681}
2682
2683func TestStartLineAnchor(t *testing.T) {
2684 b := testShardBuilder(t, nil,
2685 Document{
2686 Name: "f1",
2687 Content: []byte(
2688 `hello
2689start of middle of line
2690`),
2691 })
2692
2693 t.Run("LineMatches", func(t *testing.T) {
2694 q, err := query.Parse("^start")
2695 if err != nil {
2696 t.Errorf("parse: %v", err)
2697 }
2698
2699 res := searchForTest(t, b, q)
2700 if len(res.Files) != 1 {
2701 t.Errorf("got %v, want 1 file", res.Files)
2702 }
2703
2704 q, err = query.Parse("^middle")
2705 if err != nil {
2706 t.Errorf("parse: %v", err)
2707 }
2708 res = searchForTest(t, b, q)
2709 if len(res.Files) != 0 {
2710 t.Errorf("got %v, want 0 files", res.Files)
2711 }
2712 })
2713
2714 t.Run("ChunkMatches", func(t *testing.T) {
2715 q, err := query.Parse("^start")
2716 if err != nil {
2717 t.Errorf("parse: %v", err)
2718 }
2719
2720 res := searchForTest(t, b, q, chunkOpts)
2721 if len(res.Files) != 1 {
2722 t.Errorf("got %v, want 1 file", res.Files)
2723 }
2724
2725 q, err = query.Parse("^middle")
2726 if err != nil {
2727 t.Errorf("parse: %v", err)
2728 }
2729 res = searchForTest(t, b, q, chunkOpts)
2730 if len(res.Files) != 0 {
2731 t.Errorf("got %v, want 0 files", res.Files)
2732 }
2733 })
2734}
2735
2736func TestAndOrUnicode(t *testing.T) {
2737 q, err := query.Parse("orange.*apple")
2738 if err != nil {
2739 t.Errorf("parse: %v", err)
2740 }
2741 finalQ := query.NewAnd(q,
2742 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2743 query.NewOr(&query.Branch{Pattern: "master"}))))
2744
2745 b := testShardBuilder(t, &zoekt.Repository{
2746 Name: "name",
2747 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}},
2748 }, Document{
2749 Name: "f2",
2750 Content: []byte("orange\u2318apple"),
2751 // --------------0123456 78901
2752 Branches: []string{"master"},
2753 })
2754
2755 t.Run("LineMatches", func(t *testing.T) {
2756 res := searchForTest(t, b, finalQ)
2757 if len(res.Files) != 1 {
2758 t.Errorf("got %v, want 1 result", res.Files)
2759 }
2760 })
2761
2762 t.Run("ChunkMatches", func(t *testing.T) {
2763 res := searchForTest(t, b, finalQ, chunkOpts)
2764 if len(res.Files) != 1 {
2765 t.Errorf("got %v, want 1 result", res.Files)
2766 }
2767 })
2768}
2769
2770func TestAndShort(t *testing.T) {
2771 content := []byte("bla needle at orange bla")
2772 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2773 Document{Name: "f1", Content: content},
2774 Document{Name: "f2", Content: []byte("xx at xx")},
2775 Document{Name: "f3", Content: []byte("yy orange xx")},
2776 )
2777
2778 q := query.NewAnd(&query.Substring{Pattern: "at"},
2779 &query.Substring{Pattern: "orange"})
2780
2781 t.Run("LineMatches", func(t *testing.T) {
2782 res := searchForTest(t, b, q)
2783 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2784 t.Errorf("got %v, want 1 result", res.Files)
2785 }
2786 })
2787
2788 t.Run("ChunkMatches", func(t *testing.T) {
2789 res := searchForTest(t, b, q, chunkOpts)
2790 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2791 t.Errorf("got %v, want 1 result", res.Files)
2792 }
2793 })
2794}
2795
2796func TestNoCollectRegexpSubstring(t *testing.T) {
2797 content := []byte("bla final bla\nfoo final, foo")
2798 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2799 Document{Name: "f1", Content: content},
2800 )
2801
2802 q := &query.Regexp{
2803 Regexp: mustParseRE("final[,.]"),
2804 }
2805
2806 t.Run("LineMatches", func(t *testing.T) {
2807 res := searchForTest(t, b, q)
2808 if len(res.Files) != 1 {
2809 t.Fatalf("got %v, want 1 result", res.Files)
2810 }
2811 if f := res.Files[0]; len(f.LineMatches) != 1 {
2812 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2813 }
2814 })
2815
2816 t.Run("ChunkMatches", func(t *testing.T) {
2817 res := searchForTest(t, b, q, chunkOpts)
2818 if len(res.Files) != 1 {
2819 t.Fatalf("got %v, want 1 result", res.Files)
2820 }
2821 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2822 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
2823 }
2824 })
2825}
2826
2827func printLineMatches(ms []zoekt.LineMatch) string {
2828 var ss []string
2829 for _, m := range ms {
2830 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2831 }
2832
2833 return strings.Join(ss, ", ")
2834}
2835
2836func TestLang(t *testing.T) {
2837 content := []byte("bla needle bla")
2838 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2839 Document{Name: "f1", Content: content},
2840 Document{Name: "f2", Language: "java", Content: content},
2841 Document{Name: "f3", Language: "cpp", Content: content},
2842 )
2843
2844 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2845 &query.Language{Language: "cpp"})
2846
2847 t.Run("LineMatches", func(t *testing.T) {
2848 res := searchForTest(t, b, q)
2849 if len(res.Files) != 1 {
2850 t.Fatalf("got %v, want 1 result in f3", res.Files)
2851 }
2852 f := res.Files[0]
2853 if f.FileName != "f3" || f.Language != "cpp" {
2854 t.Fatalf("got %v, want 1 match with language cpp", f)
2855 }
2856 })
2857
2858 t.Run("ChunkMatches", func(t *testing.T) {
2859 res := searchForTest(t, b, q, chunkOpts)
2860 if len(res.Files) != 1 {
2861 t.Fatalf("got %v, want 1 result in f3", res.Files)
2862 }
2863 f := res.Files[0]
2864 if f.FileName != "f3" || f.Language != "cpp" {
2865 t.Fatalf("got %v, want 1 match with language cpp", f)
2866 }
2867 })
2868}
2869
2870func TestLangShortcut(t *testing.T) {
2871 content := []byte("bla needle bla")
2872 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2873 Document{Name: "f2", Language: "java", Content: content},
2874 Document{Name: "f3", Language: "cpp", Content: content},
2875 )
2876
2877 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2878 &query.Language{Language: "fortran"})
2879
2880 t.Run("LineMatches", func(t *testing.T) {
2881 res := searchForTest(t, b, q)
2882 if len(res.Files) != 0 {
2883 t.Fatalf("got %v, want 0 results", res.Files)
2884 }
2885 if res.Stats.IndexBytesLoaded > 0 {
2886 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2887 }
2888 })
2889
2890 t.Run("ChunkMatches", func(t *testing.T) {
2891 res := searchForTest(t, b, q, chunkOpts)
2892 if len(res.Files) != 0 {
2893 t.Fatalf("got %v, want 0 results", res.Files)
2894 }
2895 if res.Stats.IndexBytesLoaded > 0 {
2896 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2897 }
2898 })
2899}
2900
2901func TestNoTextMatchAtoms(t *testing.T) {
2902 content := []byte("bla needle bla")
2903 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2904 Document{Name: "f1", Content: content},
2905 Document{Name: "f2", Language: "java", Content: content},
2906 Document{Name: "f3", Language: "cpp", Content: content},
2907 )
2908 q := query.NewAnd(&query.Language{Language: "java"})
2909 t.Run("LineMatches", func(t *testing.T) {
2910 res := searchForTest(t, b, q)
2911 if len(res.Files) != 1 {
2912 t.Fatalf("got %v, want 1 result in f3", res.Files)
2913 }
2914 })
2915
2916 t.Run("ChunkMatches", func(t *testing.T) {
2917 res := searchForTest(t, b, q, chunkOpts)
2918 if len(res.Files) != 1 {
2919 t.Fatalf("got %v, want 1 result in f3", res.Files)
2920 }
2921 })
2922}
2923
2924func TestNoPositiveAtoms(t *testing.T) {
2925 content := []byte("bla needle bla")
2926 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2927 Document{Name: "f1", Content: content},
2928 Document{Name: "f2", Content: content},
2929 )
2930
2931 q := query.NewAnd(
2932 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2933 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2934 t.Run("LineMatches", func(t *testing.T) {
2935 res := searchForTest(t, b, q)
2936 if len(res.Files) != 2 {
2937 t.Fatalf("got %v, want 2 results in f3", res.Files)
2938 }
2939 })
2940 t.Run("ChunkMatches", func(t *testing.T) {
2941 res := searchForTest(t, b, q, chunkOpts)
2942 if len(res.Files) != 2 {
2943 t.Fatalf("got %v, want 2 results in f3", res.Files)
2944 }
2945 })
2946}
2947
2948func TestSymbolBoundaryStart(t *testing.T) {
2949 content := []byte("start\nbla bla\nend")
2950 // ----------------012345-67890123-456
2951
2952 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2953 Document{
2954 Name: "f1",
2955 Content: content,
2956 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2957 },
2958 )
2959 q := &query.Symbol{
2960 Expr: &query.Substring{Pattern: "start"},
2961 }
2962 t.Run("LineMatches", func(t *testing.T) {
2963 res := searchForTest(t, b, q)
2964 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2965 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2966 }
2967 m := res.Files[0].LineMatches[0].LineFragments[0]
2968 if m.Offset != 0 {
2969 t.Fatalf("got offset %d want 0", m.Offset)
2970 }
2971 })
2972
2973 t.Run("ChunkMatches", func(t *testing.T) {
2974 res := searchForTest(t, b, q, chunkOpts)
2975 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2976 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2977 }
2978 m := res.Files[0].ChunkMatches[0].Ranges[0]
2979 if m.Start.ByteOffset != 0 {
2980 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2981 }
2982 })
2983}
2984
2985func TestSymbolBoundaryEnd(t *testing.T) {
2986 content := []byte("start\nbla bla\nend")
2987 // ----------------012345-67890123-456
2988
2989 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
2990 Document{
2991 Name: "f1",
2992 Content: content,
2993 Symbols: []DocumentSection{{14, 17}},
2994 },
2995 )
2996 q := &query.Symbol{
2997 Expr: &query.Substring{Pattern: "end"},
2998 }
2999 t.Run("LineMatches", func(t *testing.T) {
3000 res := searchForTest(t, b, q)
3001 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3002 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3003 }
3004 m := res.Files[0].LineMatches[0].LineFragments[0]
3005 if m.Offset != 14 {
3006 t.Fatalf("got offset %d want 0", m.Offset)
3007 }
3008 })
3009
3010 t.Run("ChunkMatches", func(t *testing.T) {
3011 res := searchForTest(t, b, q, chunkOpts)
3012 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3013 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3014 }
3015 m := res.Files[0].ChunkMatches[0].Ranges[0]
3016 if m.Start.ByteOffset != 14 {
3017 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
3018 }
3019 })
3020}
3021
3022func TestSymbolSubstring(t *testing.T) {
3023 content := []byte("bla\nsymblabla\nbla")
3024 // ----------------0123-4567890123-456
3025
3026 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3027 Document{
3028 Name: "f1",
3029 Content: content,
3030 Symbols: []DocumentSection{{4, 12}},
3031 },
3032 )
3033 q := &query.Symbol{
3034 Expr: &query.Substring{Pattern: "bla"},
3035 }
3036 t.Run("LineMatches", func(t *testing.T) {
3037 res := searchForTest(t, b, q)
3038 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3039 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3040 }
3041 m := res.Files[0].LineMatches[0].LineFragments[0]
3042 if m.Offset != 7 || m.MatchLength != 3 {
3043 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
3044 }
3045 })
3046
3047 t.Run("ChunkMatches", func(t *testing.T) {
3048 res := searchForTest(t, b, q, chunkOpts)
3049 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3050 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3051 }
3052 m := res.Files[0].ChunkMatches[0].Ranges[0]
3053 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
3054 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
3055 }
3056 })
3057}
3058
3059func TestSymbolSubstringExact(t *testing.T) {
3060 content := []byte("bla\nsym\nbla\nsym\nasymb")
3061 // ----------------0123-4567-890123456-78901
3062
3063 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3064 Document{
3065 Name: "f1",
3066 Content: content,
3067 Symbols: []DocumentSection{{4, 7}},
3068 },
3069 )
3070 q := &query.Symbol{
3071 Expr: &query.Substring{Pattern: "sym"},
3072 }
3073 t.Run("LineMatches", func(t *testing.T) {
3074 res := searchForTest(t, b, q)
3075 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3076 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3077 }
3078 m := res.Files[0].LineMatches[0].LineFragments[0]
3079 if m.Offset != 4 {
3080 t.Fatalf("got offset %d, want 7", m.Offset)
3081 }
3082 })
3083
3084 t.Run("ChunkMatches", func(t *testing.T) {
3085 res := searchForTest(t, b, q, chunkOpts)
3086 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3087 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3088 }
3089 m := res.Files[0].ChunkMatches[0].Ranges[0]
3090 if m.Start.ByteOffset != 4 {
3091 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
3092 }
3093 })
3094}
3095
3096func TestSymbolRegexpExact(t *testing.T) {
3097 content := []byte("blah\nbla\nbl")
3098 // ----------------01234-5678-90
3099
3100 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3101 Document{
3102 Name: "f1",
3103 Content: content,
3104 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
3105 },
3106 )
3107 q := &query.Symbol{
3108 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
3109 }
3110 t.Run("LineMatches", func(t *testing.T) {
3111 res := searchForTest(t, b, q)
3112 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3113 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3114 }
3115 m := res.Files[0].LineMatches[0].LineFragments[0]
3116 if m.Offset != 5 {
3117 t.Fatalf("got offset %d, want 5", m.Offset)
3118 }
3119 })
3120
3121 t.Run("ChunkMatches", func(t *testing.T) {
3122 res := searchForTest(t, b, q, chunkOpts)
3123 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3124 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3125 }
3126 m := res.Files[0].ChunkMatches[0].Ranges[0]
3127 if m.Start.ByteOffset != 5 {
3128 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
3129 }
3130 })
3131}
3132
3133func TestSymbolRegexpPartial(t *testing.T) {
3134 content := []byte("abcdef")
3135 // ----------------012345
3136
3137 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3138 Document{
3139 Name: "f1",
3140 Content: content,
3141 Symbols: []DocumentSection{{0, 6}},
3142 },
3143 )
3144 q := &query.Symbol{
3145 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
3146 }
3147 t.Run("LineMatches", func(t *testing.T) {
3148 res := searchForTest(t, b, q)
3149 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3150 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3151 }
3152 m := res.Files[0].LineMatches[0].LineFragments[0]
3153 if m.Offset != 1 {
3154 t.Fatalf("got offset %d, want 1", m.Offset)
3155 }
3156 if m.MatchLength != 3 {
3157 t.Fatalf("got match length %d, want 3", m.MatchLength)
3158 }
3159 })
3160
3161 t.Run("ChunkMatches", func(t *testing.T) {
3162 res := searchForTest(t, b, q, chunkOpts)
3163 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3164 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3165 }
3166 m := res.Files[0].ChunkMatches[0].Ranges[0]
3167 if m.Start.ByteOffset != 1 {
3168 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
3169 }
3170 if m.End.ByteOffset != 4 {
3171 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
3172 }
3173 })
3174}
3175
3176func TestSymbolRegexpAll(t *testing.T) {
3177 docs := []Document{
3178 {
3179 Name: "f1",
3180 Content: []byte("Hello Zoekt"),
3181 // --------------01234567890
3182 Symbols: []DocumentSection{{0, 5}, {6, 11}},
3183 },
3184 {
3185 Name: "f2",
3186 Content: []byte("Second Zoekt Third"),
3187 // --------------012345678901234567
3188 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
3189 },
3190 }
3191
3192 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...)
3193 q := &query.Symbol{
3194 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
3195 }
3196 t.Run("LineMatches", func(t *testing.T) {
3197 res := searchForTest(t, b, q)
3198 if len(res.Files) != len(docs) {
3199 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3200 }
3201 for i, want := range docs {
3202 got := res.Files[i].LineMatches[0].LineFragments
3203 if len(got) != len(want.Symbols) {
3204 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3205 }
3206
3207 for j, sec := range want.Symbols {
3208 if sec.Start != got[j].Offset {
3209 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3210 }
3211 }
3212 }
3213 })
3214
3215 t.Run("ChunkMatches", func(t *testing.T) {
3216 res := searchForTest(t, b, q, chunkOpts)
3217 if len(res.Files) != len(docs) {
3218 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3219 }
3220 for i, want := range docs {
3221 got := res.Files[i].ChunkMatches[0].Ranges
3222 if len(got) != len(want.Symbols) {
3223 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3224 }
3225
3226 for j, sec := range want.Symbols {
3227 if sec.Start != got[j].Start.ByteOffset {
3228 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3229 }
3230 }
3231 }
3232 })
3233}
3234
3235func TestHitIterTerminate(t *testing.T) {
3236 // contrived input: trigram frequencies forces selecting abc +
3237 // def for the distance iteration. There is no index, so this
3238 // will advance the compressedPostingIterator to beyond the
3239 // end.
3240 content := []byte("abc bcdbcd cdecde abcabc def efg")
3241 b := testShardBuilder(t, nil,
3242 Document{
3243 Name: "f1",
3244 Content: content,
3245 },
3246 )
3247
3248 t.Run("LineMatches", func(t *testing.T) {
3249 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3250 })
3251
3252 t.Run("ChunkMatches", func(t *testing.T) {
3253 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3254 })
3255}
3256
3257func TestDistanceHitIterBailLast(t *testing.T) {
3258 content := []byte("AST AST AST UASH")
3259 b := testShardBuilder(t, nil,
3260 Document{
3261 Name: "f1",
3262 Content: content,
3263 },
3264 )
3265 t.Run("LineMatches", func(t *testing.T) {
3266 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3267 if len(res.Files) != 0 {
3268 t.Fatalf("got %v, want no results", res.Files)
3269 }
3270 })
3271
3272 t.Run("LineMatches", func(t *testing.T) {
3273 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3274 if len(res.Files) != 0 {
3275 t.Fatalf("got %v, want no results", res.Files)
3276 }
3277 })
3278}
3279
3280func TestDocumentSectionRuneBoundary(t *testing.T) {
3281 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3282 b, err := NewShardBuilder(nil)
3283 if err != nil {
3284 t.Fatalf("NewShardBuilder: %v", err)
3285 }
3286
3287 for i, sec := range []DocumentSection{
3288 {2, 6},
3289 {3, 7},
3290 } {
3291 if err := b.Add(Document{
3292 Name: "f1",
3293 Content: []byte(content),
3294 Symbols: []DocumentSection{sec},
3295 }); err == nil {
3296 t.Errorf("%d: Add succeeded", i)
3297 }
3298 }
3299}
3300
3301func TestUnicodeQuery(t *testing.T) {
3302 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3303 b := testShardBuilder(t, nil,
3304 Document{
3305 Name: "f1",
3306 Content: []byte(content),
3307 },
3308 )
3309
3310 q := &query.Substring{Pattern: content}
3311
3312 t.Run("LineMatches", func(t *testing.T) {
3313 res := searchForTest(t, b, q)
3314 if len(res.Files) != 1 {
3315 t.Fatalf("want 1 match, got %v", res.Files)
3316 }
3317
3318 f := res.Files[0]
3319 if len(f.LineMatches) != 1 {
3320 t.Fatalf("want 1 line, got %v", f.LineMatches)
3321 }
3322 l := f.LineMatches[0]
3323
3324 if len(l.LineFragments) != 1 {
3325 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3326 }
3327 fr := l.LineFragments[0]
3328 if fr.MatchLength != len(content) {
3329 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3330 }
3331 })
3332
3333 t.Run("ChunkMatches", func(t *testing.T) {
3334 res := searchForTest(t, b, q, chunkOpts)
3335 if len(res.Files) != 1 {
3336 t.Fatalf("want 1 match, got %v", res.Files)
3337 }
3338
3339 f := res.Files[0]
3340 if len(f.ChunkMatches) != 1 {
3341 t.Fatalf("want 1 line, got %v", f.LineMatches)
3342 }
3343 cm := f.ChunkMatches[0]
3344
3345 if len(cm.Ranges) != 1 {
3346 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3347 }
3348 rr := cm.Ranges[0]
3349 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3350 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3351 }
3352 })
3353}
3354
3355func TestSkipInvalidContent(t *testing.T) {
3356 for _, content := range []string{
3357 // Binary
3358 "abc def \x00 abc",
3359 } {
3360
3361 b, err := NewShardBuilder(nil)
3362 if err != nil {
3363 t.Fatalf("NewShardBuilder: %v", err)
3364 }
3365
3366 if err := b.Add(Document{
3367 Name: "f1",
3368 Content: []byte(content),
3369 }); err != nil {
3370 t.Fatal(err)
3371 }
3372
3373 t.Run("LineMatches", func(t *testing.T) {
3374 q := &query.Substring{Pattern: "abc def"}
3375 res := searchForTest(t, b, q)
3376 if len(res.Files) != 0 {
3377 t.Fatalf("got %v, want no results", res.Files)
3378 }
3379
3380 q = &query.Substring{Pattern: "NOT-INDEXED"}
3381 res = searchForTest(t, b, q)
3382 if len(res.Files) != 1 {
3383 t.Fatalf("got %v, want 1 result", res.Files)
3384 }
3385 })
3386
3387 t.Run("ChunkMatches", func(t *testing.T) {
3388 q := &query.Substring{Pattern: "abc def"}
3389 res := searchForTest(t, b, q, chunkOpts)
3390 if len(res.Files) != 0 {
3391 t.Fatalf("got %v, want no results", res.Files)
3392 }
3393
3394 q = &query.Substring{Pattern: "NOT-INDEXED"}
3395 res = searchForTest(t, b, q, chunkOpts)
3396 if len(res.Files) != 1 {
3397 t.Fatalf("got %v, want 1 result", res.Files)
3398 }
3399 })
3400 }
3401}
3402
3403func TestDocChecker(t *testing.T) {
3404 docChecker := DocChecker{}
3405
3406 // Test valid and invalid text
3407 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3408 if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone {
3409 t.Errorf("Check(%q): %v", text, skip)
3410 }
3411 }
3412 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx", "0123456789abcdefghi"} {
3413 if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone {
3414 t.Errorf("Check(%q) succeeded", text)
3415 }
3416 }
3417
3418 // Test valid and invalid text with an allowed large file
3419 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3420 if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone {
3421 t.Errorf("Check(%q): %v", text, skip)
3422 }
3423 }
3424 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx"} {
3425 if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone {
3426 t.Errorf("Check(%q) succeeded", text)
3427 }
3428 }
3429}
3430
3431func TestLineAnd(t *testing.T) {
3432 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3433 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3434 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3435 Document{Name: "f3", Content: []byte("banana grape")},
3436 )
3437 pattern := "(apple)(?-s:.)*?(banana)"
3438 r, _ := syntax.Parse(pattern, syntax.Perl)
3439
3440 q := query.Regexp{
3441 Regexp: r,
3442 Content: true,
3443 }
3444 t.Run("LineMatches", func(t *testing.T) {
3445 res := searchForTest(t, b, &q)
3446 wantRegexpCount := 1
3447 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3448 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3449 }
3450 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3451 t.Errorf("got %v, want 1 result", res.Files)
3452 }
3453 })
3454
3455 t.Run("ChunkMatches", func(t *testing.T) {
3456 res := searchForTest(t, b, &q, chunkOpts)
3457 wantRegexpCount := 1
3458 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3459 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3460 }
3461 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3462 t.Errorf("got %v, want 1 result", res.Files)
3463 }
3464 })
3465}
3466
3467func TestLineAndFileName(t *testing.T) {
3468 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3469 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3470 Document{Name: "f2", Content: []byte("apple banana\norange")},
3471 Document{Name: "apple banana", Content: []byte("banana grape")},
3472 )
3473 pattern := "(apple)(?-s:.)*?(banana)"
3474 r, _ := syntax.Parse(pattern, syntax.Perl)
3475
3476 q := query.Regexp{
3477 Regexp: r,
3478 FileName: true,
3479 }
3480 t.Run("LineMatches", func(t *testing.T) {
3481 res := searchForTest(t, b, &q)
3482 wantRegexpCount := 1
3483 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3484 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3485 }
3486 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3487 t.Errorf("got %v, want 1 result", res.Files)
3488 }
3489 })
3490
3491 t.Run("ChunkMatches", func(t *testing.T) {
3492 res := searchForTest(t, b, &q, chunkOpts)
3493 wantRegexpCount := 1
3494 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3495 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3496 }
3497 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3498 t.Errorf("got %v, want 1 result", res.Files)
3499 }
3500 })
3501}
3502
3503func TestMultiLineRegex(t *testing.T) {
3504 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
3505 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3506 Document{Name: "f2", Content: []byte("apple orange")},
3507 Document{Name: "f3", Content: []byte("grape apple")},
3508 )
3509 pattern := "(apple).*?[[:space:]].*?(grape)"
3510 r, _ := syntax.Parse(pattern, syntax.Perl)
3511
3512 q := query.Regexp{
3513 Regexp: r,
3514 }
3515 t.Run("LineMatches", func(t *testing.T) {
3516 res := searchForTest(t, b, &q)
3517 wantRegexpCount := 2
3518 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3519 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3520 }
3521 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3522 t.Errorf("got %v, want 1 result", res.Files)
3523 }
3524 if l := len(res.Files[0].LineMatches); l != 2 {
3525 t.Errorf("got %v, want 2 line matches", l)
3526 }
3527 })
3528
3529 t.Run("ChunkMatches", func(t *testing.T) {
3530 res := searchForTest(t, b, &q, chunkOpts)
3531 wantRegexpCount := 2
3532 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3533 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3534 }
3535 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3536 t.Errorf("got %v, want 1 result", res.Files)
3537 }
3538 if l := len(res.Files[0].ChunkMatches); l != 1 {
3539 t.Errorf("got %v, want 1 chunk matches", l)
3540 }
3541 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3542 t.Errorf("got %v, want 1 chunk ranges", l)
3543 }
3544 })
3545}
3546
3547func TestSearchTypeFileName(t *testing.T) {
3548 b := testShardBuilder(t, &zoekt.Repository{
3549 Name: "reponame",
3550 },
3551 Document{Name: "f1", Content: []byte("bla the needle")},
3552 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3553 // -----------------------------------012345678901234567890-123456
3554 )
3555
3556 t.Run("LineMatches", func(t *testing.T) {
3557 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3558 t.Helper()
3559 fmatches := res.Files
3560 if len(fmatches) != 1 {
3561 t.Errorf("got %v, want 1 matches", len(fmatches))
3562 return
3563 }
3564 if len(fmatches[0].LineMatches) != 1 {
3565 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3566 return
3567 }
3568 var got string
3569 if fmatches[0].LineMatches[0].FileName {
3570 got = fmatches[0].FileName
3571 } else {
3572 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3573 }
3574
3575 if got != want {
3576 t.Errorf("got %s, want %s", got, want)
3577 }
3578 }
3579
3580 // Only return the later match in the second file
3581 res := searchForTest(t, b, query.NewAnd(
3582 &query.Type{
3583 Type: query.TypeFileName,
3584 Child: &query.Substring{Pattern: "needle"},
3585 },
3586 &query.Substring{Pattern: "file"}))
3587 wantSingleMatch(res, "f2:8")
3588
3589 // Only return a filename result
3590 res = searchForTest(t, b,
3591 &query.Type{
3592 Type: query.TypeFileName,
3593 Child: &query.Substring{Pattern: "file"},
3594 })
3595 wantSingleMatch(res, "f2")
3596 })
3597
3598 t.Run("ChunkMatches", func(t *testing.T) {
3599 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3600 t.Helper()
3601 fmatches := res.Files
3602 if len(fmatches) != 1 {
3603 t.Errorf("got %v, want 1 matches", len(fmatches))
3604 return
3605 }
3606 if len(fmatches[0].ChunkMatches) != 1 {
3607 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3608 return
3609 }
3610 var got string
3611 if fmatches[0].ChunkMatches[0].FileName {
3612 got = fmatches[0].FileName
3613 } else {
3614 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3615 }
3616
3617 if got != want {
3618 t.Errorf("got %s, want %s", got, want)
3619 }
3620 }
3621
3622 // Only return the later match in the second file
3623 res := searchForTest(t, b, query.NewAnd(
3624 &query.Type{
3625 Type: query.TypeFileName,
3626 Child: &query.Substring{Pattern: "needle"},
3627 },
3628 &query.Substring{Pattern: "file"}),
3629 chunkOpts,
3630 )
3631 wantSingleMatch(res, "f2:8")
3632
3633 // Only return a filename result
3634 res = searchForTest(t, b,
3635 &query.Type{
3636 Type: query.TypeFileName,
3637 Child: &query.Substring{Pattern: "file"},
3638 },
3639 chunkOpts,
3640 )
3641 wantSingleMatch(res, "f2")
3642 })
3643}
3644
3645func TestSearchTypeLanguage(t *testing.T) {
3646 b := testShardBuilder(t, &zoekt.Repository{
3647 Name: "reponame",
3648 },
3649 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3650 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3651 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3652 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3653 )
3654
3655 t.Log(b.languageMap)
3656
3657 t.Run("LineMatches", func(t *testing.T) {
3658 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3659 t.Helper()
3660 fmatches := res.Files
3661 if len(fmatches) != 1 {
3662 t.Errorf("got %v, want 1 matches", len(fmatches))
3663 return
3664 }
3665 if len(fmatches[0].LineMatches) != 1 {
3666 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3667 return
3668 }
3669 var got string
3670 if fmatches[0].LineMatches[0].FileName {
3671 got = fmatches[0].FileName
3672 } else {
3673 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3674 }
3675
3676 if got != want {
3677 t.Errorf("got %s, want %s", got, want)
3678 }
3679 }
3680
3681 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3682 wantSingleMatch(res, "apex.cls")
3683
3684 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3685 wantSingleMatch(res, "tex.cls")
3686
3687 res = searchForTest(t, b, &query.Language{Language: "C"})
3688 wantSingleMatch(res, "hello.h")
3689
3690 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3691 wantSingleMatch(res, "be.magik")
3692
3693 // test fallback language search by pretending it's an older index version
3694 res = searchForTest(t, b, &query.Language{Language: "C++"})
3695 if len(res.Files) != 0 {
3696 t.Errorf("got %d results for C++, want 0", len(res.Files))
3697 }
3698
3699 b.featureVersion = 11 // force fallback
3700 res = searchForTest(t, b, &query.Language{Language: "C++"})
3701 wantSingleMatch(res, "hello.h")
3702 })
3703
3704 t.Run("ChunkMatches", func(t *testing.T) {
3705 wantSingleMatch := func(res *zoekt.SearchResult, want string) {
3706 t.Helper()
3707 fmatches := res.Files
3708 if len(fmatches) != 1 {
3709 t.Errorf("got %v, want 1 matches", len(fmatches))
3710 return
3711 }
3712 if len(fmatches[0].ChunkMatches) != 1 {
3713 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3714 return
3715 }
3716 var got string
3717 if fmatches[0].ChunkMatches[0].FileName {
3718 got = fmatches[0].FileName
3719 } else {
3720 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3721 }
3722
3723 if got != want {
3724 t.Errorf("got %s, want %s", got, want)
3725 }
3726 }
3727
3728 b.featureVersion = FeatureVersion // reset feature version
3729 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3730 wantSingleMatch(res, "apex.cls")
3731
3732 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3733 wantSingleMatch(res, "tex.cls")
3734
3735 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3736 wantSingleMatch(res, "hello.h")
3737
3738 // test fallback language search by pretending it's an older index version
3739 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3740 if len(res.Files) != 0 {
3741 t.Errorf("got %d results for C++, want 0", len(res.Files))
3742 }
3743
3744 b.featureVersion = 11 // force fallback
3745 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3746 wantSingleMatch(res, "hello.h")
3747 })
3748}
3749
3750func TestStats(t *testing.T) {
3751 ignored := []cmp.Option{
3752 cmpopts.EquateEmpty(),
3753 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"),
3754 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
3755 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
3756 }
3757
3758 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry {
3759 searcher := searcherForTest(t, b)
3760 indexdata := searcher.(*indexData)
3761 return indexdata.repoListEntry
3762 }
3763
3764 t.Run("one empty repo", func(t *testing.T) {
3765 b := testShardBuilder(t, nil)
3766 got := repoListEntries(b)
3767 want := []zoekt.RepoListEntry{
3768 {
3769 Stats: zoekt.RepoStats{
3770 Repos: 0,
3771 Shards: 1,
3772 Documents: 0,
3773 IndexBytes: 20,
3774 ContentBytes: 0,
3775 NewLinesCount: 0,
3776 DefaultBranchNewLinesCount: 0,
3777 OtherBranchesNewLinesCount: 0,
3778 },
3779 },
3780 }
3781
3782 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3783 t.Fatalf("mismatch (-want +got):\n%s", diff)
3784 }
3785 })
3786
3787 t.Run("one simple shard", func(t *testing.T) {
3788 b := testShardBuilder(t, nil,
3789 Document{Name: "doc 0", Content: []byte("content 0")},
3790 Document{Name: "doc 1", Content: []byte("content 1")},
3791 )
3792 got := repoListEntries(b)
3793 want := []zoekt.RepoListEntry{
3794 {
3795 Stats: zoekt.RepoStats{
3796 Repos: 0,
3797 Shards: 1,
3798 Documents: 2,
3799 IndexBytes: 224,
3800 ContentBytes: 28,
3801 NewLinesCount: 0,
3802 DefaultBranchNewLinesCount: 0,
3803 OtherBranchesNewLinesCount: 0,
3804 },
3805 },
3806 }
3807
3808 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3809 t.Fatalf("mismatch (-want +got):\n%s", diff)
3810 }
3811 })
3812
3813 t.Run("one compound shard", func(t *testing.T) {
3814 b := testShardBuilderCompound(t,
3815 []*zoekt.Repository{
3816 {Name: "repo 0"},
3817 {Name: "repo 1"},
3818 },
3819 [][]Document{
3820 {
3821 {Name: "doc 0", Content: []byte("content 0")},
3822 {Name: "doc 1", Content: []byte("content 1")},
3823 },
3824 {
3825 {Name: "doc 2", Content: []byte("content 2")},
3826 {Name: "doc 3", Content: []byte("content 3")},
3827 },
3828 },
3829 )
3830 got := repoListEntries(b)
3831 want := []zoekt.RepoListEntry{
3832 {
3833 Stats: zoekt.RepoStats{
3834 Repos: 0,
3835 Shards: 1,
3836 Documents: 2,
3837 IndexBytes: 180,
3838 ContentBytes: 28,
3839 NewLinesCount: 0,
3840 DefaultBranchNewLinesCount: 0,
3841 OtherBranchesNewLinesCount: 0,
3842 },
3843 },
3844 {
3845 Stats: zoekt.RepoStats{
3846 Repos: 0,
3847 Shards: 1,
3848 Documents: 2,
3849 IndexBytes: 180,
3850 ContentBytes: 28,
3851 NewLinesCount: 0,
3852 DefaultBranchNewLinesCount: 0,
3853 OtherBranchesNewLinesCount: 0,
3854 },
3855 },
3856 }
3857
3858 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3859 t.Fatalf("mismatch (-want +got):\n%s", diff)
3860 }
3861 })
3862
3863 t.Run("compound shard with empty repos", func(t *testing.T) {
3864 b := testShardBuilderCompound(t,
3865 []*zoekt.Repository{
3866 {Name: "repo 0"},
3867 {Name: "repo 1"},
3868 {Name: "repo 2"},
3869 {Name: "repo 3"},
3870 {Name: "repo 4"},
3871 },
3872 [][]Document{
3873 {{Name: "doc 0", Content: []byte("content 0")}},
3874 nil,
3875 {{Name: "doc 1", Content: []byte("content 1")}},
3876 nil,
3877 nil,
3878 },
3879 )
3880 got := repoListEntries(b)
3881
3882 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3883 Shards: 1,
3884 Documents: 0,
3885 ContentBytes: 0,
3886 }}
3887 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
3888 Shards: 1,
3889 Documents: 1,
3890 ContentBytes: 14,
3891 }}
3892
3893 want := []zoekt.RepoListEntry{
3894 entryNonEmpty,
3895 entryEmpty,
3896 entryNonEmpty,
3897 entryEmpty,
3898 entryEmpty,
3899 }
3900
3901 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3902 t.Fatalf("mismatch (-want +got):\n%s", diff)
3903 }
3904 })
3905}
3906
3907// This tests the frequent pattern "\bLITERAL\b".
3908func TestWordSearch(t *testing.T) {
3909 content := []byte("needle the bla")
3910 // ----------------01234567890123
3911
3912 b := testShardBuilder(t, nil,
3913 Document{
3914 Name: "f1",
3915 Content: content,
3916 })
3917
3918 t.Run("LineMatches", func(t *testing.T) {
3919 sres := searchForTest(t, b,
3920 &query.Regexp{
3921 Regexp: mustParseRE("\\bthe\\b"),
3922 CaseSensitive: true,
3923 Content: true,
3924 })
3925
3926 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3927 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3928 }
3929
3930 if sres.Stats.RegexpsConsidered != 0 {
3931 t.Fatal("expected regexp to be skipped")
3932 }
3933
3934 got := sres.Files[0].LineMatches[0]
3935 want := zoekt.LineMatch{
3936 LineFragments: []zoekt.LineFragmentMatch{{
3937 LineOffset: 7,
3938 Offset: 7,
3939 MatchLength: 3,
3940 }},
3941 Line: content,
3942 FileName: false,
3943 LineNumber: 1,
3944 LineStart: 0,
3945 LineEnd: 14,
3946 }
3947
3948 if !reflect.DeepEqual(got, want) {
3949 t.Errorf("got %#v, want %#v", got, want)
3950 }
3951 })
3952
3953 t.Run("ChunkMatches", func(t *testing.T) {
3954 sres := searchForTest(t, b,
3955 &query.Regexp{
3956 Regexp: mustParseRE("\\bthe\\b"),
3957 CaseSensitive: true,
3958 }, chunkOpts)
3959
3960 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3961 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3962 }
3963
3964 if sres.Stats.RegexpsConsidered != 0 {
3965 t.Fatal("expected regexp to be skipped")
3966 }
3967
3968 got := sres.Files[0].ChunkMatches[0]
3969 want := zoekt.ChunkMatch{
3970 Content: content,
3971 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3972 Ranges: []zoekt.Range{{
3973 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3974 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3975 }},
3976 }
3977
3978 if diff := cmp.Diff(want, got); diff != "" {
3979 t.Fatal(diff)
3980 }
3981 })
3982}
3983
3984// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
3985// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
3986func BenchmarkScoreChunkMatches(b *testing.B) {
3987 ctx := context.Background()
3988 var builder strings.Builder
3989 for i := range 1000 {
3990 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i))
3991 }
3992
3993 searcher := searcherForTest(b, testShardBuilder(b, nil,
3994 Document{Name: "f1", Content: []byte(builder.String())},
3995 ))
3996
3997 q := &query.Or{
3998 Children: []query.Q{
3999 &query.Substring{Pattern: "f"},
4000 &query.Substring{Pattern: "t"},
4001 }}
4002
4003 b.Run("score large ChunkMatch", func(b *testing.B) {
4004 b.ReportAllocs()
4005 b.ResetTimer()
4006
4007 for i := 0; i < b.N; i++ {
4008 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1})
4009 if err != nil {
4010 b.Fatal(err)
4011 }
4012
4013 matches := sres.Files
4014 if len(matches) == 0 {
4015 b.Fatalf("want file index, got none")
4016 }
4017 }
4018 })
4019}