fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 r.Files[i].ChunkMatches[j].BestLineMatch = 0
42 }
43 r.Files[i].Checksum = nil
44 r.Files[i].Debug = ""
45 }
46}
47
48func testIndexBuilder(tb testing.TB, repo *Repository, docs ...Document) *IndexBuilder {
49 tb.Helper()
50
51 b, err := NewIndexBuilder(repo)
52 if err != nil {
53 tb.Fatalf("NewIndexBuilder: %v", err)
54 }
55
56 for i, d := range docs {
57 if err := b.Add(d); err != nil {
58 tb.Fatalf("Add %d: %v", i, err)
59 }
60 }
61
62 return b
63}
64
65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
66 t.Helper()
67
68 b := newIndexBuilder()
69 b.indexFormatVersion = NextIndexFormatVersion
70
71 if len(repos) != len(docs) {
72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
73 }
74
75 for i, repo := range repos {
76 if err := b.setRepository(repo); err != nil {
77 t.Fatal(err)
78 }
79 for j, d := range docs[i] {
80 if err := b.Add(d); err != nil {
81 t.Fatalf("Add %d %d: %v", i, j, err)
82 }
83 }
84 }
85
86 return b
87}
88
89func TestBoundary(t *testing.T) {
90 b := testIndexBuilder(t, nil,
91 Document{Name: "f1", Content: []byte("x the")},
92 Document{Name: "f1", Content: []byte("reader")})
93 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
94 if len(res.Files) > 0 {
95 t.Fatalf("got %v, want no matches", res.Files)
96 }
97}
98
99func TestDocSectionInvalid(t *testing.T) {
100 b, err := NewIndexBuilder(nil)
101 if err != nil {
102 t.Fatalf("NewIndexBuilder: %v", err)
103 }
104 doc := Document{
105 Name: "f1",
106 Content: []byte("01234567890123"),
107 Symbols: []DocumentSection{{5, 8}, {7, 9}},
108 }
109
110 if err := b.Add(doc); err == nil {
111 t.Errorf("overlapping doc sections should fail")
112 }
113
114 doc = Document{
115 Name: "f1",
116 Content: []byte("01234567890123"),
117 Symbols: []DocumentSection{{0, 20}},
118 }
119
120 if err := b.Add(doc); err == nil {
121 t.Errorf("doc sections beyond EOF should fail")
122 }
123}
124
125func TestBasic(t *testing.T) {
126 b := testIndexBuilder(t, nil,
127 Document{
128 Name: "f2",
129 Content: []byte("to carry water in the no later bla"),
130 // --------------0123456789012345678901234567890123
131 })
132
133 t.Run("LineMatch", func(t *testing.T) {
134 res := searchForTest(t, b, &query.Substring{
135 Pattern: "water",
136 CaseSensitive: true,
137 })
138 fmatches := res.Files
139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
140 t.Fatalf("got %v, want 1 matches", fmatches)
141 }
142
143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
144 want := "f2:9"
145 if got != want {
146 t.Errorf("1: got %s, want %s", got, want)
147 }
148 })
149
150 t.Run("ChunkMatch", func(t *testing.T) {
151 res := searchForTest(t, b, &query.Substring{
152 Pattern: "water",
153 CaseSensitive: true,
154 }, chunkOpts)
155 fmatches := res.Files
156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
157 t.Fatalf("got %v, want 1 matches", fmatches)
158 }
159
160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
161 want := "f2:9"
162 if got != want {
163 t.Errorf("1: got %s, want %s", got, want)
164 }
165 })
166}
167
168func TestEmptyIndex(t *testing.T) {
169 b := testIndexBuilder(t, nil)
170 searcher := searcherForTest(t, b)
171
172 var opts SearchOptions
173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
174 t.Fatalf("Search: %v", err)
175 }
176
177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
178 t.Fatalf("List: %v", err)
179 }
180
181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
182 t.Fatalf("Search: %v", err)
183 }
184}
185
// memSeeker serves reads from an in-memory byte slice. The tests use it to
// open an index that was written to a buffer instead of a file on disk.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing slice starting at offset off. The
// result aliases the backing slice; it is not a copy.
//
// It returns an error when the requested range does not lie within the data.
// The end offset is computed in 64 bits so that off+sz cannot wrap around,
// and the range is checked against the length (not the capacity) of the
// slice so that bytes past the end of the data are never exposed.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memSeeker.Read: range [%d, %d) out of bounds for %d bytes", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the number of bytes held by the seeker.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
202
203func TestNewlines(t *testing.T) {
204 b := testIndexBuilder(t, nil,
205 // -----------------------------------------012345-678901-234
206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
207
208 t.Run("LineMatches", func(t *testing.T) {
209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
210
211 matches := sres.Files
212 want := []FileMatch{{
213 FileName: "filename",
214 LineMatches: []LineMatch{{
215 LineFragments: []LineFragmentMatch{{
216 Offset: 8,
217 LineOffset: 2,
218 MatchLength: 3,
219 }},
220 Line: []byte("line2\n"),
221 LineStart: 6,
222 LineEnd: 12,
223 LineNumber: 2,
224 }},
225 }}
226
227 if diff := cmp.Diff(matches, want); diff != "" {
228 t.Fatal(diff)
229 }
230 })
231
232 t.Run("ChunkMatches", func(t *testing.T) {
233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
234
235 matches := sres.Files
236 want := []FileMatch{{
237 FileName: "filename",
238 ChunkMatches: []ChunkMatch{{
239 Content: []byte("line2\n"),
240 ContentStart: Location{
241 ByteOffset: 6,
242 LineNumber: 2,
243 Column: 1,
244 },
245 Ranges: []Range{{
246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
248 }},
249 }},
250 }}
251
252 if diff := cmp.Diff(want, matches); diff != "" {
253 t.Fatal(diff)
254 }
255 })
256}
257
258// A result spanning multiple lines should have LineMatches that only cover
259// single lines.
260func TestQueryNewlines(t *testing.T) {
261 text := "line1\nline2\nbla"
262 b := testIndexBuilder(t, nil,
263 Document{Name: "filename", Content: []byte(text)})
264
265 t.Run("LineMatches", func(t *testing.T) {
266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
267 matches := sres.Files
268 if len(matches) != 1 {
269 t.Fatalf("got %d file matches, want exactly one", len(matches))
270 }
271 m := matches[0]
272 if len(m.LineMatches) != 2 {
273 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
274 }
275 })
276
277 t.Run("ChunkMatches", func(t *testing.T) {
278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
279 matches := sres.Files
280 if len(matches) != 1 {
281 t.Fatalf("got %d file matches, want exactly one", len(matches))
282 }
283 m := matches[0]
284 if len(m.ChunkMatches) != 1 {
285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
286 }
287 })
288}
289
290var chunkOpts = SearchOptions{ChunkMatches: true}
291
292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
293 searcher := searcherForTest(t, b)
294 var opts SearchOptions
295 if len(o) > 0 {
296 opts = o[0]
297 }
298 res, err := searcher.Search(context.Background(), q, &opts)
299 if err != nil {
300 t.Fatalf("Search(%s): %v", q, err)
301 }
302 clearScores(res)
303 return res
304}
305
306func searcherForTest(t testing.TB, b *IndexBuilder) Searcher {
307 var buf bytes.Buffer
308 if err := b.Write(&buf); err != nil {
309 t.Fatal(err)
310 }
311 f := &memSeeker{buf.Bytes()}
312
313 searcher, err := NewSearcher(f)
314 if err != nil {
315 t.Fatalf("NewSearcher: %v", err)
316 }
317
318 return searcher
319}
320
321func TestCaseFold(t *testing.T) {
322 b := testIndexBuilder(t, nil,
323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
324 // -----------------------------------012345678901234
325 )
326 t.Run("LineMatches", func(t *testing.T) {
327 sres := searchForTest(t, b, &query.Substring{
328 Pattern: "bananas",
329 CaseSensitive: true,
330 })
331 matches := sres.Files
332 if len(matches) != 0 {
333 t.Errorf("foldcase: got %#v, want 0 matches", matches)
334 }
335
336 sres = searchForTest(t, b,
337 &query.Substring{
338 Pattern: "BaNaNAS",
339 CaseSensitive: true,
340 })
341 matches = sres.Files
342 if len(matches) != 1 {
343 t.Errorf("no foldcase: got %v, want 1 matches", matches)
344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
345 t.Errorf("foldcase: got %v, want offsets 7", matches)
346 }
347 })
348
349 t.Run("ChunkMatches", func(t *testing.T) {
350 sres := searchForTest(t, b, &query.Substring{
351 Pattern: "bananas",
352 CaseSensitive: true,
353 }, chunkOpts)
354 matches := sres.Files
355 if len(matches) != 0 {
356 t.Errorf("foldcase: got %#v, want 0 matches", matches)
357 }
358
359 sres = searchForTest(t, b,
360 &query.Substring{
361 Pattern: "BaNaNAS",
362 CaseSensitive: true,
363 })
364 matches = sres.Files
365 if len(matches) != 1 {
366 t.Errorf("no foldcase: got %v, want 1 matches", matches)
367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
368 t.Errorf("foldcase: got %v, want offsets 7", matches)
369 }
370 })
371}
372
373// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
374// chars. Those are then set as symbols.
375func wordsAsSymbols(doc Document) Document {
376 re := regexp.MustCompile(`\b\w{2,}\b`)
377 var symbols []DocumentSection
378 var symbolsMetadata []*Symbol
379 for _, match := range re.FindAllIndex(doc.Content, -1) {
380 symbols = append(symbols, DocumentSection{
381 Start: uint32(match[0]),
382 End: uint32(match[1]),
383 })
384 symbolsMetadata = append(symbolsMetadata, &Symbol{Kind: "method"})
385 }
386 doc.Symbols = symbols
387 doc.SymbolsMetaData = symbolsMetadata
388 return doc
389}
390
391func TestSearchStats(t *testing.T) {
392 ctx := context.Background()
393 searcher := searcherForTest(t, testIndexBuilder(t, nil,
394 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
395 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
396 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
397 // --------------------------------------------------0123456789012345
398 ))
399
400 andQuery := query.NewAnd(
401 &query.Substring{
402 Pattern: "banana",
403 },
404 &query.Substring{
405 Pattern: "apple",
406 },
407 )
408
409 t.Run("LineMatches", func(t *testing.T) {
410 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
411 if err != nil {
412 t.Fatal(err)
413 }
414 matches := sres.Files
415 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
416 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
417 }
418
419 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
420 t.Fatalf("got %#v, want offsets 2,9", matches)
421 }
422 })
423 t.Run("ChunkMatches", func(t *testing.T) {
424 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
425 if err != nil {
426 t.Fatal(err)
427 }
428 matches := sres.Files
429 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
430 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
431 }
432
433 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
434 t.Fatalf("got %#v, want offsets 2,9", matches)
435 }
436 })
437 t.Run("Stats", func(t *testing.T) {
438 cases := []struct {
439 Name string
440 Q query.Q
441 Want Stats
442 }{{
443 Name: "and-query",
444 Q: andQuery,
445 Want: Stats{
446 FilesLoaded: 1,
447 ContentBytesLoaded: 22,
448 IndexBytesLoaded: 10,
449 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
450 NgramLookups: 104,
451 MatchCount: 2,
452 FileCount: 1,
453 FilesConsidered: 2,
454 ShardsScanned: 1,
455 },
456 }, {
457 Name: "one-trigram",
458 Q: &query.Substring{
459 Pattern: "a y",
460 Content: true,
461 CaseSensitive: true,
462 },
463 Want: Stats{
464 ContentBytesLoaded: 14,
465 IndexBytesLoaded: 1,
466 FileCount: 1,
467 FilesConsidered: 1,
468 FilesLoaded: 1,
469 ShardsScanned: 1,
470 MatchCount: 1,
471 NgramMatches: 1,
472 NgramLookups: 2, // once to lookup frequency then again to access posting list.
473 },
474 }, {
475 Name: "one-trigram-case-insensitive",
476 Q: &query.Substring{
477 Pattern: "a y",
478 Content: true,
479 },
480 Want: Stats{
481 ContentBytesLoaded: 14,
482 IndexBytesLoaded: 1,
483 FileCount: 1,
484 FilesConsidered: 1,
485 FilesLoaded: 1,
486 ShardsScanned: 1,
487 MatchCount: 1,
488 NgramMatches: 1,
489 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
490 },
491 }, {
492 Name: "one-trigram-pruned",
493 Q: &query.Substring{
494 Pattern: "foo",
495 Content: true,
496 CaseSensitive: true,
497 },
498 Want: Stats{
499 ShardsSkippedFilter: 1,
500 NgramLookups: 1, // only had to lookup once
501 },
502 }, {
503 Name: "one-trigram-branch-pruned",
504 Q: query.NewAnd(
505 &query.Substring{
506 Pattern: "foo",
507 Content: true,
508 CaseSensitive: true,
509 },
510 &query.Substring{
511 Pattern: "a y",
512 Content: true,
513 CaseSensitive: true,
514 },
515 ),
516 Want: Stats{
517 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
518 ShardsSkippedFilter: 1,
519 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
520 },
521 }, {
522 Name: "symbol-substr-nomatch",
523 Q: &query.Symbol{Expr: &query.Substring{
524 Pattern: "banana apple",
525 Content: true,
526 CaseSensitive: true,
527 }},
528 Want: Stats{
529 IndexBytesLoaded: 3,
530 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
531 MatchCount: 0, // even though there is a match it doesn't align with a symbol
532 ShardsScanned: 1,
533 NgramMatches: 1,
534 NgramLookups: 12,
535 },
536 }, {
537 Name: "symbol-substr",
538 Q: &query.Symbol{Expr: &query.Substring{
539 Pattern: "apple",
540 Content: true,
541 CaseSensitive: true,
542 }},
543 Want: Stats{
544 ContentBytesLoaded: 35,
545 IndexBytesLoaded: 4,
546 FileCount: 2,
547 FilesConsidered: 2, // must be 2 to ensure we used the index
548 FilesLoaded: 2,
549 MatchCount: 2, // apple symbols is in two files
550 ShardsScanned: 1,
551 NgramMatches: 2,
552 NgramLookups: 5,
553 },
554 }, {
555 Name: "symbol-regexp-nomatch",
556 Q: &query.Symbol{Expr: &query.Regexp{
557 Regexp: mustParseRE("^apple.banana$"),
558 Content: true,
559 CaseSensitive: true,
560 }},
561 Want: Stats{
562 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
563 IndexBytesLoaded: 10,
564 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
565 FilesLoaded: 2,
566 MatchCount: 0, // even though there is a match it doesn't align with a symbol
567 ShardsScanned: 1,
568 NgramMatches: 3,
569 NgramLookups: 11,
570 },
571 }, {
572 Name: "symbol-regexp",
573 Q: &query.Symbol{Expr: &query.Regexp{
574 Regexp: mustParseRE("^app.e$"),
575 Content: true,
576 CaseSensitive: true,
577 }},
578 Want: Stats{
579 ContentBytesLoaded: 35,
580 IndexBytesLoaded: 2,
581 FileCount: 2,
582 FilesConsidered: 2, // must be 2 to ensure we used the index
583 FilesLoaded: 2,
584 MatchCount: 2, // apple symbols is in two files
585 ShardsScanned: 1,
586 NgramMatches: 2,
587 NgramLookups: 2,
588 },
589 }}
590
591 for _, tc := range cases {
592 t.Run(tc.Name, func(t *testing.T) {
593 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
594 if err != nil {
595 t.Fatal(err)
596 }
597 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
598 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
599 }
600 })
601 }
602 })
603}
604
605func TestAndNegateSearch(t *testing.T) {
606 b := testIndexBuilder(t, nil,
607 Document{Name: "f1", Content: []byte("x banana y")},
608 // -----------------------------------0123456789
609 Document{Name: "f4", Content: []byte("x banana apple y")})
610
611 t.Run("LineMatches", func(t *testing.T) {
612 sres := searchForTest(t, b, query.NewAnd(
613 &query.Substring{
614 Pattern: "banana",
615 },
616 &query.Not{Child: &query.Substring{
617 Pattern: "apple",
618 }}))
619
620 matches := sres.Files
621
622 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
623 t.Fatalf("got %v, want 1 match", matches)
624 }
625 if matches[0].FileName != "f1" {
626 t.Fatalf("got match %#v, want FileName: f1", matches[0])
627 }
628 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
629 t.Fatalf("got %v, want offset 2", matches)
630 }
631 })
632
633 t.Run("ChunkMatches", func(t *testing.T) {
634 sres := searchForTest(t, b,
635 query.NewAnd(
636 &query.Substring{
637 Pattern: "banana",
638 },
639 &query.Not{Child: &query.Substring{
640 Pattern: "apple",
641 }},
642 ),
643 chunkOpts,
644 )
645
646 matches := sres.Files
647
648 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
649 t.Fatalf("got %v, want 1 match", matches)
650 }
651 if matches[0].FileName != "f1" {
652 t.Fatalf("got match %#v, want FileName: f1", matches[0])
653 }
654 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
655 t.Fatalf("got %v, want offset 2", matches)
656 }
657 })
658}
659
660func TestNegativeMatchesOnlyShortcut(t *testing.T) {
661 b := testIndexBuilder(t, nil,
662 Document{Name: "f1", Content: []byte("x banana y")},
663 Document{Name: "f2", Content: []byte("x appelmoes y")},
664 Document{Name: "f3", Content: []byte("x appelmoes y")},
665 Document{Name: "f3", Content: []byte("x appelmoes y")})
666
667 t.Run("LineMatches", func(t *testing.T) {
668 sres := searchForTest(t, b, query.NewAnd(
669 &query.Substring{
670 Pattern: "banana",
671 },
672 &query.Not{Child: &query.Substring{
673 Pattern: "appel",
674 }}))
675
676 if sres.Stats.FilesConsidered != 1 {
677 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
678 }
679 })
680
681 t.Run("ChunkMatches", func(t *testing.T) {
682 sres := searchForTest(t, b, query.NewAnd(
683 &query.Substring{
684 Pattern: "banana",
685 },
686 &query.Not{Child: &query.Substring{
687 Pattern: "appel",
688 }}), chunkOpts)
689
690 if sres.Stats.FilesConsidered != 1 {
691 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
692 }
693 })
694}
695
696func TestFileSearch(t *testing.T) {
697 b := testIndexBuilder(t, nil,
698 Document{Name: "banzana", Content: []byte("x orange y")},
699 // -------------0123456
700 Document{Name: "banana", Content: []byte("x apple y")},
701 // -------------012345
702 )
703
704 t.Run("LineMatches", func(t *testing.T) {
705 sres := searchForTest(t, b, &query.Substring{
706 Pattern: "anan",
707 FileName: true,
708 })
709
710 matches := sres.Files
711 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
712 t.Fatalf("got %v, want 1 match", matches)
713 }
714
715 got := matches[0].LineMatches[0]
716 want := LineMatch{
717 Line: []byte("banana"),
718 LineFragments: []LineFragmentMatch{{
719 Offset: 1,
720 LineOffset: 1,
721 MatchLength: 4,
722 }},
723 FileName: true,
724 }
725
726 if !reflect.DeepEqual(got, want) {
727 t.Errorf("got %#v, want %#v", got, want)
728 }
729 })
730
731 t.Run("ChunkMatches", func(t *testing.T) {
732 sres := searchForTest(t, b, &query.Substring{
733 Pattern: "anan",
734 FileName: true,
735 }, chunkOpts)
736
737 matches := sres.Files
738 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
739 t.Fatalf("got %v, want 1 match", matches)
740 }
741
742 got := matches[0].ChunkMatches[0]
743 want := ChunkMatch{
744 Content: []byte("banana"),
745 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
746 Ranges: []Range{{
747 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
748 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
749 }},
750 FileName: true,
751 }
752
753 if diff := cmp.Diff(want, got); diff != "" {
754 t.Fatal(diff)
755 }
756 })
757
758 t.Run("FileNameSet", func(t *testing.T) {
759 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
760
761 matches := sres.Files
762 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
763 t.Fatalf("got %v, want 1 match", matches)
764 }
765
766 got := matches[0].ChunkMatches[0]
767 want := ChunkMatch{
768 Content: []byte("banana"),
769 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
770 Ranges: []Range{{
771 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
772 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
773 }},
774 FileName: true,
775 }
776
777 if diff := cmp.Diff(want, got); diff != "" {
778 t.Fatal(diff)
779 }
780 })
781}
782
783func TestFileCase(t *testing.T) {
784 b := testIndexBuilder(t, nil,
785 Document{Name: "BANANA", Content: []byte("x orange y")})
786
787 t.Run("LineMatches", func(t *testing.T) {
788 sres := searchForTest(t, b, &query.Substring{
789 Pattern: "banana",
790 FileName: true,
791 })
792
793 matches := sres.Files
794 if len(matches) != 1 || matches[0].FileName != "BANANA" {
795 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
796 }
797 })
798
799 t.Run("ChunkMatches", func(t *testing.T) {
800 sres := searchForTest(t, b, &query.Substring{
801 Pattern: "banana",
802 FileName: true,
803 }, chunkOpts)
804
805 matches := sres.Files
806 if len(matches) != 1 || matches[0].FileName != "BANANA" {
807 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
808 }
809 })
810}
811
812func TestFileRegexpSearchBruteForce(t *testing.T) {
813 b := testIndexBuilder(t, nil,
814 Document{Name: "banzana", Content: []byte("x orange y")},
815 Document{Name: "banana", Content: []byte("x apple y")},
816 )
817 t.Run("LineMatches", func(t *testing.T) {
818 sres := searchForTest(t, b, &query.Regexp{
819 Regexp: mustParseRE("[qn][zx]"),
820 FileName: true,
821 })
822
823 matches := sres.Files
824 if len(matches) != 1 || matches[0].FileName != "banzana" {
825 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
826 }
827 })
828 t.Run("LineMatches", func(t *testing.T) {
829 sres := searchForTest(t, b, &query.Regexp{
830 Regexp: mustParseRE("[qn][zx]"),
831 FileName: true,
832 }, chunkOpts)
833
834 matches := sres.Files
835 if len(matches) != 1 || matches[0].FileName != "banzana" {
836 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
837 }
838 })
839}
840
841func TestFileRegexpSearchShortString(t *testing.T) {
842 b := testIndexBuilder(t, nil,
843 Document{Name: "banana.py", Content: []byte("x orange y")})
844
845 t.Run("LineMatches", func(t *testing.T) {
846 sres := searchForTest(t, b, &query.Regexp{
847 Regexp: mustParseRE("ana.py"),
848 FileName: true,
849 })
850
851 matches := sres.Files
852 if len(matches) != 1 || matches[0].FileName != "banana.py" {
853 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
854 }
855 })
856
857 t.Run("ChunkMatches", func(t *testing.T) {
858 sres := searchForTest(t, b, &query.Regexp{
859 Regexp: mustParseRE("ana.py"),
860 FileName: true,
861 }, chunkOpts)
862
863 matches := sres.Files
864 if len(matches) != 1 || matches[0].FileName != "banana.py" {
865 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
866 }
867 })
868}
869
870func TestFileSubstringSearchBruteForce(t *testing.T) {
871 b := testIndexBuilder(t, nil,
872 Document{Name: "BANZANA", Content: []byte("x orange y")},
873 Document{Name: "banana", Content: []byte("x apple y")})
874
875 q := &query.Substring{
876 Pattern: "z",
877 FileName: true,
878 }
879
880 t.Run("LineMatches", func(t *testing.T) {
881 res := searchForTest(t, b, q)
882 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
883 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
884 }
885 })
886
887 t.Run("ChunkMatches", func(t *testing.T) {
888 res := searchForTest(t, b, q, chunkOpts)
889 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
890 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
891 }
892 })
893}
894
895func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
896 b := testIndexBuilder(t, nil,
897 Document{Name: "BANZANA", Content: []byte("x orange y")},
898 Document{Name: "bananaq", Content: []byte("x apple y")})
899
900 q := &query.Substring{
901 Pattern: "q",
902 FileName: true,
903 }
904 t.Run("LineMatches", func(t *testing.T) {
905 res := searchForTest(t, b, q)
906 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
907 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
908 }
909 })
910
911 t.Run("LineMatches", func(t *testing.T) {
912 res := searchForTest(t, b, q, chunkOpts)
913 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
914 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
915 }
916 })
917}
918
919func TestSearchMatchAll(t *testing.T) {
920 b := testIndexBuilder(t, nil,
921 Document{Name: "banzana", Content: []byte("x orange y")},
922 Document{Name: "banana", Content: []byte("x apple y")})
923
924 t.Run("LineMatches", func(t *testing.T) {
925 sres := searchForTest(t, b, &query.Const{Value: true})
926 matches := sres.Files
927 if len(matches) != 2 {
928 t.Fatalf("got %v, want 2 matches", matches)
929 }
930 })
931
932 t.Run("ChunkMatches", func(t *testing.T) {
933 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
934 matches := sres.Files
935 if len(matches) != 2 {
936 t.Fatalf("got %v, want 2 matches", matches)
937 }
938 })
939}
940
941func TestSearchNewline(t *testing.T) {
942 b := testIndexBuilder(t, nil,
943 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
944
945 t.Run("LineMatches", func(t *testing.T) {
946 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
947
948 // Just check that we don't crash.
949
950 matches := sres.Files
951 if len(matches) != 1 {
952 t.Fatalf("got %v, want 1 matches", matches)
953 }
954 })
955
956 t.Run("ChunkMatches", func(t *testing.T) {
957 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
958
959 // Just check that we don't crash.
960
961 matches := sres.Files
962 if len(matches) != 1 {
963 t.Fatalf("got %v, want 1 matches", matches)
964 }
965 })
966}
967
968func TestSearchMatchAllRegexp(t *testing.T) {
969 b := testIndexBuilder(t, nil,
970 Document{Name: "banzana", Content: []byte("abcd")},
971 Document{Name: "banana", Content: []byte("pqrs")})
972
973 t.Run("LineMatches", func(t *testing.T) {
974 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
975
976 matches := sres.Files
977 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
978 t.Fatalf("got %v, want 2 matches", matches)
979 }
980 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
981 t.Fatalf("want 4 chars in every file, got %#v", matches)
982 }
983 })
984
985 t.Run("ChunkMatches", func(t *testing.T) {
986 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
987
988 matches := sres.Files
989 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
990 t.Fatalf("got %v, want 2 matches", matches)
991 }
992 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
993 t.Fatalf("want 4 chars in every file, got %#v", matches)
994 }
995 })
996}
997
998func TestSearchBM25MatchScores(t *testing.T) {
999 ctx := context.Background()
1000 searcher := searcherForTest(t, testIndexBuilder(t, nil,
1001 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")},
1002 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")},
1003 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}),
1004 ))
1005
1006 t.Run("LineMatches", func(t *testing.T) {
1007 q := &query.Substring{Pattern: "two"}
1008 sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true})
1009 if err != nil {
1010 t.Fatal(err)
1011 }
1012 matches := sres.Files
1013 if len(matches) != 1 {
1014 t.Fatalf("want 1 file match, got %d", len(matches))
1015 }
1016
1017 if len(matches[0].LineMatches) != 2 {
1018 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1019 }
1020
1021 if matches[0].LineMatches[0].LineNumber != 4 {
1022 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber)
1023 }
1024 })
1025
1026 t.Run("ChunkMatches", func(t *testing.T) {
1027 q := &query.Substring{Pattern: "five"}
1028 sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1029 if err != nil {
1030 t.Fatal(err)
1031 }
1032
1033 matches := sres.Files
1034 if len(matches) != 1 {
1035 t.Fatalf("want 1 file match, got %d", len(matches))
1036 }
1037
1038 if len(matches[0].ChunkMatches) != 2 {
1039 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches))
1040 }
1041
1042 if matches[0].ChunkMatches[0].BestLineMatch != 4 {
1043 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch)
1044 }
1045 })
1046
1047 t.Run("ChunkMatches with symbols", func(t *testing.T) {
1048 q := &query.Or{
1049 Children: []query.Q{
1050 &query.Symbol{Expr: &query.Substring{Pattern: "main"}},
1051 &query.Substring{Pattern: "five"},
1052 },
1053 }
1054
1055 sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1})
1056 if err != nil {
1057 t.Fatal(err)
1058 }
1059
1060 matches := sres.Files
1061 if len(matches) != 2 {
1062 t.Fatalf("want 2 file match, got %d", len(matches))
1063 }
1064
1065 foundSymbolInfo := false
1066 for _, m := range matches {
1067 for _, cm := range m.ChunkMatches {
1068 if len(cm.SymbolInfo) > 0 {
1069 foundSymbolInfo = true
1070 }
1071 }
1072 }
1073
1074 if !foundSymbolInfo {
1075 t.Fatalf("want symbol info, got none")
1076 }
1077 })
1078}
1079
1080func TestFileRestriction(t *testing.T) {
1081 b := testIndexBuilder(t, nil,
1082 Document{Name: "banana1", Content: []byte("x orange y")},
1083 Document{Name: "banana2", Content: []byte("x apple y")},
1084 Document{Name: "orange", Content: []byte("x apple z")})
1085
1086 t.Run("LineMatches", func(t *testing.T) {
1087 sres := searchForTest(t, b, query.NewAnd(
1088 &query.Substring{
1089 Pattern: "banana",
1090 FileName: true,
1091 },
1092 &query.Substring{
1093 Pattern: "apple",
1094 }))
1095
1096 matches := sres.Files
1097 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1098 t.Fatalf("got %v, want 1 match", matches)
1099 }
1100
1101 match := matches[0].LineMatches[0]
1102 got := string(match.Line)
1103 want := "x apple y"
1104 if got != want {
1105 t.Errorf("got match %#v, want line %q", match, want)
1106 }
1107 })
1108
1109 t.Run("ChunkMatches", func(t *testing.T) {
1110 sres := searchForTest(t, b, query.NewAnd(
1111 &query.Substring{
1112 Pattern: "banana",
1113 FileName: true,
1114 },
1115 &query.Substring{
1116 Pattern: "apple",
1117 }), chunkOpts)
1118
1119 matches := sres.Files
1120 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1121 t.Fatalf("got %v, want 1 match", matches)
1122 }
1123
1124 match := matches[0].ChunkMatches[0]
1125 got := string(match.Content)
1126 want := "x apple y"
1127 if got != want {
1128 t.Errorf("got match %#v, want line %q", match, want)
1129 }
1130 })
1131}
1132
1133func TestFileNameBoundary(t *testing.T) {
1134 b := testIndexBuilder(t, nil,
1135 Document{Name: "banana2", Content: []byte("x apple y")},
1136 Document{Name: "helpers.go", Content: []byte("x apple y")},
1137 Document{Name: "foo", Content: []byte("x apple y")})
1138
1139 t.Run("LineMatches", func(t *testing.T) {
1140 sres := searchForTest(t, b, &query.Substring{
1141 Pattern: "helpers.go",
1142 FileName: true,
1143 })
1144
1145 matches := sres.Files
1146 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1147 t.Fatalf("got %v, want 1 match", matches)
1148 }
1149 })
1150
1151 t.Run("ChunkMatches", func(t *testing.T) {
1152 sres := searchForTest(t, b, &query.Substring{
1153 Pattern: "helpers.go",
1154 FileName: true,
1155 }, chunkOpts)
1156
1157 matches := sres.Files
1158 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1159 t.Fatalf("got %v, want 1 match", matches)
1160 }
1161 })
1162}
1163
1164func TestDocumentOrder(t *testing.T) {
1165 var docs []Document
1166 for i := 0; i < 3; i++ {
1167 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1168 }
1169
1170 b := testIndexBuilder(t, nil, docs...)
1171
1172 t.Run("LineMatches", func(t *testing.T) {
1173 sres := searchForTest(t, b, query.NewAnd(
1174 &query.Substring{
1175 Pattern: "needle",
1176 }))
1177
1178 want := []string{"f0", "f1", "f2"}
1179 var got []string
1180 for _, f := range sres.Files {
1181 got = append(got, f.FileName)
1182 }
1183 if !reflect.DeepEqual(got, want) {
1184 t.Fatalf("got %v, want %v", got, want)
1185 }
1186 })
1187
1188 t.Run("ChunkMatches", func(t *testing.T) {
1189 sres := searchForTest(t, b,
1190 query.NewAnd(&query.Substring{
1191 Pattern: "needle",
1192 }),
1193 chunkOpts,
1194 )
1195
1196 want := []string{"f0", "f1", "f2"}
1197 var got []string
1198 for _, f := range sres.Files {
1199 got = append(got, f.FileName)
1200 }
1201 if !reflect.DeepEqual(got, want) {
1202 t.Fatalf("got %v, want %v", got, want)
1203 }
1204 })
1205}
1206
1207func TestBranchMask(t *testing.T) {
1208 b := testIndexBuilder(t, &Repository{
1209 Branches: []RepositoryBranch{
1210 {"master", "v-master"},
1211 {"stable", "v-stable"},
1212 {"bonzai", "v-bonzai"},
1213 },
1214 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1215 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1216 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1217 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1218 )
1219
1220 t.Run("LineMatches", func(t *testing.T) {
1221 sres := searchForTest(t, b, query.NewAnd(
1222 &query.Substring{
1223 Pattern: "needle",
1224 },
1225 &query.Branch{
1226 Pattern: "table",
1227 }))
1228
1229 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1230 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1231 }
1232
1233 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1234 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1235 }
1236 })
1237
1238 t.Run("ChunkMatches", func(t *testing.T) {
1239 sres := searchForTest(t, b, query.NewAnd(
1240 &query.Substring{
1241 Pattern: "needle",
1242 },
1243 &query.Branch{
1244 Pattern: "table",
1245 }),
1246 chunkOpts,
1247 )
1248
1249 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1250 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1251 }
1252
1253 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1254 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1255 }
1256 })
1257}
1258
1259func TestBranchLimit(t *testing.T) {
1260 for limit := 64; limit <= 65; limit++ {
1261 r := &Repository{}
1262 for i := 0; i < limit; i++ {
1263 s := fmt.Sprintf("b%d", i)
1264 r.Branches = append(r.Branches, RepositoryBranch{
1265 s, "v-" + s,
1266 })
1267 }
1268 _, err := NewIndexBuilder(r)
1269 if limit == 64 && err != nil {
1270 t.Fatalf("NewIndexBuilder: %v", err)
1271 } else if limit == 65 && err == nil {
1272 t.Fatalf("NewIndexBuilder succeeded")
1273 }
1274 }
1275}
1276
1277func TestBranchReport(t *testing.T) {
1278 branches := []string{"stable", "master"}
1279 b := testIndexBuilder(t, &Repository{
1280 Branches: []RepositoryBranch{
1281 {"stable", "vs"},
1282 {"master", "vm"},
1283 },
1284 },
1285 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1286
1287 t.Run("LineMatches", func(t *testing.T) {
1288 sres := searchForTest(t, b, &query.Substring{
1289 Pattern: "needle",
1290 })
1291 if len(sres.Files) != 1 {
1292 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1293 }
1294
1295 f := sres.Files[0]
1296 if !reflect.DeepEqual(f.Branches, branches) {
1297 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1298 }
1299 })
1300
1301 t.Run("ChunkMatches", func(t *testing.T) {
1302 sres := searchForTest(t, b, &query.Substring{
1303 Pattern: "needle",
1304 }, chunkOpts)
1305 if len(sres.Files) != 1 {
1306 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1307 }
1308
1309 f := sres.Files[0]
1310 if !reflect.DeepEqual(f.Branches, branches) {
1311 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1312 }
1313 })
1314}
1315
1316func TestBranchVersions(t *testing.T) {
1317 b := testIndexBuilder(t, &Repository{
1318 Branches: []RepositoryBranch{
1319 {"stable", "v-stable"},
1320 {"master", "v-master"},
1321 },
1322 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1323
1324 t.Run("LineMatches", func(t *testing.T) {
1325 sres := searchForTest(t, b, &query.Substring{
1326 Pattern: "needle",
1327 })
1328 if len(sres.Files) != 1 {
1329 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1330 }
1331
1332 f := sres.Files[0]
1333 if f.Version != "v-master" {
1334 t.Fatalf("got file %#v, want version 'v-master'", f)
1335 }
1336 })
1337
1338 t.Run("ChunkMatches", func(t *testing.T) {
1339 sres := searchForTest(t, b, &query.Substring{
1340 Pattern: "needle",
1341 }, chunkOpts)
1342 if len(sres.Files) != 1 {
1343 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1344 }
1345
1346 f := sres.Files[0]
1347 if f.Version != "v-master" {
1348 t.Fatalf("got file %#v, want version 'v-master'", f)
1349 }
1350 })
1351}
1352
// mustParseRE parses s with Perl syntax flags and returns the resulting
// syntax tree. It panics with the parse error when s is not a valid
// expression, so it is only suitable for patterns known to be valid.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1361
1362func TestRegexp(t *testing.T) {
1363 content := []byte("needle the bla")
1364 // ----------------01234567890123
1365
1366 b := testIndexBuilder(t, nil,
1367 Document{
1368 Name: "f1",
1369 Content: content,
1370 })
1371
1372 t.Run("LineMatches", func(t *testing.T) {
1373 sres := searchForTest(t, b,
1374 &query.Regexp{
1375 Regexp: mustParseRE("dle.*bla"),
1376 })
1377
1378 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1379 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1380 }
1381
1382 got := sres.Files[0].LineMatches[0]
1383 want := LineMatch{
1384 LineFragments: []LineFragmentMatch{{
1385 LineOffset: 3,
1386 Offset: 3,
1387 MatchLength: 11,
1388 }},
1389 Line: content,
1390 FileName: false,
1391 LineNumber: 1,
1392 LineStart: 0,
1393 LineEnd: 14,
1394 }
1395
1396 if !reflect.DeepEqual(got, want) {
1397 t.Errorf("got %#v, want %#v", got, want)
1398 }
1399 })
1400
1401 t.Run("ChunkMatches", func(t *testing.T) {
1402 sres := searchForTest(t, b,
1403 &query.Regexp{
1404 Regexp: mustParseRE("dle.*bla"),
1405 }, chunkOpts)
1406
1407 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1408 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1409 }
1410
1411 got := sres.Files[0].ChunkMatches[0]
1412 want := ChunkMatch{
1413 Content: content,
1414 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1415 Ranges: []Range{{
1416 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1417 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1418 }},
1419 }
1420
1421 if diff := cmp.Diff(want, got); diff != "" {
1422 t.Fatal(diff)
1423 }
1424 })
1425}
1426
1427func TestRegexpFile(t *testing.T) {
1428 content := []byte("needle the bla")
1429
1430 name := "let's play: find the mussel"
1431 b := testIndexBuilder(t, nil,
1432 Document{Name: name, Content: content},
1433 Document{Name: "play.txt", Content: content})
1434
1435 t.Run("LineMatches", func(t *testing.T) {
1436 sres := searchForTest(t, b,
1437 &query.Regexp{
1438 Regexp: mustParseRE("play.*mussel"),
1439 FileName: true,
1440 })
1441
1442 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1443 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1444 }
1445
1446 if sres.Files[0].FileName != name {
1447 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1448 }
1449 })
1450
1451 t.Run("ChunkMatches", func(t *testing.T) {
1452 sres := searchForTest(t, b,
1453 &query.Regexp{
1454 Regexp: mustParseRE("play.*mussel"),
1455 FileName: true,
1456 }, chunkOpts)
1457
1458 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1459 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1460 }
1461
1462 if sres.Files[0].FileName != name {
1463 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1464 }
1465 })
1466}
1467
1468func TestRegexpOrder(t *testing.T) {
1469 content := []byte("bla the needle")
1470 // ----------------01234567890123
1471
1472 b := testIndexBuilder(t, nil,
1473 Document{Name: "f1", Content: content})
1474
1475 t.Run("LineMatches", func(t *testing.T) {
1476 sres := searchForTest(t, b,
1477 &query.Regexp{
1478 Regexp: mustParseRE("dle.*bla"),
1479 })
1480
1481 if len(sres.Files) != 0 {
1482 t.Fatalf("got %v, want 0 matches", sres.Files)
1483 }
1484 })
1485
1486 t.Run("ChunkMatches", func(t *testing.T) {
1487 sres := searchForTest(t, b,
1488 &query.Regexp{
1489 Regexp: mustParseRE("dle.*bla"),
1490 })
1491
1492 if len(sres.Files) != 0 {
1493 t.Fatalf("got %v, want 0 matches", sres.Files)
1494 }
1495 })
1496}
1497
1498func TestRepoName(t *testing.T) {
1499 content := []byte("bla the needle")
1500 // ----------------01234567890123
1501
1502 b := testIndexBuilder(t, &Repository{Name: "bla"},
1503 Document{Name: "f1", Content: content})
1504
1505 t.Run("LineMatches", func(t *testing.T) {
1506 sres := searchForTest(t, b,
1507 query.NewAnd(
1508 &query.Substring{Pattern: "needle"},
1509 &query.Repo{Regexp: regexp.MustCompile("foo")},
1510 ))
1511
1512 if len(sres.Files) != 0 {
1513 t.Fatalf("got %v, want 0 matches", sres.Files)
1514 }
1515
1516 if sres.Stats.FilesConsidered > 0 {
1517 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1518 }
1519
1520 sres = searchForTest(t, b,
1521 query.NewAnd(
1522 &query.Substring{Pattern: "needle"},
1523 &query.Repo{Regexp: regexp.MustCompile("bla")},
1524 ))
1525 if len(sres.Files) != 1 {
1526 t.Fatalf("got %v, want 1 match", sres.Files)
1527 }
1528 })
1529
1530 t.Run("ChunkMatches", func(t *testing.T) {
1531 sres := searchForTest(t, b,
1532 query.NewAnd(
1533 &query.Substring{Pattern: "needle"},
1534 &query.Repo{Regexp: regexp.MustCompile("foo")},
1535 ),
1536 chunkOpts,
1537 )
1538
1539 if len(sres.Files) != 0 {
1540 t.Fatalf("got %v, want 0 matches", sres.Files)
1541 }
1542
1543 if sres.Stats.FilesConsidered > 0 {
1544 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1545 }
1546
1547 sres = searchForTest(t, b,
1548 query.NewAnd(
1549 &query.Substring{Pattern: "needle"},
1550 &query.Repo{Regexp: regexp.MustCompile("bla")},
1551 ))
1552 if len(sres.Files) != 1 {
1553 t.Fatalf("got %v, want 1 match", sres.Files)
1554 }
1555 })
1556}
1557
1558func TestMergeMatches(t *testing.T) {
1559 content := []byte("blablabla")
1560 b := testIndexBuilder(t, nil,
1561 Document{Name: "f1", Content: content})
1562
1563 t.Run("LineMatches", func(t *testing.T) {
1564 sres := searchForTest(t, b,
1565 &query.Substring{Pattern: "bla"})
1566 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1567 t.Fatalf("got %v, want 1 match", sres.Files)
1568 }
1569 })
1570
1571 t.Run("ChunkMatches", func(t *testing.T) {
1572 sres := searchForTest(t, b,
1573 &query.Substring{Pattern: "bla"},
1574 chunkOpts,
1575 )
1576 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1577 t.Fatalf("got %v, want 1 match", sres.Files)
1578 }
1579 })
1580}
1581
1582func TestRepoURL(t *testing.T) {
1583 content := []byte("blablabla")
1584 b := testIndexBuilder(t, &Repository{
1585 Name: "name",
1586 URL: "URL",
1587 CommitURLTemplate: "commit",
1588 FileURLTemplate: "file-url",
1589 LineFragmentTemplate: "fragment",
1590 }, Document{Name: "f1", Content: content})
1591
1592 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1593
1594 if sres.RepoURLs["name"] != "file-url" {
1595 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1596 }
1597 if sres.LineFragments["name"] != "fragment" {
1598 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1599 }
1600}
1601
1602func TestRegexpCaseSensitive(t *testing.T) {
1603 content := []byte("bla\nfunc unmarshalGitiles\n")
1604 b := testIndexBuilder(t, nil, Document{
1605 Name: "f1",
1606 Content: content,
1607 })
1608
1609 t.Run("LineMatches", func(t *testing.T) {
1610 res := searchForTest(t, b,
1611 &query.Regexp{
1612 Regexp: mustParseRE("func.*Gitiles"),
1613 CaseSensitive: true,
1614 })
1615
1616 if len(res.Files) != 1 {
1617 t.Fatalf("got %v, want one match", res.Files)
1618 }
1619 })
1620
1621 t.Run("ChunkMatches", func(t *testing.T) {
1622 res := searchForTest(t, b,
1623 &query.Regexp{
1624 Regexp: mustParseRE("func.*Gitiles"),
1625 CaseSensitive: true,
1626 },
1627 chunkOpts,
1628 )
1629
1630 if len(res.Files) != 1 {
1631 t.Fatalf("got %v, want one match", res.Files)
1632 }
1633 })
1634}
1635
1636func TestRegexpCaseFolding(t *testing.T) {
1637 content := []byte("bla\nfunc unmarshalGitiles\n")
1638
1639 b := testIndexBuilder(t, nil,
1640 Document{Name: "f1", Content: content})
1641 res := searchForTest(t, b,
1642 &query.Regexp{
1643 Regexp: mustParseRE("func.*GITILES"),
1644 CaseSensitive: false,
1645 })
1646
1647 if len(res.Files) != 1 {
1648 t.Fatalf("got %v, want one match", res.Files)
1649 }
1650}
1651
1652func TestCaseRegexp(t *testing.T) {
1653 content := []byte("BLABLABLA")
1654 b := testIndexBuilder(t, nil,
1655 Document{Name: "f1", Content: content})
1656
1657 t.Run("LineMatches", func(t *testing.T) {
1658 res := searchForTest(t, b,
1659 &query.Regexp{
1660 Regexp: mustParseRE("[xb][xl][xa]"),
1661 CaseSensitive: true,
1662 })
1663
1664 if len(res.Files) > 0 {
1665 t.Fatalf("got %v, want no matches", res.Files)
1666 }
1667 })
1668
1669 t.Run("ChunkMatches", func(t *testing.T) {
1670 res := searchForTest(t, b,
1671 &query.Regexp{
1672 Regexp: mustParseRE("[xb][xl][xa]"),
1673 CaseSensitive: true,
1674 },
1675 chunkOpts,
1676 )
1677
1678 if len(res.Files) > 0 {
1679 t.Fatalf("got %v, want no matches", res.Files)
1680 }
1681 })
1682}
1683
1684func TestNegativeRegexp(t *testing.T) {
1685 content := []byte("BLABLABLA needle bla")
1686 b := testIndexBuilder(t, nil,
1687 Document{Name: "f1", Content: content})
1688
1689 t.Run("LineMatches", func(t *testing.T) {
1690 res := searchForTest(t, b,
1691 query.NewAnd(
1692 &query.Substring{
1693 Pattern: "needle",
1694 },
1695 &query.Not{
1696 Child: &query.Regexp{
1697 Regexp: mustParseRE(".cs"),
1698 },
1699 }))
1700
1701 if len(res.Files) != 1 {
1702 t.Fatalf("got %v, want 1 match", res.Files)
1703 }
1704 })
1705
1706 t.Run("ChunkMatches", func(t *testing.T) {
1707 res := searchForTest(t, b,
1708 query.NewAnd(
1709 &query.Substring{
1710 Pattern: "needle",
1711 },
1712 &query.Not{
1713 Child: &query.Regexp{
1714 Regexp: mustParseRE(".cs"),
1715 },
1716 },
1717 ),
1718 chunkOpts)
1719
1720 if len(res.Files) != 1 {
1721 t.Fatalf("got %v, want 1 match", res.Files)
1722 }
1723 })
1724}
1725
1726func TestSymbolRank(t *testing.T) {
1727 t.Skip()
1728
1729 content := []byte("func bla() blubxxxxx")
1730 // ----------------01234567890123456789
1731 b := testIndexBuilder(t, nil,
1732 Document{
1733 Name: "f1",
1734 Content: content,
1735 }, Document{
1736 Name: "f2",
1737 Content: content,
1738 Symbols: []DocumentSection{{5, 8}},
1739 }, Document{
1740 Name: "f3",
1741 Content: content,
1742 })
1743
1744 t.Run("LineMatches", func(t *testing.T) {
1745 res := searchForTest(t, b,
1746 &query.Substring{
1747 CaseSensitive: false,
1748 Pattern: "bla",
1749 })
1750
1751 if len(res.Files) != 3 {
1752 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1753 }
1754 if res.Files[0].FileName != "f2" {
1755 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1756 }
1757 })
1758
1759 t.Run("ChunkMatches", func(t *testing.T) {
1760 res := searchForTest(t, b,
1761 &query.Substring{
1762 CaseSensitive: false,
1763 Pattern: "bla",
1764 }, chunkOpts)
1765
1766 if len(res.Files) != 3 {
1767 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1768 }
1769 if res.Files[0].FileName != "f2" {
1770 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1771 }
1772 })
1773}
1774
1775func TestSymbolRankRegexpUTF8(t *testing.T) {
1776 t.Skip()
1777
1778 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1779 content := []byte(prefix +
1780 "func bla() blub")
1781 // ------012345678901234
1782 b := testIndexBuilder(t, nil,
1783 Document{
1784 Name: "f1",
1785 Content: content,
1786 }, Document{
1787 Name: "f2",
1788 Content: content,
1789 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1790 }, Document{
1791 Name: "f3",
1792 Content: content,
1793 })
1794
1795 t.Run("LineMatches", func(t *testing.T) {
1796 res := searchForTest(t, b,
1797 &query.Regexp{
1798 Regexp: mustParseRE("b.a"),
1799 })
1800
1801 if len(res.Files) != 3 {
1802 t.Fatalf("got %#v, want 3 files", res.Files)
1803 }
1804 if res.Files[0].FileName != "f2" {
1805 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1806 }
1807 })
1808
1809 t.Run("ChunjkMatches", func(t *testing.T) {
1810 res := searchForTest(t, b,
1811 &query.Regexp{
1812 Regexp: mustParseRE("b.a"),
1813 }, chunkOpts)
1814
1815 if len(res.Files) != 3 {
1816 t.Fatalf("got %#v, want 3 files", res.Files)
1817 }
1818 if res.Files[0].FileName != "f2" {
1819 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1820 }
1821 })
1822}
1823
1824func TestPartialSymbolRank(t *testing.T) {
1825 t.Skip()
1826
1827 content := []byte("func bla() blub")
1828 // ----------------012345678901234
1829
1830 b := testIndexBuilder(t, nil,
1831 Document{
1832 Name: "f1",
1833 Content: content,
1834 Symbols: []DocumentSection{{4, 9}},
1835 }, Document{
1836 Name: "f2",
1837 Content: content,
1838 Symbols: []DocumentSection{{4, 8}},
1839 }, Document{
1840 Name: "f3",
1841 Content: content,
1842 Symbols: []DocumentSection{{4, 9}},
1843 })
1844
1845 t.Run("LineMatches", func(t *testing.T) {
1846 res := searchForTest(t, b,
1847 &query.Substring{
1848 Pattern: "bla",
1849 })
1850
1851 if len(res.Files) != 3 {
1852 t.Fatalf("got %#v, want 3 files", res.Files)
1853 }
1854 if res.Files[0].FileName != "f2" {
1855 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1856 }
1857 })
1858
1859 t.Run("ChunkMatches", func(t *testing.T) {
1860 res := searchForTest(t, b,
1861 &query.Substring{
1862 Pattern: "bla",
1863 }, chunkOpts)
1864
1865 if len(res.Files) != 3 {
1866 t.Fatalf("got %#v, want 3 files", res.Files)
1867 }
1868 if res.Files[0].FileName != "f2" {
1869 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1870 }
1871 })
1872}
1873
1874func TestNegativeRepo(t *testing.T) {
1875 content := []byte("bla the needle")
1876 // ----------------01234567890123
1877 b := testIndexBuilder(t, &Repository{
1878 Name: "bla",
1879 }, Document{Name: "f1", Content: content})
1880
1881 t.Run("LineMatches", func(t *testing.T) {
1882 sres := searchForTest(t, b,
1883 query.NewAnd(
1884 &query.Substring{Pattern: "needle"},
1885 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1886 ))
1887
1888 if len(sres.Files) != 0 {
1889 t.Fatalf("got %v, want 0 matches", sres.Files)
1890 }
1891 })
1892
1893 t.Run("ChunkMatches", func(t *testing.T) {
1894 sres := searchForTest(t, b,
1895 query.NewAnd(
1896 &query.Substring{Pattern: "needle"},
1897 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1898 ), chunkOpts)
1899
1900 if len(sres.Files) != 0 {
1901 t.Fatalf("got %v, want 0 matches", sres.Files)
1902 }
1903 })
1904}
1905
1906func TestListRepos(t *testing.T) {
1907 content := []byte("bla the needle\n")
1908 // ----------------012345678901234-
1909
1910 t.Run("default and minimal fallback", func(t *testing.T) {
1911 repo := &Repository{
1912 Name: "reponame",
1913 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1914 }
1915 b := testIndexBuilder(t, repo,
1916 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1917 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1918 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1919 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1920
1921 searcher := searcherForTest(t, b)
1922
1923 for _, opts := range []*ListOptions{
1924 nil,
1925 {},
1926 {Field: RepoListFieldRepos},
1927 {Field: RepoListFieldReposMap},
1928 } {
1929 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1930 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1931
1932 res, err := searcher.List(context.Background(), q, opts)
1933 if err != nil {
1934 t.Fatalf("List(%v): %v", q, err)
1935 }
1936
1937 want := &RepoList{
1938 Repos: []*RepoListEntry{{
1939 Repository: *repo,
1940 Stats: RepoStats{
1941 Documents: 4,
1942 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1943 Shards: 1,
1944
1945 NewLinesCount: 4,
1946 DefaultBranchNewLinesCount: 2,
1947 OtherBranchesNewLinesCount: 3,
1948 },
1949 }},
1950 Stats: RepoStats{
1951 Repos: 1,
1952 Documents: 4,
1953 ContentBytes: 68,
1954 Shards: 1,
1955
1956 NewLinesCount: 4,
1957 DefaultBranchNewLinesCount: 2,
1958 OtherBranchesNewLinesCount: 3,
1959 },
1960 }
1961 ignored := []cmp.Option{
1962 cmpopts.EquateEmpty(),
1963 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1964 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1965 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1966 cmpopts.IgnoreFields(Repository{}, "priority"),
1967 }
1968 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1969 t.Fatalf("mismatch (-want +got):\n%s", diff)
1970 }
1971
1972 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1973 res, err = searcher.List(context.Background(), q, nil)
1974 if err != nil {
1975 t.Fatalf("List(%v): %v", q, err)
1976 }
1977 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1978 t.Fatalf("got %v, want 0 matches", res)
1979 }
1980 })
1981 }
1982 })
1983
1984 t.Run("minimal", func(t *testing.T) {
1985 repo := &Repository{
1986 ID: 1234,
1987 Name: "reponame",
1988 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1989 RawConfig: map[string]string{"repoid": "1234"},
1990 }
1991 b := testIndexBuilder(t, repo,
1992 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1993 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1994 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1995 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1996
1997 searcher := searcherForTest(t, b)
1998
1999 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
2000 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
2001 if err != nil {
2002 t.Fatalf("List(%v): %v", q, err)
2003 }
2004
2005 want := &RepoList{
2006 ReposMap: ReposMap{
2007 repo.ID: {
2008 HasSymbols: repo.HasSymbols,
2009 Branches: repo.Branches,
2010 },
2011 },
2012 Stats: RepoStats{
2013 Repos: 1,
2014 Shards: 1,
2015 Documents: 4,
2016 IndexBytes: 412,
2017 ContentBytes: 68,
2018 NewLinesCount: 4,
2019 DefaultBranchNewLinesCount: 2,
2020 OtherBranchesNewLinesCount: 3,
2021 },
2022 }
2023
2024 ignored := []cmp.Option{
2025 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
2026 }
2027 if diff := cmp.Diff(want, res, ignored...); diff != "" {
2028 t.Fatalf("mismatch (-want +got):\n%s", diff)
2029 }
2030
2031 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
2032 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
2033 if err != nil {
2034 t.Fatalf("List(%v): %v", q, err)
2035 }
2036 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
2037 t.Fatalf("got %v, want 0 matches", res)
2038 }
2039 })
2040}
2041
2042func TestListReposByContent(t *testing.T) {
2043 content := []byte("bla the needle")
2044
2045 b := testIndexBuilder(t, &Repository{
2046 Name: "reponame",
2047 },
2048 Document{Name: "f1", Content: content},
2049 Document{Name: "f2", Content: content})
2050
2051 searcher := searcherForTest(t, b)
2052 q := &query.Substring{Pattern: "needle"}
2053 res, err := searcher.List(context.Background(), q, nil)
2054 if err != nil {
2055 t.Fatalf("List(%v): %v", q, err)
2056 }
2057 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
2058 t.Fatalf("got %v, want 1 matches", res)
2059 }
2060 if got := res.Repos[0].Stats.Shards; got != 1 {
2061 t.Fatalf("got %d, want 1 shard", got)
2062 }
2063 q = &query.Substring{Pattern: "foo"}
2064 res, err = searcher.List(context.Background(), q, nil)
2065 if err != nil {
2066 t.Fatalf("List(%v): %v", q, err)
2067 }
2068 if len(res.Repos) != 0 {
2069 t.Fatalf("got %v, want 0 matches", res)
2070 }
2071}
2072
2073func TestMetadata(t *testing.T) {
2074 content := []byte("bla the needle")
2075
2076 b := testIndexBuilder(t, &Repository{
2077 Name: "reponame",
2078 }, Document{Name: "f1", Content: content},
2079 Document{Name: "f2", Content: content})
2080
2081 var buf bytes.Buffer
2082 if err := b.Write(&buf); err != nil {
2083 t.Fatal(err)
2084 }
2085 f := &memSeeker{buf.Bytes()}
2086
2087 rd, _, err := ReadMetadata(f)
2088 if err != nil {
2089 t.Fatalf("ReadMetadata: %v", err)
2090 }
2091
2092 if got, want := rd[0].Name, "reponame"; got != want {
2093 t.Fatalf("got %q want %q", got, want)
2094 }
2095}
2096
2097func TestOr(t *testing.T) {
2098 b := testIndexBuilder(t, nil,
2099 Document{Name: "f1", Content: []byte("needle")},
2100 Document{Name: "f2", Content: []byte("banana")})
2101 t.Run("LineMatches", func(t *testing.T) {
2102 sres := searchForTest(t, b, query.NewOr(
2103 &query.Substring{Pattern: "needle"},
2104 &query.Substring{Pattern: "banana"}))
2105
2106 if len(sres.Files) != 2 {
2107 t.Fatalf("got %v, want 2 files", sres.Files)
2108 }
2109 })
2110
2111 t.Run("ChunkMatches", func(t *testing.T) {
2112 sres := searchForTest(t, b, query.NewOr(
2113 &query.Substring{Pattern: "needle"},
2114 &query.Substring{Pattern: "banana"}))
2115
2116 if len(sres.Files) != 2 {
2117 t.Fatalf("got %v, want 2 files", sres.Files)
2118 }
2119 })
2120}
2121
2122func TestFrequency(t *testing.T) {
2123 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2124
2125 b := testIndexBuilder(t, nil,
2126 Document{
2127 Name: "f1",
2128 Content: content,
2129 })
2130
2131 t.Run("LineMatches", func(t *testing.T) {
2132 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2133 if len(sres.Files) != 0 {
2134 t.Errorf("got %v, wanted 0 matches", sres.Files)
2135 }
2136 })
2137
2138 t.Run("ChunkMatches", func(t *testing.T) {
2139 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2140 if len(sres.Files) != 0 {
2141 t.Errorf("got %v, wanted 0 matches", sres.Files)
2142 }
2143 })
2144}
2145
2146func TestMatchNewline(t *testing.T) {
2147 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2148 if err != nil {
2149 t.Fatalf("syntax.Parse: %v", err)
2150 }
2151
2152 content := []byte("pqr\nalex")
2153
2154 b := testIndexBuilder(t, nil,
2155 Document{
2156 Name: "f1",
2157 Content: content,
2158 })
2159
2160 t.Run("LineMatches", func(t *testing.T) {
2161 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2162 if len(sres.Files) != 1 {
2163 t.Errorf("got %v, wanted 1 matches", sres.Files)
2164 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2165 t.Errorf("got match line %q, want %q", l, content)
2166 }
2167 })
2168
2169 t.Run("ChunkMatches", func(t *testing.T) {
2170 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2171 if len(sres.Files) != 1 {
2172 t.Errorf("got %v, wanted 1 matches", sres.Files)
2173 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2174 t.Errorf("got match line %q, want %q", c, content)
2175 }
2176 })
2177}
2178
2179func TestSubRepo(t *testing.T) {
2180 subRepos := map[string]*Repository{
2181 "sub": {
2182 Name: "sub-name",
2183 LineFragmentTemplate: "sub-line",
2184 },
2185 }
2186
2187 content := []byte("pqr\nalex")
2188
2189 b := testIndexBuilder(t, &Repository{
2190 SubRepoMap: subRepos,
2191 }, Document{
2192 Name: "sub/f1",
2193 Content: content,
2194 SubRepositoryPath: "sub",
2195 })
2196
2197 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2198 if len(sres.Files) != 1 {
2199 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2200 }
2201
2202 f := sres.Files[0]
2203 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2204 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2205 }
2206
2207 if sres.LineFragments["sub-name"] != "sub-line" {
2208 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2209 }
2210}
2211
2212func TestSearchEither(t *testing.T) {
2213 b := testIndexBuilder(t, nil,
2214 Document{Name: "f1", Content: []byte("bla needle bla")},
2215 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2216
2217 t.Run("LineMatches", func(t *testing.T) {
2218 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2219 if len(sres.Files) != 2 {
2220 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2221 }
2222
2223 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2224 if len(sres.Files) != 1 {
2225 t.Fatalf("got %v, wanted 1 match", sres.Files)
2226 }
2227
2228 if got, want := sres.Files[0].FileName, "f1"; got != want {
2229 t.Errorf("got %q, want %q", got, want)
2230 }
2231 })
2232
2233 t.Run("ChunkMatches", func(t *testing.T) {
2234 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2235 if len(sres.Files) != 2 {
2236 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2237 }
2238
2239 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2240 if len(sres.Files) != 1 {
2241 t.Fatalf("got %v, wanted 1 match", sres.Files)
2242 }
2243
2244 if got, want := sres.Files[0].FileName, "f1"; got != want {
2245 t.Errorf("got %q, want %q", got, want)
2246 }
2247 })
2248}
2249
2250func TestUnicodeExactMatch(t *testing.T) {
2251 needle := "néédlÉ"
2252 content := []byte("blá blá " + needle + " blâ")
2253
2254 b := testIndexBuilder(t, nil,
2255 Document{Name: "f1", Content: content})
2256
2257 t.Run("LineMatches", func(t *testing.T) {
2258 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2259 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2260 }
2261 })
2262
2263 t.Run("ChunkMatches", func(t *testing.T) {
2264 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2265 if len(res.Files) != 1 {
2266 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2267 }
2268 })
2269}
2270
2271func TestUnicodeCoverContent(t *testing.T) {
2272 needle := "néédlÉ"
2273 content := []byte("blá blá " + needle + " blâ")
2274
2275 b := testIndexBuilder(t, nil,
2276 Document{Name: "f1", Content: content})
2277
2278 t.Run("LineMatches", func(t *testing.T) {
2279 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2280 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2281 }
2282
2283 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2284 if len(res.Files) != 1 {
2285 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2286 }
2287
2288 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2289 t.Errorf("got %d want %d", got, want)
2290 }
2291 })
2292
2293 t.Run("ChunkMatches", func(t *testing.T) {
2294 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2295 if len(res.Files) != 0 {
2296 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2297 }
2298
2299 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2300 if len(res.Files) != 1 {
2301 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2302 }
2303
2304 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2305 want := uint32(strings.Index(string(content), needle))
2306 if got != want {
2307 t.Errorf("got %d want %d", got, want)
2308 }
2309 })
2310}
2311
2312func TestUnicodeNonCoverContent(t *testing.T) {
2313 needle := "nééáádlÉ"
2314 content := []byte("blá blá " + needle + " blâ")
2315
2316 b := testIndexBuilder(t, nil,
2317 Document{Name: "f1", Content: content})
2318
2319 t.Run("LineMatches", func(t *testing.T) {
2320 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2321 if len(res.Files) != 1 {
2322 t.Fatalf("got %v, wanted 1 match", res.Files)
2323 }
2324
2325 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2326 t.Errorf("got %d want %d", got, want)
2327 }
2328 })
2329
2330 t.Run("ChunkMatches", func(t *testing.T) {
2331 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2332 if len(res.Files) != 1 {
2333 t.Fatalf("got %v, wanted 1 match", res.Files)
2334 }
2335
2336 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2337 want := uint32(strings.Index(string(content), needle))
2338 if got != want {
2339 t.Errorf("got %d want %d", got, want)
2340 }
2341 })
2342}
2343
2344const kelvinCodePoint = 8490
2345
2346func TestUnicodeVariableLength(t *testing.T) {
2347 lower := 'k'
2348 upper := rune(kelvinCodePoint)
2349
2350 needle := "nee" + string([]rune{lower}) + "eed"
2351 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2352 " ee" + string([]rune{lower}) + "ee" +
2353 " ee" + string([]rune{upper}) + "ee")
2354
2355 t.Run("LineMatches", func(t *testing.T) {
2356 b := testIndexBuilder(t, nil,
2357 Document{Name: "f1", Content: []byte(corpus)})
2358
2359 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2360 if len(res.Files) != 1 {
2361 t.Fatalf("got %v, wanted 1 match", res.Files)
2362 }
2363 })
2364
2365 t.Run("ChunkMatches", func(t *testing.T) {
2366 b := testIndexBuilder(t, nil,
2367 Document{Name: "f1", Content: []byte(corpus)})
2368
2369 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2370 if len(res.Files) != 1 {
2371 t.Fatalf("got %v, wanted 1 match", res.Files)
2372 }
2373 })
2374}
2375
2376func TestUnicodeFileStartOffsets(t *testing.T) {
2377 unicode := "世界"
2378 wat := "waaaaaat"
2379 b := testIndexBuilder(t, nil,
2380 Document{
2381 Name: "f1",
2382 Content: []byte(unicode),
2383 },
2384 Document{
2385 Name: "f2",
2386 Content: []byte(wat),
2387 },
2388 )
2389 q := &query.Substring{Pattern: wat, Content: true}
2390 res := searchForTest(t, b, q)
2391 if len(res.Files) != 1 {
2392 t.Fatalf("got %v, wanted 1 match", res.Files)
2393 }
2394}
2395
2396func TestLongFileUTF8(t *testing.T) {
2397 needle := "neeedle"
2398
2399 // 6 bytes.
2400 unicode := "世界"
2401 content := []byte(strings.Repeat(unicode, 100) + needle)
2402 b := testIndexBuilder(t, nil,
2403 Document{
2404 Name: "f1",
2405 Content: []byte(strings.Repeat("a", 50)),
2406 },
2407 Document{
2408 Name: "f2",
2409 Content: content,
2410 })
2411
2412 t.Run("LineMatches", func(t *testing.T) {
2413 q := &query.Substring{Pattern: needle, Content: true}
2414 res := searchForTest(t, b, q)
2415 if len(res.Files) != 1 {
2416 t.Errorf("got %v, want 1 result", res)
2417 }
2418 })
2419
2420 t.Run("ChunkMatches", func(t *testing.T) {
2421 q := &query.Substring{Pattern: needle, Content: true}
2422 res := searchForTest(t, b, q, chunkOpts)
2423 if len(res.Files) != 1 {
2424 t.Errorf("got %v, want 1 result", res)
2425 }
2426 })
2427}
2428
2429func TestEstimateDocCount(t *testing.T) {
2430 content := []byte("bla needle bla")
2431 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2432 Document{Name: "f1", Content: content},
2433 Document{Name: "f2", Content: content},
2434 )
2435
2436 t.Run("LineMatches", func(t *testing.T) {
2437 if sres := searchForTest(t, b,
2438 query.NewAnd(
2439 &query.Substring{Pattern: "needle"},
2440 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2441 ), SearchOptions{
2442 EstimateDocCount: true,
2443 }); sres.Stats.ShardFilesConsidered != 2 {
2444 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2445 }
2446 if sres := searchForTest(t, b,
2447 query.NewAnd(
2448 &query.Substring{Pattern: "needle"},
2449 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2450 ), SearchOptions{
2451 EstimateDocCount: true,
2452 }); sres.Stats.ShardFilesConsidered != 0 {
2453 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2454 }
2455 })
2456
2457 t.Run("ChunkMatches", func(t *testing.T) {
2458 if sres := searchForTest(t, b,
2459 query.NewAnd(
2460 &query.Substring{Pattern: "needle"},
2461 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2462 ), SearchOptions{
2463 EstimateDocCount: true,
2464 ChunkMatches: true,
2465 }); sres.Stats.ShardFilesConsidered != 2 {
2466 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2467 }
2468 if sres := searchForTest(t, b,
2469 query.NewAnd(
2470 &query.Substring{Pattern: "needle"},
2471 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2472 ), SearchOptions{
2473 EstimateDocCount: true,
2474 ChunkMatches: true,
2475 }); sres.Stats.ShardFilesConsidered != 0 {
2476 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2477 }
2478 })
2479}
2480
2481func TestUTF8CorrectCorpus(t *testing.T) {
2482 needle := "neeedle"
2483
2484 // 6 bytes.
2485 unicode := "世界"
2486 b := testIndexBuilder(t, nil,
2487 Document{
2488 Name: "f1",
2489 Content: []byte(strings.Repeat(unicode, 100)),
2490 },
2491 Document{
2492 Name: "xxxxxneeedle",
2493 Content: []byte("hello"),
2494 })
2495
2496 t.Run("LineMatches", func(t *testing.T) {
2497 q := &query.Substring{Pattern: needle, FileName: true}
2498 res := searchForTest(t, b, q)
2499 if len(res.Files) != 1 {
2500 t.Errorf("got %v, want 1 result", res)
2501 }
2502 })
2503
2504 t.Run("ChunkMatches", func(t *testing.T) {
2505 q := &query.Substring{Pattern: needle, FileName: true}
2506 res := searchForTest(t, b, q, chunkOpts)
2507 if len(res.Files) != 1 {
2508 t.Errorf("got %v, want 1 result", res)
2509 }
2510 })
2511}
2512
2513func TestBuilderStats(t *testing.T) {
2514 b := testIndexBuilder(t, nil,
2515 Document{
2516 Name: "f1",
2517 Content: []byte(strings.Repeat("abcd", 1024)),
2518 })
2519 var buf bytes.Buffer
2520 if err := b.Write(&buf); err != nil {
2521 t.Fatal(err)
2522 }
2523
2524 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2525 t.Errorf("got %d, want %d", got, want)
2526 }
2527}
2528
2529func TestIOStats(t *testing.T) {
2530 b := testIndexBuilder(t, nil,
2531 Document{
2532 Name: "f1",
2533 Content: []byte(strings.Repeat("abcd", 1024)),
2534 })
2535
2536 t.Run("LineMatches", func(t *testing.T) {
2537 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2538 res := searchForTest(t, b, q)
2539
2540 // 4096 (content) + 2 (overhead: newlines or doc sections)
2541 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2542 t.Errorf("got content I/O %d, want %d", got, want)
2543 }
2544
2545 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2546 // delta encoded.
2547 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2548 t.Errorf("got index I/O %d, want %d", got, want)
2549 }
2550 })
2551
2552 t.Run("ChunkMatches", func(t *testing.T) {
2553 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2554 res := searchForTest(t, b, q, chunkOpts)
2555
2556 // 4096 (content) + 2 (overhead: newlines or doc sections)
2557 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2558 t.Errorf("got content I/O %d, want %d", got, want)
2559 }
2560
2561 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2562 // delta encoded.
2563 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2564 t.Errorf("got index I/O %d, want %d", got, want)
2565 }
2566 })
2567
2568 t.Run("LineMatches with BM25", func(t *testing.T) {
2569 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2570 res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true})
2571
2572 // 4096 (content) + 2 (overhead: newlines or doc sections)
2573 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2574 t.Errorf("got content I/O %d, want %d", got, want)
2575 }
2576
2577 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2578 // delta encoded.
2579 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2580 t.Errorf("got index I/O %d, want %d", got, want)
2581 }
2582 })
2583
2584 t.Run("ChunkMatches with BM25", func(t *testing.T) {
2585 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2586 res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true, ChunkMatches: true})
2587
2588 // 4096 (content) + 2 (overhead: newlines or doc sections)
2589 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2590 t.Errorf("got content I/O %d, want %d", got, want)
2591 }
2592
2593 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2594 // delta encoded.
2595 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2596 t.Errorf("got index I/O %d, want %d", got, want)
2597 }
2598 })
2599}
2600
2601func TestStartLineAnchor(t *testing.T) {
2602 b := testIndexBuilder(t, nil,
2603 Document{
2604 Name: "f1",
2605 Content: []byte(
2606 `hello
2607start of middle of line
2608`),
2609 })
2610
2611 t.Run("LineMatches", func(t *testing.T) {
2612 q, err := query.Parse("^start")
2613 if err != nil {
2614 t.Errorf("parse: %v", err)
2615 }
2616
2617 res := searchForTest(t, b, q)
2618 if len(res.Files) != 1 {
2619 t.Errorf("got %v, want 1 file", res.Files)
2620 }
2621
2622 q, err = query.Parse("^middle")
2623 if err != nil {
2624 t.Errorf("parse: %v", err)
2625 }
2626 res = searchForTest(t, b, q)
2627 if len(res.Files) != 0 {
2628 t.Errorf("got %v, want 0 files", res.Files)
2629 }
2630 })
2631
2632 t.Run("ChunkMatches", func(t *testing.T) {
2633 q, err := query.Parse("^start")
2634 if err != nil {
2635 t.Errorf("parse: %v", err)
2636 }
2637
2638 res := searchForTest(t, b, q, chunkOpts)
2639 if len(res.Files) != 1 {
2640 t.Errorf("got %v, want 1 file", res.Files)
2641 }
2642
2643 q, err = query.Parse("^middle")
2644 if err != nil {
2645 t.Errorf("parse: %v", err)
2646 }
2647 res = searchForTest(t, b, q, chunkOpts)
2648 if len(res.Files) != 0 {
2649 t.Errorf("got %v, want 0 files", res.Files)
2650 }
2651 })
2652}
2653
2654func TestAndOrUnicode(t *testing.T) {
2655 q, err := query.Parse("orange.*apple")
2656 if err != nil {
2657 t.Errorf("parse: %v", err)
2658 }
2659 finalQ := query.NewAnd(q,
2660 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2661 query.NewOr(&query.Branch{Pattern: "master"}))))
2662
2663 b := testIndexBuilder(t, &Repository{
2664 Name: "name",
2665 Branches: []RepositoryBranch{{"master", "master-version"}},
2666 }, Document{
2667 Name: "f2",
2668 Content: []byte("orange\u2318apple"),
2669 // --------------0123456 78901
2670 Branches: []string{"master"},
2671 })
2672
2673 t.Run("LineMatches", func(t *testing.T) {
2674 res := searchForTest(t, b, finalQ)
2675 if len(res.Files) != 1 {
2676 t.Errorf("got %v, want 1 result", res.Files)
2677 }
2678 })
2679
2680 t.Run("ChunkMatches", func(t *testing.T) {
2681 res := searchForTest(t, b, finalQ, chunkOpts)
2682 if len(res.Files) != 1 {
2683 t.Errorf("got %v, want 1 result", res.Files)
2684 }
2685 })
2686}
2687
2688func TestAndShort(t *testing.T) {
2689 content := []byte("bla needle at orange bla")
2690 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2691 Document{Name: "f1", Content: content},
2692 Document{Name: "f2", Content: []byte("xx at xx")},
2693 Document{Name: "f3", Content: []byte("yy orange xx")},
2694 )
2695
2696 q := query.NewAnd(&query.Substring{Pattern: "at"},
2697 &query.Substring{Pattern: "orange"})
2698
2699 t.Run("LineMatches", func(t *testing.T) {
2700 res := searchForTest(t, b, q)
2701 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2702 t.Errorf("got %v, want 1 result", res.Files)
2703 }
2704 })
2705
2706 t.Run("ChunkMatches", func(t *testing.T) {
2707 res := searchForTest(t, b, q, chunkOpts)
2708 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2709 t.Errorf("got %v, want 1 result", res.Files)
2710 }
2711 })
2712}
2713
2714func TestNoCollectRegexpSubstring(t *testing.T) {
2715 content := []byte("bla final bla\nfoo final, foo")
2716 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2717 Document{Name: "f1", Content: content},
2718 )
2719
2720 q := &query.Regexp{
2721 Regexp: mustParseRE("final[,.]"),
2722 }
2723
2724 t.Run("LineMatches", func(t *testing.T) {
2725 res := searchForTest(t, b, q)
2726 if len(res.Files) != 1 {
2727 t.Fatalf("got %v, want 1 result", res.Files)
2728 }
2729 if f := res.Files[0]; len(f.LineMatches) != 1 {
2730 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2731 }
2732 })
2733
2734 t.Run("ChunkMatches", func(t *testing.T) {
2735 res := searchForTest(t, b, q, chunkOpts)
2736 if len(res.Files) != 1 {
2737 t.Fatalf("got %v, want 1 result", res.Files)
2738 }
2739 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2740 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2741 }
2742 })
2743}
2744
2745func printLineMatches(ms []LineMatch) string {
2746 var ss []string
2747 for _, m := range ms {
2748 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2749 }
2750
2751 return strings.Join(ss, ", ")
2752}
2753
2754func TestLang(t *testing.T) {
2755 content := []byte("bla needle bla")
2756 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2757 Document{Name: "f1", Content: content},
2758 Document{Name: "f2", Language: "java", Content: content},
2759 Document{Name: "f3", Language: "cpp", Content: content},
2760 )
2761
2762 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2763 &query.Language{Language: "cpp"})
2764
2765 t.Run("LineMatches", func(t *testing.T) {
2766 res := searchForTest(t, b, q)
2767 if len(res.Files) != 1 {
2768 t.Fatalf("got %v, want 1 result in f3", res.Files)
2769 }
2770 f := res.Files[0]
2771 if f.FileName != "f3" || f.Language != "cpp" {
2772 t.Fatalf("got %v, want 1 match with language cpp", f)
2773 }
2774 })
2775
2776 t.Run("ChunkMatches", func(t *testing.T) {
2777 res := searchForTest(t, b, q, chunkOpts)
2778 if len(res.Files) != 1 {
2779 t.Fatalf("got %v, want 1 result in f3", res.Files)
2780 }
2781 f := res.Files[0]
2782 if f.FileName != "f3" || f.Language != "cpp" {
2783 t.Fatalf("got %v, want 1 match with language cpp", f)
2784 }
2785 })
2786}
2787
2788func TestLangShortcut(t *testing.T) {
2789 content := []byte("bla needle bla")
2790 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2791 Document{Name: "f2", Language: "java", Content: content},
2792 Document{Name: "f3", Language: "cpp", Content: content},
2793 )
2794
2795 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2796 &query.Language{Language: "fortran"})
2797
2798 t.Run("LineMatches", func(t *testing.T) {
2799 res := searchForTest(t, b, q)
2800 if len(res.Files) != 0 {
2801 t.Fatalf("got %v, want 0 results", res.Files)
2802 }
2803 if res.Stats.IndexBytesLoaded > 0 {
2804 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2805 }
2806 })
2807
2808 t.Run("ChunkMatches", func(t *testing.T) {
2809 res := searchForTest(t, b, q, chunkOpts)
2810 if len(res.Files) != 0 {
2811 t.Fatalf("got %v, want 0 results", res.Files)
2812 }
2813 if res.Stats.IndexBytesLoaded > 0 {
2814 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2815 }
2816 })
2817}
2818
2819func TestNoTextMatchAtoms(t *testing.T) {
2820 content := []byte("bla needle bla")
2821 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2822 Document{Name: "f1", Content: content},
2823 Document{Name: "f2", Language: "java", Content: content},
2824 Document{Name: "f3", Language: "cpp", Content: content},
2825 )
2826 q := query.NewAnd(&query.Language{Language: "java"})
2827 t.Run("LineMatches", func(t *testing.T) {
2828 res := searchForTest(t, b, q)
2829 if len(res.Files) != 1 {
2830 t.Fatalf("got %v, want 1 result in f3", res.Files)
2831 }
2832 })
2833
2834 t.Run("ChunkMatches", func(t *testing.T) {
2835 res := searchForTest(t, b, q, chunkOpts)
2836 if len(res.Files) != 1 {
2837 t.Fatalf("got %v, want 1 result in f3", res.Files)
2838 }
2839 })
2840}
2841
2842func TestNoPositiveAtoms(t *testing.T) {
2843 content := []byte("bla needle bla")
2844 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2845 Document{Name: "f1", Content: content},
2846 Document{Name: "f2", Content: content},
2847 )
2848
2849 q := query.NewAnd(
2850 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2851 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2852 t.Run("LineMatches", func(t *testing.T) {
2853 res := searchForTest(t, b, q)
2854 if len(res.Files) != 2 {
2855 t.Fatalf("got %v, want 2 results in f3", res.Files)
2856 }
2857 })
2858 t.Run("ChunkMatches", func(t *testing.T) {
2859 res := searchForTest(t, b, q, chunkOpts)
2860 if len(res.Files) != 2 {
2861 t.Fatalf("got %v, want 2 results in f3", res.Files)
2862 }
2863 })
2864}
2865
2866func TestSymbolBoundaryStart(t *testing.T) {
2867 content := []byte("start\nbla bla\nend")
2868 // ----------------012345-67890123-456
2869
2870 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2871 Document{
2872 Name: "f1",
2873 Content: content,
2874 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2875 },
2876 )
2877 q := &query.Symbol{
2878 Expr: &query.Substring{Pattern: "start"},
2879 }
2880 t.Run("LineMatches", func(t *testing.T) {
2881 res := searchForTest(t, b, q)
2882 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2883 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2884 }
2885 m := res.Files[0].LineMatches[0].LineFragments[0]
2886 if m.Offset != 0 {
2887 t.Fatalf("got offset %d want 0", m.Offset)
2888 }
2889 })
2890
2891 t.Run("ChunkMatches", func(t *testing.T) {
2892 res := searchForTest(t, b, q, chunkOpts)
2893 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2894 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2895 }
2896 m := res.Files[0].ChunkMatches[0].Ranges[0]
2897 if m.Start.ByteOffset != 0 {
2898 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2899 }
2900 })
2901}
2902
2903func TestSymbolBoundaryEnd(t *testing.T) {
2904 content := []byte("start\nbla bla\nend")
2905 // ----------------012345-67890123-456
2906
2907 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2908 Document{
2909 Name: "f1",
2910 Content: content,
2911 Symbols: []DocumentSection{{14, 17}},
2912 },
2913 )
2914 q := &query.Symbol{
2915 Expr: &query.Substring{Pattern: "end"},
2916 }
2917 t.Run("LineMatches", func(t *testing.T) {
2918 res := searchForTest(t, b, q)
2919 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2920 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2921 }
2922 m := res.Files[0].LineMatches[0].LineFragments[0]
2923 if m.Offset != 14 {
2924 t.Fatalf("got offset %d want 0", m.Offset)
2925 }
2926 })
2927
2928 t.Run("ChunkMatches", func(t *testing.T) {
2929 res := searchForTest(t, b, q, chunkOpts)
2930 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2931 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2932 }
2933 m := res.Files[0].ChunkMatches[0].Ranges[0]
2934 if m.Start.ByteOffset != 14 {
2935 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2936 }
2937 })
2938}
2939
2940func TestSymbolSubstring(t *testing.T) {
2941 content := []byte("bla\nsymblabla\nbla")
2942 // ----------------0123-4567890123-456
2943
2944 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2945 Document{
2946 Name: "f1",
2947 Content: content,
2948 Symbols: []DocumentSection{{4, 12}},
2949 },
2950 )
2951 q := &query.Symbol{
2952 Expr: &query.Substring{Pattern: "bla"},
2953 }
2954 t.Run("LineMatches", func(t *testing.T) {
2955 res := searchForTest(t, b, q)
2956 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2957 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2958 }
2959 m := res.Files[0].LineMatches[0].LineFragments[0]
2960 if m.Offset != 7 || m.MatchLength != 3 {
2961 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2962 }
2963 })
2964
2965 t.Run("ChunkMatches", func(t *testing.T) {
2966 res := searchForTest(t, b, q, chunkOpts)
2967 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2968 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2969 }
2970 m := res.Files[0].ChunkMatches[0].Ranges[0]
2971 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2972 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2973 }
2974 })
2975}
2976
2977func TestSymbolSubstringExact(t *testing.T) {
2978 content := []byte("bla\nsym\nbla\nsym\nasymb")
2979 // ----------------0123-4567-890123456-78901
2980
2981 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2982 Document{
2983 Name: "f1",
2984 Content: content,
2985 Symbols: []DocumentSection{{4, 7}},
2986 },
2987 )
2988 q := &query.Symbol{
2989 Expr: &query.Substring{Pattern: "sym"},
2990 }
2991 t.Run("LineMatches", func(t *testing.T) {
2992 res := searchForTest(t, b, q)
2993 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2994 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2995 }
2996 m := res.Files[0].LineMatches[0].LineFragments[0]
2997 if m.Offset != 4 {
2998 t.Fatalf("got offset %d, want 7", m.Offset)
2999 }
3000 })
3001
3002 t.Run("ChunkMatches", func(t *testing.T) {
3003 res := searchForTest(t, b, q, chunkOpts)
3004 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3005 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3006 }
3007 m := res.Files[0].ChunkMatches[0].Ranges[0]
3008 if m.Start.ByteOffset != 4 {
3009 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
3010 }
3011 })
3012}
3013
3014func TestSymbolRegexpExact(t *testing.T) {
3015 content := []byte("blah\nbla\nbl")
3016 // ----------------01234-5678-90
3017
3018 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3019 Document{
3020 Name: "f1",
3021 Content: content,
3022 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
3023 },
3024 )
3025 q := &query.Symbol{
3026 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
3027 }
3028 t.Run("LineMatches", func(t *testing.T) {
3029 res := searchForTest(t, b, q)
3030 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3031 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3032 }
3033 m := res.Files[0].LineMatches[0].LineFragments[0]
3034 if m.Offset != 5 {
3035 t.Fatalf("got offset %d, want 5", m.Offset)
3036 }
3037 })
3038
3039 t.Run("ChunkMatches", func(t *testing.T) {
3040 res := searchForTest(t, b, q, chunkOpts)
3041 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3042 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3043 }
3044 m := res.Files[0].ChunkMatches[0].Ranges[0]
3045 if m.Start.ByteOffset != 5 {
3046 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
3047 }
3048 })
3049}
3050
3051func TestSymbolRegexpPartial(t *testing.T) {
3052 content := []byte("abcdef")
3053 // ----------------012345
3054
3055 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3056 Document{
3057 Name: "f1",
3058 Content: content,
3059 Symbols: []DocumentSection{{0, 6}},
3060 },
3061 )
3062 q := &query.Symbol{
3063 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
3064 }
3065 t.Run("LineMatches", func(t *testing.T) {
3066 res := searchForTest(t, b, q)
3067 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
3068 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3069 }
3070 m := res.Files[0].LineMatches[0].LineFragments[0]
3071 if m.Offset != 1 {
3072 t.Fatalf("got offset %d, want 1", m.Offset)
3073 }
3074 if m.MatchLength != 3 {
3075 t.Fatalf("got match length %d, want 3", m.MatchLength)
3076 }
3077 })
3078
3079 t.Run("ChunkMatches", func(t *testing.T) {
3080 res := searchForTest(t, b, q, chunkOpts)
3081 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
3082 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
3083 }
3084 m := res.Files[0].ChunkMatches[0].Ranges[0]
3085 if m.Start.ByteOffset != 1 {
3086 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
3087 }
3088 if m.End.ByteOffset != 4 {
3089 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
3090 }
3091 })
3092}
3093
3094func TestSymbolRegexpAll(t *testing.T) {
3095 docs := []Document{
3096 {
3097 Name: "f1",
3098 Content: []byte("Hello Zoekt"),
3099 // --------------01234567890
3100 Symbols: []DocumentSection{{0, 5}, {6, 11}},
3101 },
3102 {
3103 Name: "f2",
3104 Content: []byte("Second Zoekt Third"),
3105 // --------------012345678901234567
3106 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
3107 },
3108 }
3109
3110 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
3111 q := &query.Symbol{
3112 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
3113 }
3114 t.Run("LineMatches", func(t *testing.T) {
3115 res := searchForTest(t, b, q)
3116 if len(res.Files) != len(docs) {
3117 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3118 }
3119 for i, want := range docs {
3120 got := res.Files[i].LineMatches[0].LineFragments
3121 if len(got) != len(want.Symbols) {
3122 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3123 }
3124
3125 for j, sec := range want.Symbols {
3126 if sec.Start != got[j].Offset {
3127 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3128 }
3129 }
3130 }
3131 })
3132
3133 t.Run("ChunkMatches", func(t *testing.T) {
3134 res := searchForTest(t, b, q, chunkOpts)
3135 if len(res.Files) != len(docs) {
3136 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3137 }
3138 for i, want := range docs {
3139 got := res.Files[i].ChunkMatches[0].Ranges
3140 if len(got) != len(want.Symbols) {
3141 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3142 }
3143
3144 for j, sec := range want.Symbols {
3145 if sec.Start != uint32(got[j].Start.ByteOffset) {
3146 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3147 }
3148 }
3149 }
3150 })
3151}
3152
3153func TestHitIterTerminate(t *testing.T) {
3154 // contrived input: trigram frequencies forces selecting abc +
3155 // def for the distance iteration. There is no match, so this
3156 // will advance the compressedPostingIterator to beyond the
3157 // end.
3158 content := []byte("abc bcdbcd cdecde abcabc def efg")
3159 b := testIndexBuilder(t, nil,
3160 Document{
3161 Name: "f1",
3162 Content: content,
3163 },
3164 )
3165
3166 t.Run("LineMatches", func(t *testing.T) {
3167 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3168 })
3169
3170 t.Run("ChunkMatches", func(t *testing.T) {
3171 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3172 })
3173}
3174
3175func TestDistanceHitIterBailLast(t *testing.T) {
3176 content := []byte("AST AST AST UASH")
3177 b := testIndexBuilder(t, nil,
3178 Document{
3179 Name: "f1",
3180 Content: content,
3181 },
3182 )
3183 t.Run("LineMatches", func(t *testing.T) {
3184 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3185 if len(res.Files) != 0 {
3186 t.Fatalf("got %v, want no results", res.Files)
3187 }
3188 })
3189
3190 t.Run("LineMatches", func(t *testing.T) {
3191 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3192 if len(res.Files) != 0 {
3193 t.Fatalf("got %v, want no results", res.Files)
3194 }
3195 })
3196}
3197
3198func TestDocumentSectionRuneBoundary(t *testing.T) {
3199 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3200 b, err := NewIndexBuilder(nil)
3201 if err != nil {
3202 t.Fatalf("NewIndexBuilder: %v", err)
3203 }
3204
3205 for i, sec := range []DocumentSection{
3206 {2, 6},
3207 {3, 7},
3208 } {
3209 if err := b.Add(Document{
3210 Name: "f1",
3211 Content: []byte(content),
3212 Symbols: []DocumentSection{sec},
3213 }); err == nil {
3214 t.Errorf("%d: Add succeeded", i)
3215 }
3216 }
3217}
3218
3219func TestUnicodeQuery(t *testing.T) {
3220 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3221 b := testIndexBuilder(t, nil,
3222 Document{
3223 Name: "f1",
3224 Content: []byte(content),
3225 },
3226 )
3227
3228 q := &query.Substring{Pattern: content}
3229
3230 t.Run("LineMatches", func(t *testing.T) {
3231 res := searchForTest(t, b, q)
3232 if len(res.Files) != 1 {
3233 t.Fatalf("want 1 match, got %v", res.Files)
3234 }
3235
3236 f := res.Files[0]
3237 if len(f.LineMatches) != 1 {
3238 t.Fatalf("want 1 line, got %v", f.LineMatches)
3239 }
3240 l := f.LineMatches[0]
3241
3242 if len(l.LineFragments) != 1 {
3243 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3244 }
3245 fr := l.LineFragments[0]
3246 if fr.MatchLength != len(content) {
3247 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3248 }
3249 })
3250
3251 t.Run("ChunkMatches", func(t *testing.T) {
3252 res := searchForTest(t, b, q, chunkOpts)
3253 if len(res.Files) != 1 {
3254 t.Fatalf("want 1 match, got %v", res.Files)
3255 }
3256
3257 f := res.Files[0]
3258 if len(f.ChunkMatches) != 1 {
3259 t.Fatalf("want 1 line, got %v", f.LineMatches)
3260 }
3261 cm := f.ChunkMatches[0]
3262
3263 if len(cm.Ranges) != 1 {
3264 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3265 }
3266 rr := cm.Ranges[0]
3267 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3268 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3269 }
3270 })
3271}
3272
3273func TestSkipInvalidContent(t *testing.T) {
3274 for _, content := range []string{
3275 // Binary
3276 "abc def \x00 abc",
3277 } {
3278
3279 b, err := NewIndexBuilder(nil)
3280 if err != nil {
3281 t.Fatalf("NewIndexBuilder: %v", err)
3282 }
3283
3284 if err := b.Add(Document{
3285 Name: "f1",
3286 Content: []byte(content),
3287 }); err != nil {
3288 t.Fatal(err)
3289 }
3290
3291 t.Run("LineMatches", func(t *testing.T) {
3292 q := &query.Substring{Pattern: "abc def"}
3293 res := searchForTest(t, b, q)
3294 if len(res.Files) != 0 {
3295 t.Fatalf("got %v, want no results", res.Files)
3296 }
3297
3298 q = &query.Substring{Pattern: "NOT-INDEXED"}
3299 res = searchForTest(t, b, q)
3300 if len(res.Files) != 1 {
3301 t.Fatalf("got %v, want 1 result", res.Files)
3302 }
3303 })
3304
3305 t.Run("ChunkMatches", func(t *testing.T) {
3306 q := &query.Substring{Pattern: "abc def"}
3307 res := searchForTest(t, b, q, chunkOpts)
3308 if len(res.Files) != 0 {
3309 t.Fatalf("got %v, want no results", res.Files)
3310 }
3311
3312 q = &query.Substring{Pattern: "NOT-INDEXED"}
3313 res = searchForTest(t, b, q, chunkOpts)
3314 if len(res.Files) != 1 {
3315 t.Fatalf("got %v, want 1 result", res.Files)
3316 }
3317 })
3318 }
3319}
3320
3321func TestDocChecker(t *testing.T) {
3322 docChecker := DocChecker{}
3323
3324 // Test valid and invalid text
3325 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3326 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3327 t.Errorf("Check(%q): %v", text, err)
3328 }
3329 }
3330 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3331 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3332 t.Errorf("Check(%q) succeeded", text)
3333 }
3334 }
3335
3336 // Test valid and invalid text with an allowed large file
3337 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3338 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3339 t.Errorf("Check(%q): %v", text, err)
3340 }
3341 }
3342 for _, text := range []string{"zero\x00byte", "xx"} {
3343 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3344 t.Errorf("Check(%q) succeeded", text)
3345 }
3346 }
3347}
3348
3349func TestLineAnd(t *testing.T) {
3350 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3351 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3352 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3353 Document{Name: "f3", Content: []byte("banana grape")},
3354 )
3355 pattern := "(apple)(?-s:.)*?(banana)"
3356 r, _ := syntax.Parse(pattern, syntax.Perl)
3357
3358 q := query.Regexp{
3359 Regexp: r,
3360 Content: true,
3361 }
3362 t.Run("LineMatches", func(t *testing.T) {
3363 res := searchForTest(t, b, &q)
3364 wantRegexpCount := 1
3365 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3366 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3367 }
3368 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3369 t.Errorf("got %v, want 1 result", res.Files)
3370 }
3371 })
3372
3373 t.Run("ChunkMatches", func(t *testing.T) {
3374 res := searchForTest(t, b, &q, chunkOpts)
3375 wantRegexpCount := 1
3376 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3377 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3378 }
3379 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3380 t.Errorf("got %v, want 1 result", res.Files)
3381 }
3382 })
3383}
3384
3385func TestLineAndFileName(t *testing.T) {
3386 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3387 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3388 Document{Name: "f2", Content: []byte("apple banana\norange")},
3389 Document{Name: "apple banana", Content: []byte("banana grape")},
3390 )
3391 pattern := "(apple)(?-s:.)*?(banana)"
3392 r, _ := syntax.Parse(pattern, syntax.Perl)
3393
3394 q := query.Regexp{
3395 Regexp: r,
3396 FileName: true,
3397 }
3398 t.Run("LineMatches", func(t *testing.T) {
3399 res := searchForTest(t, b, &q)
3400 wantRegexpCount := 1
3401 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3402 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3403 }
3404 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3405 t.Errorf("got %v, want 1 result", res.Files)
3406 }
3407 })
3408
3409 t.Run("ChunkMatches", func(t *testing.T) {
3410 res := searchForTest(t, b, &q, chunkOpts)
3411 wantRegexpCount := 1
3412 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3413 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3414 }
3415 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3416 t.Errorf("got %v, want 1 result", res.Files)
3417 }
3418 })
3419}
3420
3421func TestMultiLineRegex(t *testing.T) {
3422 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3423 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3424 Document{Name: "f2", Content: []byte("apple orange")},
3425 Document{Name: "f3", Content: []byte("grape apple")},
3426 )
3427 pattern := "(apple).*?[[:space:]].*?(grape)"
3428 r, _ := syntax.Parse(pattern, syntax.Perl)
3429
3430 q := query.Regexp{
3431 Regexp: r,
3432 }
3433 t.Run("LineMatches", func(t *testing.T) {
3434 res := searchForTest(t, b, &q)
3435 wantRegexpCount := 2
3436 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3437 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3438 }
3439 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3440 t.Errorf("got %v, want 1 result", res.Files)
3441 }
3442 if l := len(res.Files[0].LineMatches); l != 2 {
3443 t.Errorf("got %v, want 2 line matches", l)
3444 }
3445 })
3446
3447 t.Run("ChunkMatches", func(t *testing.T) {
3448 res := searchForTest(t, b, &q, chunkOpts)
3449 wantRegexpCount := 2
3450 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3451 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3452 }
3453 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3454 t.Errorf("got %v, want 1 result", res.Files)
3455 }
3456 if l := len(res.Files[0].ChunkMatches); l != 1 {
3457 t.Errorf("got %v, want 1 chunk matches", l)
3458 }
3459 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3460 t.Errorf("got %v, want 1 chunk ranges", l)
3461 }
3462 })
3463}
3464
3465func TestSearchTypeFileName(t *testing.T) {
3466 b := testIndexBuilder(t, &Repository{
3467 Name: "reponame",
3468 },
3469 Document{Name: "f1", Content: []byte("bla the needle")},
3470 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3471 // -----------------------------------012345678901234567890-123456
3472 )
3473
3474 t.Run("LineMatches", func(t *testing.T) {
3475 wantSingleMatch := func(res *SearchResult, want string) {
3476 t.Helper()
3477 fmatches := res.Files
3478 if len(fmatches) != 1 {
3479 t.Errorf("got %v, want 1 matches", len(fmatches))
3480 return
3481 }
3482 if len(fmatches[0].LineMatches) != 1 {
3483 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3484 return
3485 }
3486 var got string
3487 if fmatches[0].LineMatches[0].FileName {
3488 got = fmatches[0].FileName
3489 } else {
3490 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3491 }
3492
3493 if got != want {
3494 t.Errorf("got %s, want %s", got, want)
3495 }
3496 }
3497
3498 // Only return the later match in the second file
3499 res := searchForTest(t, b, query.NewAnd(
3500 &query.Type{
3501 Type: query.TypeFileName,
3502 Child: &query.Substring{Pattern: "needle"},
3503 },
3504 &query.Substring{Pattern: "file"}))
3505 wantSingleMatch(res, "f2:8")
3506
3507 // Only return a filename result
3508 res = searchForTest(t, b,
3509 &query.Type{
3510 Type: query.TypeFileName,
3511 Child: &query.Substring{Pattern: "file"},
3512 })
3513 wantSingleMatch(res, "f2")
3514 })
3515
3516 t.Run("ChunkMatches", func(t *testing.T) {
3517 wantSingleMatch := func(res *SearchResult, want string) {
3518 t.Helper()
3519 fmatches := res.Files
3520 if len(fmatches) != 1 {
3521 t.Errorf("got %v, want 1 matches", len(fmatches))
3522 return
3523 }
3524 if len(fmatches[0].ChunkMatches) != 1 {
3525 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3526 return
3527 }
3528 var got string
3529 if fmatches[0].ChunkMatches[0].FileName {
3530 got = fmatches[0].FileName
3531 } else {
3532 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3533 }
3534
3535 if got != want {
3536 t.Errorf("got %s, want %s", got, want)
3537 }
3538 }
3539
3540 // Only return the later match in the second file
3541 res := searchForTest(t, b, query.NewAnd(
3542 &query.Type{
3543 Type: query.TypeFileName,
3544 Child: &query.Substring{Pattern: "needle"},
3545 },
3546 &query.Substring{Pattern: "file"}),
3547 chunkOpts,
3548 )
3549 wantSingleMatch(res, "f2:8")
3550
3551 // Only return a filename result
3552 res = searchForTest(t, b,
3553 &query.Type{
3554 Type: query.TypeFileName,
3555 Child: &query.Substring{Pattern: "file"},
3556 },
3557 chunkOpts,
3558 )
3559 wantSingleMatch(res, "f2")
3560 })
3561}
3562
3563func TestSearchTypeLanguage(t *testing.T) {
3564 b := testIndexBuilder(t, &Repository{
3565 Name: "reponame",
3566 },
3567 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3568 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3569 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3570 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3571 )
3572
3573 t.Log(b.languageMap)
3574
3575 t.Run("LineMatches", func(t *testing.T) {
3576 wantSingleMatch := func(res *SearchResult, want string) {
3577 t.Helper()
3578 fmatches := res.Files
3579 if len(fmatches) != 1 {
3580 t.Errorf("got %v, want 1 matches", len(fmatches))
3581 return
3582 }
3583 if len(fmatches[0].LineMatches) != 1 {
3584 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3585 return
3586 }
3587 var got string
3588 if fmatches[0].LineMatches[0].FileName {
3589 got = fmatches[0].FileName
3590 } else {
3591 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3592 }
3593
3594 if got != want {
3595 t.Errorf("got %s, want %s", got, want)
3596 }
3597 }
3598
3599 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3600 wantSingleMatch(res, "apex.cls")
3601
3602 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3603 wantSingleMatch(res, "tex.cls")
3604
3605 res = searchForTest(t, b, &query.Language{Language: "C"})
3606 wantSingleMatch(res, "hello.h")
3607
3608 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3609 wantSingleMatch(res, "be.magik")
3610
3611 // test fallback language search by pretending it's an older index version
3612 res = searchForTest(t, b, &query.Language{Language: "C++"})
3613 if len(res.Files) != 0 {
3614 t.Errorf("got %d results for C++, want 0", len(res.Files))
3615 }
3616
3617 b.featureVersion = 11 // force fallback
3618 res = searchForTest(t, b, &query.Language{Language: "C++"})
3619 wantSingleMatch(res, "hello.h")
3620 })
3621
3622 t.Run("ChunkMatches", func(t *testing.T) {
3623 wantSingleMatch := func(res *SearchResult, want string) {
3624 t.Helper()
3625 fmatches := res.Files
3626 if len(fmatches) != 1 {
3627 t.Errorf("got %v, want 1 matches", len(fmatches))
3628 return
3629 }
3630 if len(fmatches[0].ChunkMatches) != 1 {
3631 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3632 return
3633 }
3634 var got string
3635 if fmatches[0].ChunkMatches[0].FileName {
3636 got = fmatches[0].FileName
3637 } else {
3638 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3639 }
3640
3641 if got != want {
3642 t.Errorf("got %s, want %s", got, want)
3643 }
3644 }
3645
3646 b.featureVersion = FeatureVersion // reset feature version
3647 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3648 wantSingleMatch(res, "apex.cls")
3649
3650 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3651 wantSingleMatch(res, "tex.cls")
3652
3653 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3654 wantSingleMatch(res, "hello.h")
3655
3656 // test fallback language search by pretending it's an older index version
3657 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3658 if len(res.Files) != 0 {
3659 t.Errorf("got %d results for C++, want 0", len(res.Files))
3660 }
3661
3662 b.featureVersion = 11 // force fallback
3663 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3664 wantSingleMatch(res, "hello.h")
3665 })
3666}
3667
3668func TestStats(t *testing.T) {
3669 ignored := []cmp.Option{
3670 cmpopts.EquateEmpty(),
3671 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3672 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3673 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3674 }
3675
3676 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3677 searcher := searcherForTest(t, b)
3678 indexdata := searcher.(*indexData)
3679 return indexdata.repoListEntry
3680 }
3681
3682 t.Run("one empty repo", func(t *testing.T) {
3683 b := testIndexBuilder(t, nil)
3684 got := repoListEntries(b)
3685 want := []RepoListEntry{
3686 {
3687 Stats: RepoStats{
3688 Repos: 0,
3689 Shards: 1,
3690 Documents: 0,
3691 IndexBytes: 20,
3692 ContentBytes: 0,
3693 NewLinesCount: 0,
3694 DefaultBranchNewLinesCount: 0,
3695 OtherBranchesNewLinesCount: 0,
3696 },
3697 },
3698 }
3699
3700 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3701 t.Fatalf("mismatch (-want +got):\n%s", diff)
3702 }
3703 })
3704
3705 t.Run("one simple shard", func(t *testing.T) {
3706 b := testIndexBuilder(t, nil,
3707 Document{Name: "doc 0", Content: []byte("content 0")},
3708 Document{Name: "doc 1", Content: []byte("content 1")},
3709 )
3710 got := repoListEntries(b)
3711 want := []RepoListEntry{
3712 {
3713 Stats: RepoStats{
3714 Repos: 0,
3715 Shards: 1,
3716 Documents: 2,
3717 IndexBytes: 224,
3718 ContentBytes: 28,
3719 NewLinesCount: 0,
3720 DefaultBranchNewLinesCount: 0,
3721 OtherBranchesNewLinesCount: 0,
3722 },
3723 },
3724 }
3725
3726 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3727 t.Fatalf("mismatch (-want +got):\n%s", diff)
3728 }
3729 })
3730
3731 t.Run("one compound shard", func(t *testing.T) {
3732 b := testIndexBuilderCompound(t,
3733 []*Repository{
3734 {Name: "repo 0"},
3735 {Name: "repo 1"},
3736 },
3737 [][]Document{
3738 {
3739 {Name: "doc 0", Content: []byte("content 0")},
3740 {Name: "doc 1", Content: []byte("content 1")},
3741 },
3742 {
3743 {Name: "doc 2", Content: []byte("content 2")},
3744 {Name: "doc 3", Content: []byte("content 3")},
3745 },
3746 },
3747 )
3748 got := repoListEntries(b)
3749 want := []RepoListEntry{
3750 {
3751 Stats: RepoStats{
3752 Repos: 0,
3753 Shards: 1,
3754 Documents: 2,
3755 IndexBytes: 180,
3756 ContentBytes: 28,
3757 NewLinesCount: 0,
3758 DefaultBranchNewLinesCount: 0,
3759 OtherBranchesNewLinesCount: 0,
3760 },
3761 },
3762 {
3763 Stats: RepoStats{
3764 Repos: 0,
3765 Shards: 1,
3766 Documents: 2,
3767 IndexBytes: 180,
3768 ContentBytes: 28,
3769 NewLinesCount: 0,
3770 DefaultBranchNewLinesCount: 0,
3771 OtherBranchesNewLinesCount: 0,
3772 },
3773 },
3774 }
3775
3776 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3777 t.Fatalf("mismatch (-want +got):\n%s", diff)
3778 }
3779 })
3780
3781 t.Run("compound shard with empty repos", func(t *testing.T) {
3782 b := testIndexBuilderCompound(t,
3783 []*Repository{
3784 {Name: "repo 0"},
3785 {Name: "repo 1"},
3786 {Name: "repo 2"},
3787 {Name: "repo 3"},
3788 {Name: "repo 4"},
3789 },
3790 [][]Document{
3791 {{Name: "doc 0", Content: []byte("content 0")}},
3792 nil,
3793 {{Name: "doc 1", Content: []byte("content 1")}},
3794 nil,
3795 nil,
3796 },
3797 )
3798 got := repoListEntries(b)
3799
3800 entryEmpty := RepoListEntry{Stats: RepoStats{
3801 Shards: 1,
3802 Documents: 0,
3803 ContentBytes: 0,
3804 }}
3805 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3806 Shards: 1,
3807 Documents: 1,
3808 ContentBytes: 14,
3809 }}
3810
3811 want := []RepoListEntry{
3812 entryNonEmpty,
3813 entryEmpty,
3814 entryNonEmpty,
3815 entryEmpty,
3816 entryEmpty,
3817 }
3818
3819 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3820 t.Fatalf("mismatch (-want +got):\n%s", diff)
3821 }
3822 })
3823}
3824
3825// This tests the frequent pattern "\bLITERAL\b".
3826func TestWordSearch(t *testing.T) {
3827 content := []byte("needle the bla")
3828 // ----------------01234567890123
3829
3830 b := testIndexBuilder(t, nil,
3831 Document{
3832 Name: "f1",
3833 Content: content,
3834 })
3835
3836 t.Run("LineMatches", func(t *testing.T) {
3837 sres := searchForTest(t, b,
3838 &query.Regexp{
3839 Regexp: mustParseRE("\\bthe\\b"),
3840 CaseSensitive: true,
3841 Content: true,
3842 })
3843
3844 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3845 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3846 }
3847
3848 if sres.Stats.RegexpsConsidered != 0 {
3849 t.Fatal("expected regexp to be skipped")
3850 }
3851
3852 got := sres.Files[0].LineMatches[0]
3853 want := LineMatch{
3854 LineFragments: []LineFragmentMatch{{
3855 LineOffset: 7,
3856 Offset: 7,
3857 MatchLength: 3,
3858 }},
3859 Line: content,
3860 FileName: false,
3861 LineNumber: 1,
3862 LineStart: 0,
3863 LineEnd: 14,
3864 }
3865
3866 if !reflect.DeepEqual(got, want) {
3867 t.Errorf("got %#v, want %#v", got, want)
3868 }
3869 })
3870
3871 t.Run("ChunkMatches", func(t *testing.T) {
3872 sres := searchForTest(t, b,
3873 &query.Regexp{
3874 Regexp: mustParseRE("\\bthe\\b"),
3875 CaseSensitive: true,
3876 }, chunkOpts)
3877
3878 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3879 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3880 }
3881
3882 if sres.Stats.RegexpsConsidered != 0 {
3883 t.Fatal("expected regexp to be skipped")
3884 }
3885
3886 got := sres.Files[0].ChunkMatches[0]
3887 want := ChunkMatch{
3888 Content: content,
3889 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3890 Ranges: []Range{{
3891 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3892 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3893 }},
3894 }
3895
3896 if diff := cmp.Diff(want, got); diff != "" {
3897 t.Fatal(diff)
3898 }
3899 })
3900}
3901
3902// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
3903// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
3904func BenchmarkScoreChunkMatches(b *testing.B) {
3905 ctx := context.Background()
3906 var builder strings.Builder
3907 for i := 0; i < 1000; i++ {
3908 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i))
3909 }
3910
3911 searcher := searcherForTest(b, testIndexBuilder(b, nil,
3912 Document{Name: "f1", Content: []byte(builder.String())},
3913 ))
3914
3915 q := &query.Or{
3916 Children: []query.Q{
3917 &query.Substring{Pattern: "f"},
3918 &query.Substring{Pattern: "t"},
3919 }}
3920
3921 b.Run("score large ChunkMatch", func(b *testing.B) {
3922 b.ReportAllocs()
3923 b.ResetTimer()
3924
3925 for i := 0; i < b.N; i++ {
3926 sres, err := searcher.Search(ctx, q, &SearchOptions{ChunkMatches: true, NumContextLines: 1})
3927 if err != nil {
3928 b.Fatal(err)
3929 }
3930
3931 matches := sres.Files
3932 if len(matches) == 0 {
3933 b.Fatalf("want file match, got none")
3934 }
3935 }
3936 })
3937}