fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 }
42 r.Files[i].Checksum = nil
43 r.Files[i].Debug = ""
44 }
45}
46
47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
48 t.Helper()
49
50 b, err := NewIndexBuilder(repo)
51 if err != nil {
52 t.Fatalf("NewIndexBuilder: %v", err)
53 }
54
55 for i, d := range docs {
56 if err := b.Add(d); err != nil {
57 t.Fatalf("Add %d: %v", i, err)
58 }
59 }
60
61 return b
62}
63
64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
65 t.Helper()
66
67 b := newIndexBuilder()
68 b.indexFormatVersion = NextIndexFormatVersion
69
70 if len(repos) != len(docs) {
71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
72 }
73
74 for i, repo := range repos {
75 if err := b.setRepository(repo); err != nil {
76 t.Fatal(err)
77 }
78 for j, d := range docs[i] {
79 if err := b.Add(d); err != nil {
80 t.Fatalf("Add %d %d: %v", i, j, err)
81 }
82 }
83 }
84
85 return b
86}
87
88func TestBoundary(t *testing.T) {
89 b := testIndexBuilder(t, nil,
90 Document{Name: "f1", Content: []byte("x the")},
91 Document{Name: "f1", Content: []byte("reader")})
92 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
93 if len(res.Files) > 0 {
94 t.Fatalf("got %v, want no matches", res.Files)
95 }
96}
97
98func TestDocSectionInvalid(t *testing.T) {
99 b, err := NewIndexBuilder(nil)
100 if err != nil {
101 t.Fatalf("NewIndexBuilder: %v", err)
102 }
103 doc := Document{
104 Name: "f1",
105 Content: []byte("01234567890123"),
106 Symbols: []DocumentSection{{5, 8}, {7, 9}},
107 }
108
109 if err := b.Add(doc); err == nil {
110 t.Errorf("overlapping doc sections should fail")
111 }
112
113 doc = Document{
114 Name: "f1",
115 Content: []byte("01234567890123"),
116 Symbols: []DocumentSection{{0, 20}},
117 }
118
119 if err := b.Add(doc); err == nil {
120 t.Errorf("doc sections beyond EOF should fail")
121 }
122}
123
124func TestBasic(t *testing.T) {
125 b := testIndexBuilder(t, nil,
126 Document{
127 Name: "f2",
128 Content: []byte("to carry water in the no later bla"),
129 // --------------0123456789012345678901234567890123
130 })
131
132 t.Run("LineMatch", func(t *testing.T) {
133 res := searchForTest(t, b, &query.Substring{
134 Pattern: "water",
135 CaseSensitive: true,
136 })
137 fmatches := res.Files
138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
139 t.Fatalf("got %v, want 1 matches", fmatches)
140 }
141
142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
143 want := "f2:9"
144 if got != want {
145 t.Errorf("1: got %s, want %s", got, want)
146 }
147 })
148
149 t.Run("ChunkMatch", func(t *testing.T) {
150 res := searchForTest(t, b, &query.Substring{
151 Pattern: "water",
152 CaseSensitive: true,
153 }, chunkOpts)
154 fmatches := res.Files
155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
156 t.Fatalf("got %v, want 1 matches", fmatches)
157 }
158
159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
160 want := "f2:9"
161 if got != want {
162 t.Errorf("1: got %s, want %s", got, want)
163 }
164 })
165}
166
167func TestEmptyIndex(t *testing.T) {
168 b := testIndexBuilder(t, nil)
169 searcher := searcherForTest(t, b)
170
171 var opts SearchOptions
172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
173 t.Fatalf("Search: %v", err)
174 }
175
176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
177 t.Fatalf("List: %v", err)
178 }
179
180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
181 t.Fatalf("Search: %v", err)
182 }
183}
184
// memSeeker serves reads from an in-memory byte slice. searcherForTest uses
// it to hand a freshly written index to NewSearcher without touching disk.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for this in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing slice that start at offset off.
// The returned slice aliases the backing data; it is not a copy.
//
// A request that extends past the end of the data yields an error instead of
// a slice-bounds panic, so a bad offset surfaces as an ordinary failure.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Widen before adding: off+sz can wrap around in uint32 and would then
	// silently select the wrong window.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memSeeker: read [%d, %d) out of bounds, size %d", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the backing slice in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
201
202func TestNewlines(t *testing.T) {
203 b := testIndexBuilder(t, nil,
204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
205 // ---------------------------------------------012345-678901-234
206
207 t.Run("LineMatches", func(t *testing.T) {
208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
209
210 matches := sres.Files
211 want := []FileMatch{{
212 FileName: "filename",
213 LineMatches: []LineMatch{{
214 LineFragments: []LineFragmentMatch{{
215 Offset: 8,
216 LineOffset: 2,
217 MatchLength: 3,
218 }},
219 Line: []byte("line2"),
220 LineStart: 6,
221 LineEnd: 11,
222 LineNumber: 2,
223 }},
224 }}
225
226 if !reflect.DeepEqual(matches, want) {
227 t.Errorf("got %v, want %v", matches, want)
228 }
229 })
230
231 t.Run("ChunkMatches", func(t *testing.T) {
232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
233
234 matches := sres.Files
235 want := []FileMatch{{
236 FileName: "filename",
237 ChunkMatches: []ChunkMatch{{
238 Content: []byte("line2"),
239 ContentStart: Location{
240 ByteOffset: 6,
241 LineNumber: 2,
242 Column: 1,
243 },
244 Ranges: []Range{{
245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
247 }},
248 }},
249 }}
250
251 if diff := cmp.Diff(want, matches); diff != "" {
252 t.Fatal(diff)
253 }
254 })
255}
256
257// A result spanning multiple lines should have LineMatches that only cover
258// single lines.
259func TestQueryNewlines(t *testing.T) {
260 text := "line1\nline2\nbla"
261 b := testIndexBuilder(t, nil,
262 Document{Name: "filename", Content: []byte(text)})
263
264 t.Run("LineMatches", func(t *testing.T) {
265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
266 matches := sres.Files
267 if len(matches) != 1 {
268 t.Fatalf("got %d file matches, want exactly one", len(matches))
269 }
270 m := matches[0]
271 if len(m.LineMatches) != 2 {
272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
273 }
274 })
275
276 t.Run("ChunkMatches", func(t *testing.T) {
277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
278 matches := sres.Files
279 if len(matches) != 1 {
280 t.Fatalf("got %d file matches, want exactly one", len(matches))
281 }
282 m := matches[0]
283 if len(m.ChunkMatches) != 1 {
284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
285 }
286 })
287}
288
289var chunkOpts = SearchOptions{ChunkMatches: true}
290
291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
292 searcher := searcherForTest(t, b)
293 var opts SearchOptions
294 if len(o) > 0 {
295 opts = o[0]
296 }
297 res, err := searcher.Search(context.Background(), q, &opts)
298 if err != nil {
299 t.Fatalf("Search(%s): %v", q, err)
300 }
301 clearScores(res)
302 return res
303}
304
305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
306 var buf bytes.Buffer
307 if err := b.Write(&buf); err != nil {
308 t.Fatal(err)
309 }
310 f := &memSeeker{buf.Bytes()}
311
312 searcher, err := NewSearcher(f)
313 if err != nil {
314 t.Fatalf("NewSearcher: %v", err)
315 }
316
317 return searcher
318}
319
320func TestCaseFold(t *testing.T) {
321 b := testIndexBuilder(t, nil,
322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
323 // -----------------------------------012345678901234
324 )
325 t.Run("LineMatches", func(t *testing.T) {
326 sres := searchForTest(t, b, &query.Substring{
327 Pattern: "bananas",
328 CaseSensitive: true,
329 })
330 matches := sres.Files
331 if len(matches) != 0 {
332 t.Errorf("foldcase: got %#v, want 0 matches", matches)
333 }
334
335 sres = searchForTest(t, b,
336 &query.Substring{
337 Pattern: "BaNaNAS",
338 CaseSensitive: true,
339 })
340 matches = sres.Files
341 if len(matches) != 1 {
342 t.Errorf("no foldcase: got %v, want 1 matches", matches)
343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
344 t.Errorf("foldcase: got %v, want offsets 7", matches)
345 }
346 })
347
348 t.Run("ChunkMatches", func(t *testing.T) {
349 sres := searchForTest(t, b, &query.Substring{
350 Pattern: "bananas",
351 CaseSensitive: true,
352 }, chunkOpts)
353 matches := sres.Files
354 if len(matches) != 0 {
355 t.Errorf("foldcase: got %#v, want 0 matches", matches)
356 }
357
358 sres = searchForTest(t, b,
359 &query.Substring{
360 Pattern: "BaNaNAS",
361 CaseSensitive: true,
362 })
363 matches = sres.Files
364 if len(matches) != 1 {
365 t.Errorf("no foldcase: got %v, want 1 matches", matches)
366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
367 t.Errorf("foldcase: got %v, want offsets 7", matches)
368 }
369 })
370}
371
372// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
373// chars. Those are then set as symbols.
374func wordsAsSymbols(doc Document) Document {
375 re := regexp.MustCompile(`\b\w{2,}\b`)
376 var symbols []DocumentSection
377 for _, match := range re.FindAllIndex(doc.Content, -1) {
378 symbols = append(symbols, DocumentSection{
379 Start: uint32(match[0]),
380 End: uint32(match[1]),
381 })
382 }
383 doc.Symbols = symbols
384 return doc
385}
386
387func TestSearchStats(t *testing.T) {
388 ctx := context.Background()
389 searcher := searcherForTest(t, testIndexBuilder(t, nil,
390 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
391 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
392 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
393 // --------------------------------------------------0123456789012345
394 ))
395
396 andQuery := query.NewAnd(
397 &query.Substring{
398 Pattern: "banana",
399 },
400 &query.Substring{
401 Pattern: "apple",
402 },
403 )
404
405 t.Run("LineMatches", func(t *testing.T) {
406 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
407 if err != nil {
408 t.Fatal(err)
409 }
410 matches := sres.Files
411 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
412 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
413 }
414
415 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
416 t.Fatalf("got %#v, want offsets 2,9", matches)
417 }
418 })
419 t.Run("ChunkMatches", func(t *testing.T) {
420 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
421 if err != nil {
422 t.Fatal(err)
423 }
424 matches := sres.Files
425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
427 }
428
429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
430 t.Fatalf("got %#v, want offsets 2,9", matches)
431 }
432 })
433 t.Run("Stats", func(t *testing.T) {
434 cases := []struct {
435 Name string
436 Q query.Q
437 Want Stats
438 }{{
439 Name: "and-query",
440 Q: andQuery,
441 Want: Stats{
442 FilesLoaded: 1,
443 ContentBytesLoaded: 22,
444 IndexBytesLoaded: 8,
445 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
446 NgramLookups: 104,
447 MatchCount: 2,
448 FileCount: 1,
449 FilesConsidered: 2,
450 ShardsScanned: 1,
451 },
452 }, {
453 Name: "one-trigram",
454 Q: &query.Substring{
455 Pattern: "a y",
456 Content: true,
457 CaseSensitive: true,
458 },
459 Want: Stats{
460 ContentBytesLoaded: 14,
461 IndexBytesLoaded: 1,
462 FileCount: 1,
463 FilesConsidered: 1,
464 FilesLoaded: 1,
465 ShardsScanned: 1,
466 MatchCount: 1,
467 NgramMatches: 1,
468 NgramLookups: 2, // once to lookup frequency then again to access posting list.
469 },
470 }, {
471 Name: "one-trigram-case-insensitive",
472 Q: &query.Substring{
473 Pattern: "a y",
474 Content: true,
475 },
476 Want: Stats{
477 ContentBytesLoaded: 14,
478 IndexBytesLoaded: 1,
479 FileCount: 1,
480 FilesConsidered: 1,
481 FilesLoaded: 1,
482 ShardsScanned: 1,
483 MatchCount: 1,
484 NgramMatches: 1,
485 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
486 },
487 }, {
488 Name: "one-trigram-pruned",
489 Q: &query.Substring{
490 Pattern: "foo",
491 Content: true,
492 CaseSensitive: true,
493 },
494 Want: Stats{
495 ShardsSkippedFilter: 1,
496 NgramLookups: 1, // only had to lookup once
497 },
498 }, {
499 Name: "one-trigram-branch-pruned",
500 Q: query.NewAnd(
501 &query.Substring{
502 Pattern: "foo",
503 Content: true,
504 CaseSensitive: true,
505 },
506 &query.Substring{
507 Pattern: "a y",
508 Content: true,
509 CaseSensitive: true,
510 },
511 ),
512 Want: Stats{
513 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
514 ShardsSkippedFilter: 1,
515 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
516 },
517 }, {
518 Name: "symbol-substr-nomatch",
519 Q: &query.Symbol{Expr: &query.Substring{
520 Pattern: "banana apple",
521 Content: true,
522 CaseSensitive: true,
523 }},
524 Want: Stats{
525 IndexBytesLoaded: 3,
526 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
527 MatchCount: 0, // even though there is a match it doesn't align with a symbol
528 ShardsScanned: 1,
529 NgramMatches: 1,
530 NgramLookups: 12,
531 },
532 }, {
533 Name: "symbol-substr",
534 Q: &query.Symbol{Expr: &query.Substring{
535 Pattern: "apple",
536 Content: true,
537 CaseSensitive: true,
538 }},
539 Want: Stats{
540 ContentBytesLoaded: 35,
541 IndexBytesLoaded: 4,
542 FileCount: 2,
543 FilesConsidered: 2, // must be 2 to ensure we used the index
544 FilesLoaded: 2,
545 MatchCount: 2, // apple symbols is in two files
546 ShardsScanned: 1,
547 NgramMatches: 2,
548 NgramLookups: 5,
549 },
550 }, {
551 Name: "symbol-regexp-nomatch",
552 Q: &query.Symbol{Expr: &query.Regexp{
553 Regexp: mustParseRE("^apple.banana$"),
554 Content: true,
555 CaseSensitive: true,
556 }},
557 Want: Stats{
558 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
559 IndexBytesLoaded: 8,
560 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
561 FilesLoaded: 2,
562 MatchCount: 0, // even though there is a match it doesn't align with a symbol
563 ShardsScanned: 1,
564 NgramMatches: 3,
565 NgramLookups: 11,
566 },
567 }, {
568 Name: "symbol-regexp",
569 Q: &query.Symbol{Expr: &query.Regexp{
570 Regexp: mustParseRE("^app.e$"),
571 Content: true,
572 CaseSensitive: true,
573 }},
574 Want: Stats{
575 ContentBytesLoaded: 35,
576 IndexBytesLoaded: 2,
577 FileCount: 2,
578 FilesConsidered: 2, // must be 2 to ensure we used the index
579 FilesLoaded: 2,
580 MatchCount: 2, // apple symbols is in two files
581 ShardsScanned: 1,
582 NgramMatches: 2,
583 NgramLookups: 2,
584 },
585 }}
586
587 for _, tc := range cases {
588 t.Run(tc.Name, func(t *testing.T) {
589 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
590 if err != nil {
591 t.Fatal(err)
592 }
593 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
594 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
595 }
596 })
597 }
598
599 })
600}
601
602func TestAndNegateSearch(t *testing.T) {
603 b := testIndexBuilder(t, nil,
604 Document{Name: "f1", Content: []byte("x banana y")},
605 // -----------------------------------0123456789
606 Document{Name: "f4", Content: []byte("x banana apple y")})
607
608 t.Run("LineMatches", func(t *testing.T) {
609 sres := searchForTest(t, b, query.NewAnd(
610 &query.Substring{
611 Pattern: "banana",
612 },
613 &query.Not{Child: &query.Substring{
614 Pattern: "apple",
615 }}))
616
617 matches := sres.Files
618
619 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
620 t.Fatalf("got %v, want 1 match", matches)
621 }
622 if matches[0].FileName != "f1" {
623 t.Fatalf("got match %#v, want FileName: f1", matches[0])
624 }
625 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
626 t.Fatalf("got %v, want offset 2", matches)
627 }
628 })
629
630 t.Run("ChunkMatches", func(t *testing.T) {
631 sres := searchForTest(t, b,
632 query.NewAnd(
633 &query.Substring{
634 Pattern: "banana",
635 },
636 &query.Not{Child: &query.Substring{
637 Pattern: "apple",
638 }},
639 ),
640 chunkOpts,
641 )
642
643 matches := sres.Files
644
645 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
646 t.Fatalf("got %v, want 1 match", matches)
647 }
648 if matches[0].FileName != "f1" {
649 t.Fatalf("got match %#v, want FileName: f1", matches[0])
650 }
651 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
652 t.Fatalf("got %v, want offset 2", matches)
653 }
654 })
655}
656
657func TestNegativeMatchesOnlyShortcut(t *testing.T) {
658 b := testIndexBuilder(t, nil,
659 Document{Name: "f1", Content: []byte("x banana y")},
660 Document{Name: "f2", Content: []byte("x appelmoes y")},
661 Document{Name: "f3", Content: []byte("x appelmoes y")},
662 Document{Name: "f3", Content: []byte("x appelmoes y")})
663
664 t.Run("LineMatches", func(t *testing.T) {
665 sres := searchForTest(t, b, query.NewAnd(
666 &query.Substring{
667 Pattern: "banana",
668 },
669 &query.Not{Child: &query.Substring{
670 Pattern: "appel",
671 }}))
672
673 if sres.Stats.FilesConsidered != 1 {
674 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
675 }
676 })
677
678 t.Run("ChunkMatches", func(t *testing.T) {
679 sres := searchForTest(t, b, query.NewAnd(
680 &query.Substring{
681 Pattern: "banana",
682 },
683 &query.Not{Child: &query.Substring{
684 Pattern: "appel",
685 }}), chunkOpts)
686
687 if sres.Stats.FilesConsidered != 1 {
688 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
689 }
690 })
691}
692
693func TestFileSearch(t *testing.T) {
694 b := testIndexBuilder(t, nil,
695 Document{Name: "banzana", Content: []byte("x orange y")},
696 // -------------0123456
697 Document{Name: "banana", Content: []byte("x apple y")},
698 // -------------012345
699 )
700
701 t.Run("LineMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Substring{
703 Pattern: "anan",
704 FileName: true,
705 })
706
707 matches := sres.Files
708 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
709 t.Fatalf("got %v, want 1 match", matches)
710 }
711
712 got := matches[0].LineMatches[0]
713 want := LineMatch{
714 Line: []byte("banana"),
715 LineFragments: []LineFragmentMatch{{
716 Offset: 1,
717 LineOffset: 1,
718 MatchLength: 4,
719 }},
720 FileName: true,
721 }
722
723 if !reflect.DeepEqual(got, want) {
724 t.Errorf("got %#v, want %#v", got, want)
725 }
726 })
727
728 t.Run("ChunkMatches", func(t *testing.T) {
729 sres := searchForTest(t, b, &query.Substring{
730 Pattern: "anan",
731 FileName: true,
732 }, chunkOpts)
733
734 matches := sres.Files
735 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
736 t.Fatalf("got %v, want 1 match", matches)
737 }
738
739 got := matches[0].ChunkMatches[0]
740 want := ChunkMatch{
741 Content: []byte("banana"),
742 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
743 Ranges: []Range{{
744 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
745 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
746 }},
747 FileName: true,
748 }
749
750 if diff := cmp.Diff(want, got); diff != "" {
751 t.Fatal(diff)
752 }
753 })
754
755 t.Run("FileNameSet", func(t *testing.T) {
756 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
757
758 matches := sres.Files
759 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
760 t.Fatalf("got %v, want 1 match", matches)
761 }
762
763 got := matches[0].ChunkMatches[0]
764 want := ChunkMatch{
765 Content: []byte("banana"),
766 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
767 Ranges: []Range{{
768 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
769 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
770 }},
771 FileName: true,
772 }
773
774 if diff := cmp.Diff(want, got); diff != "" {
775 t.Fatal(diff)
776 }
777 })
778}
779
780func TestFileCase(t *testing.T) {
781 b := testIndexBuilder(t, nil,
782 Document{Name: "BANANA", Content: []byte("x orange y")})
783
784 t.Run("LineMatches", func(t *testing.T) {
785 sres := searchForTest(t, b, &query.Substring{
786 Pattern: "banana",
787 FileName: true,
788 })
789
790 matches := sres.Files
791 if len(matches) != 1 || matches[0].FileName != "BANANA" {
792 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
793 }
794 })
795
796 t.Run("ChunkMatches", func(t *testing.T) {
797 sres := searchForTest(t, b, &query.Substring{
798 Pattern: "banana",
799 FileName: true,
800 }, chunkOpts)
801
802 matches := sres.Files
803 if len(matches) != 1 || matches[0].FileName != "BANANA" {
804 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
805 }
806 })
807}
808
809func TestFileRegexpSearchBruteForce(t *testing.T) {
810 b := testIndexBuilder(t, nil,
811 Document{Name: "banzana", Content: []byte("x orange y")},
812 Document{Name: "banana", Content: []byte("x apple y")},
813 )
814 t.Run("LineMatches", func(t *testing.T) {
815 sres := searchForTest(t, b, &query.Regexp{
816 Regexp: mustParseRE("[qn][zx]"),
817 FileName: true,
818 })
819
820 matches := sres.Files
821 if len(matches) != 1 || matches[0].FileName != "banzana" {
822 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
823 }
824 })
825 t.Run("LineMatches", func(t *testing.T) {
826 sres := searchForTest(t, b, &query.Regexp{
827 Regexp: mustParseRE("[qn][zx]"),
828 FileName: true,
829 }, chunkOpts)
830
831 matches := sres.Files
832 if len(matches) != 1 || matches[0].FileName != "banzana" {
833 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
834 }
835 })
836}
837
838func TestFileRegexpSearchShortString(t *testing.T) {
839 b := testIndexBuilder(t, nil,
840 Document{Name: "banana.py", Content: []byte("x orange y")})
841
842 t.Run("LineMatches", func(t *testing.T) {
843 sres := searchForTest(t, b, &query.Regexp{
844 Regexp: mustParseRE("ana.py"),
845 FileName: true,
846 })
847
848 matches := sres.Files
849 if len(matches) != 1 || matches[0].FileName != "banana.py" {
850 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
851 }
852 })
853
854 t.Run("ChunkMatches", func(t *testing.T) {
855 sres := searchForTest(t, b, &query.Regexp{
856 Regexp: mustParseRE("ana.py"),
857 FileName: true,
858 }, chunkOpts)
859
860 matches := sres.Files
861 if len(matches) != 1 || matches[0].FileName != "banana.py" {
862 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
863 }
864 })
865}
866
867func TestFileSubstringSearchBruteForce(t *testing.T) {
868 b := testIndexBuilder(t, nil,
869 Document{Name: "BANZANA", Content: []byte("x orange y")},
870 Document{Name: "banana", Content: []byte("x apple y")})
871
872 q := &query.Substring{
873 Pattern: "z",
874 FileName: true,
875 }
876
877 t.Run("LineMatches", func(t *testing.T) {
878 res := searchForTest(t, b, q)
879 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
880 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
881 }
882 })
883
884 t.Run("ChunkMatches", func(t *testing.T) {
885 res := searchForTest(t, b, q, chunkOpts)
886 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
887 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
888 }
889 })
890}
891
892func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
893 b := testIndexBuilder(t, nil,
894 Document{Name: "BANZANA", Content: []byte("x orange y")},
895 Document{Name: "bananaq", Content: []byte("x apple y")})
896
897 q := &query.Substring{
898 Pattern: "q",
899 FileName: true,
900 }
901 t.Run("LineMatches", func(t *testing.T) {
902 res := searchForTest(t, b, q)
903 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
904 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
905 }
906 })
907
908 t.Run("LineMatches", func(t *testing.T) {
909 res := searchForTest(t, b, q, chunkOpts)
910 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
911 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
912 }
913 })
914}
915
916func TestSearchMatchAll(t *testing.T) {
917 b := testIndexBuilder(t, nil,
918 Document{Name: "banzana", Content: []byte("x orange y")},
919 Document{Name: "banana", Content: []byte("x apple y")})
920
921 t.Run("LineMatches", func(t *testing.T) {
922 sres := searchForTest(t, b, &query.Const{Value: true})
923 matches := sres.Files
924 if len(matches) != 2 {
925 t.Fatalf("got %v, want 2 matches", matches)
926 }
927 })
928
929 t.Run("ChunkMatches", func(t *testing.T) {
930 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
931 matches := sres.Files
932 if len(matches) != 2 {
933 t.Fatalf("got %v, want 2 matches", matches)
934 }
935 })
936}
937
938func TestSearchNewline(t *testing.T) {
939 b := testIndexBuilder(t, nil,
940 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
941
942 t.Run("LineMatches", func(t *testing.T) {
943 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
944
945 // Just check that we don't crash.
946
947 matches := sres.Files
948 if len(matches) != 1 {
949 t.Fatalf("got %v, want 1 matches", matches)
950 }
951 })
952
953 t.Run("ChunkMatches", func(t *testing.T) {
954 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
955
956 // Just check that we don't crash.
957
958 matches := sres.Files
959 if len(matches) != 1 {
960 t.Fatalf("got %v, want 1 matches", matches)
961 }
962 })
963}
964
965func TestSearchMatchAllRegexp(t *testing.T) {
966 b := testIndexBuilder(t, nil,
967 Document{Name: "banzana", Content: []byte("abcd")},
968 Document{Name: "banana", Content: []byte("pqrs")})
969
970 t.Run("LineMatches", func(t *testing.T) {
971 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
972
973 matches := sres.Files
974 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
975 t.Fatalf("got %v, want 2 matches", matches)
976 }
977 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
978 t.Fatalf("want 4 chars in every file, got %#v", matches)
979 }
980
981 })
982
983 t.Run("ChunkMatches", func(t *testing.T) {
984 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
985
986 matches := sres.Files
987 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
988 t.Fatalf("got %v, want 2 matches", matches)
989 }
990 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
991 t.Fatalf("want 4 chars in every file, got %#v", matches)
992 }
993
994 })
995}
996
997func TestFileRestriction(t *testing.T) {
998 b := testIndexBuilder(t, nil,
999 Document{Name: "banana1", Content: []byte("x orange y")},
1000 Document{Name: "banana2", Content: []byte("x apple y")},
1001 Document{Name: "orange", Content: []byte("x apple z")})
1002
1003 t.Run("LineMatches", func(t *testing.T) {
1004 sres := searchForTest(t, b, query.NewAnd(
1005 &query.Substring{
1006 Pattern: "banana",
1007 FileName: true,
1008 },
1009 &query.Substring{
1010 Pattern: "apple",
1011 }))
1012
1013 matches := sres.Files
1014 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1015 t.Fatalf("got %v, want 1 match", matches)
1016 }
1017
1018 match := matches[0].LineMatches[0]
1019 got := string(match.Line)
1020 want := "x apple y"
1021 if got != want {
1022 t.Errorf("got match %#v, want line %q", match, want)
1023 }
1024 })
1025
1026 t.Run("ChunkMatches", func(t *testing.T) {
1027 sres := searchForTest(t, b, query.NewAnd(
1028 &query.Substring{
1029 Pattern: "banana",
1030 FileName: true,
1031 },
1032 &query.Substring{
1033 Pattern: "apple",
1034 }), chunkOpts)
1035
1036 matches := sres.Files
1037 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1038 t.Fatalf("got %v, want 1 match", matches)
1039 }
1040
1041 match := matches[0].ChunkMatches[0]
1042 got := string(match.Content)
1043 want := "x apple y"
1044 if got != want {
1045 t.Errorf("got match %#v, want line %q", match, want)
1046 }
1047 })
1048}
1049
1050func TestFileNameBoundary(t *testing.T) {
1051 b := testIndexBuilder(t, nil,
1052 Document{Name: "banana2", Content: []byte("x apple y")},
1053 Document{Name: "helpers.go", Content: []byte("x apple y")},
1054 Document{Name: "foo", Content: []byte("x apple y")})
1055
1056 t.Run("LineMatches", func(t *testing.T) {
1057 sres := searchForTest(t, b, &query.Substring{
1058 Pattern: "helpers.go",
1059 FileName: true,
1060 })
1061
1062 matches := sres.Files
1063 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1064 t.Fatalf("got %v, want 1 match", matches)
1065 }
1066 })
1067
1068 t.Run("ChunkMatches", func(t *testing.T) {
1069 sres := searchForTest(t, b, &query.Substring{
1070 Pattern: "helpers.go",
1071 FileName: true,
1072 }, chunkOpts)
1073
1074 matches := sres.Files
1075 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1076 t.Fatalf("got %v, want 1 match", matches)
1077 }
1078 })
1079}
1080
1081func TestDocumentOrder(t *testing.T) {
1082 var docs []Document
1083 for i := 0; i < 3; i++ {
1084 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1085 }
1086
1087 b := testIndexBuilder(t, nil, docs...)
1088
1089 t.Run("LineMatches", func(t *testing.T) {
1090 sres := searchForTest(t, b, query.NewAnd(
1091 &query.Substring{
1092 Pattern: "needle",
1093 }))
1094
1095 want := []string{"f0", "f1", "f2"}
1096 var got []string
1097 for _, f := range sres.Files {
1098 got = append(got, f.FileName)
1099 }
1100 if !reflect.DeepEqual(got, want) {
1101 t.Fatalf("got %v, want %v", got, want)
1102 }
1103 })
1104
1105 t.Run("ChunkMatches", func(t *testing.T) {
1106 sres := searchForTest(t, b,
1107 query.NewAnd(&query.Substring{
1108 Pattern: "needle",
1109 }),
1110 chunkOpts,
1111 )
1112
1113 want := []string{"f0", "f1", "f2"}
1114 var got []string
1115 for _, f := range sres.Files {
1116 got = append(got, f.FileName)
1117 }
1118 if !reflect.DeepEqual(got, want) {
1119 t.Fatalf("got %v, want %v", got, want)
1120 }
1121 })
1122}
1123
1124func TestBranchMask(t *testing.T) {
1125 b := testIndexBuilder(t, &Repository{
1126 Branches: []RepositoryBranch{
1127 {"master", "v-master"},
1128 {"stable", "v-stable"},
1129 {"bonzai", "v-bonzai"},
1130 },
1131 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1132 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1133 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1134 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1135 )
1136
1137 t.Run("LineMatches", func(t *testing.T) {
1138 sres := searchForTest(t, b, query.NewAnd(
1139 &query.Substring{
1140 Pattern: "needle",
1141 },
1142 &query.Branch{
1143 Pattern: "table",
1144 }))
1145
1146 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1147 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1148 }
1149
1150 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1151 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1152 }
1153 })
1154
1155 t.Run("ChunkMatches", func(t *testing.T) {
1156 sres := searchForTest(t, b, query.NewAnd(
1157 &query.Substring{
1158 Pattern: "needle",
1159 },
1160 &query.Branch{
1161 Pattern: "table",
1162 }),
1163 chunkOpts,
1164 )
1165
1166 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1167 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1168 }
1169
1170 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1171 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1172 }
1173 })
1174}
1175
1176func TestBranchLimit(t *testing.T) {
1177 for limit := 64; limit <= 65; limit++ {
1178 r := &Repository{}
1179 for i := 0; i < limit; i++ {
1180 s := fmt.Sprintf("b%d", i)
1181 r.Branches = append(r.Branches, RepositoryBranch{
1182 s, "v-" + s,
1183 })
1184 }
1185 _, err := NewIndexBuilder(r)
1186 if limit == 64 && err != nil {
1187 t.Fatalf("NewIndexBuilder: %v", err)
1188 } else if limit == 65 && err == nil {
1189 t.Fatalf("NewIndexBuilder succeeded")
1190 }
1191 }
1192}
1193
1194func TestBranchReport(t *testing.T) {
1195 branches := []string{"stable", "master"}
1196 b := testIndexBuilder(t, &Repository{
1197 Branches: []RepositoryBranch{
1198 {"stable", "vs"},
1199 {"master", "vm"},
1200 },
1201 },
1202 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1203
1204 t.Run("LineMatches", func(t *testing.T) {
1205 sres := searchForTest(t, b, &query.Substring{
1206 Pattern: "needle",
1207 })
1208 if len(sres.Files) != 1 {
1209 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1210 }
1211
1212 f := sres.Files[0]
1213 if !reflect.DeepEqual(f.Branches, branches) {
1214 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1215 }
1216 })
1217
1218 t.Run("ChunkMatches", func(t *testing.T) {
1219 sres := searchForTest(t, b, &query.Substring{
1220 Pattern: "needle",
1221 }, chunkOpts)
1222 if len(sres.Files) != 1 {
1223 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1224 }
1225
1226 f := sres.Files[0]
1227 if !reflect.DeepEqual(f.Branches, branches) {
1228 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1229 }
1230 })
1231
1232}
1233
1234func TestBranchVersions(t *testing.T) {
1235 b := testIndexBuilder(t, &Repository{
1236 Branches: []RepositoryBranch{
1237 {"stable", "v-stable"},
1238 {"master", "v-master"},
1239 },
1240 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1241
1242 t.Run("LineMatches", func(t *testing.T) {
1243 sres := searchForTest(t, b, &query.Substring{
1244 Pattern: "needle",
1245 })
1246 if len(sres.Files) != 1 {
1247 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1248 }
1249
1250 f := sres.Files[0]
1251 if f.Version != "v-master" {
1252 t.Fatalf("got file %#v, want version 'v-master'", f)
1253 }
1254 })
1255
1256 t.Run("ChunkMatches", func(t *testing.T) {
1257 sres := searchForTest(t, b, &query.Substring{
1258 Pattern: "needle",
1259 }, chunkOpts)
1260 if len(sres.Files) != 1 {
1261 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1262 }
1263
1264 f := sres.Files[0]
1265 if f.Version != "v-master" {
1266 t.Fatalf("got file %#v, want version 'v-master'", f)
1267 }
1268 })
1269}
1270
// mustParseRE parses s with the Perl syntax flags and returns the resulting
// syntax tree. It panics with the parse error when s is not a valid pattern,
// which makes it suitable for test fixtures only.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1279
1280func TestRegexp(t *testing.T) {
1281 content := []byte("needle the bla")
1282 // ----------------01234567890123
1283
1284 b := testIndexBuilder(t, nil,
1285 Document{
1286 Name: "f1",
1287 Content: content,
1288 })
1289
1290 t.Run("LineMatches", func(t *testing.T) {
1291 sres := searchForTest(t, b,
1292 &query.Regexp{
1293 Regexp: mustParseRE("dle.*bla"),
1294 })
1295
1296 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1297 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1298 }
1299
1300 got := sres.Files[0].LineMatches[0]
1301 want := LineMatch{
1302 LineFragments: []LineFragmentMatch{{
1303 LineOffset: 3,
1304 Offset: 3,
1305 MatchLength: 11,
1306 }},
1307 Line: content,
1308 FileName: false,
1309 LineNumber: 1,
1310 LineStart: 0,
1311 LineEnd: 14,
1312 }
1313
1314 if !reflect.DeepEqual(got, want) {
1315 t.Errorf("got %#v, want %#v", got, want)
1316 }
1317 })
1318
1319 t.Run("ChunkMatches", func(t *testing.T) {
1320 sres := searchForTest(t, b,
1321 &query.Regexp{
1322 Regexp: mustParseRE("dle.*bla"),
1323 }, chunkOpts)
1324
1325 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1326 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1327 }
1328
1329 got := sres.Files[0].ChunkMatches[0]
1330 want := ChunkMatch{
1331 Content: content,
1332 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1333 Ranges: []Range{{
1334 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1335 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1336 }},
1337 }
1338
1339 if diff := cmp.Diff(want, got); diff != "" {
1340 t.Fatal(diff)
1341 }
1342 })
1343}
1344
1345func TestRegexpFile(t *testing.T) {
1346 content := []byte("needle the bla")
1347
1348 name := "let's play: find the mussel"
1349 b := testIndexBuilder(t, nil,
1350 Document{Name: name, Content: content},
1351 Document{Name: "play.txt", Content: content})
1352
1353 t.Run("LineMatches", func(t *testing.T) {
1354 sres := searchForTest(t, b,
1355 &query.Regexp{
1356 Regexp: mustParseRE("play.*mussel"),
1357 FileName: true,
1358 })
1359
1360 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1361 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1362 }
1363
1364 if sres.Files[0].FileName != name {
1365 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1366 }
1367 })
1368
1369 t.Run("ChunkMatches", func(t *testing.T) {
1370 sres := searchForTest(t, b,
1371 &query.Regexp{
1372 Regexp: mustParseRE("play.*mussel"),
1373 FileName: true,
1374 }, chunkOpts)
1375
1376 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1377 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1378 }
1379
1380 if sres.Files[0].FileName != name {
1381 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1382 }
1383 })
1384}
1385
1386func TestRegexpOrder(t *testing.T) {
1387 content := []byte("bla the needle")
1388 // ----------------01234567890123
1389
1390 b := testIndexBuilder(t, nil,
1391 Document{Name: "f1", Content: content})
1392
1393 t.Run("LineMatches", func(t *testing.T) {
1394 sres := searchForTest(t, b,
1395 &query.Regexp{
1396 Regexp: mustParseRE("dle.*bla"),
1397 })
1398
1399 if len(sres.Files) != 0 {
1400 t.Fatalf("got %v, want 0 matches", sres.Files)
1401 }
1402 })
1403
1404 t.Run("ChunkMatches", func(t *testing.T) {
1405 sres := searchForTest(t, b,
1406 &query.Regexp{
1407 Regexp: mustParseRE("dle.*bla"),
1408 })
1409
1410 if len(sres.Files) != 0 {
1411 t.Fatalf("got %v, want 0 matches", sres.Files)
1412 }
1413 })
1414}
1415
1416func TestRepoName(t *testing.T) {
1417 content := []byte("bla the needle")
1418 // ----------------01234567890123
1419
1420 b := testIndexBuilder(t, &Repository{Name: "bla"},
1421 Document{Name: "f1", Content: content})
1422
1423 t.Run("LineMatches", func(t *testing.T) {
1424 sres := searchForTest(t, b,
1425 query.NewAnd(
1426 &query.Substring{Pattern: "needle"},
1427 &query.Repo{Regexp: regexp.MustCompile("foo")},
1428 ))
1429
1430 if len(sres.Files) != 0 {
1431 t.Fatalf("got %v, want 0 matches", sres.Files)
1432 }
1433
1434 if sres.Stats.FilesConsidered > 0 {
1435 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1436 }
1437
1438 sres = searchForTest(t, b,
1439 query.NewAnd(
1440 &query.Substring{Pattern: "needle"},
1441 &query.Repo{Regexp: regexp.MustCompile("bla")},
1442 ))
1443 if len(sres.Files) != 1 {
1444 t.Fatalf("got %v, want 1 match", sres.Files)
1445 }
1446 })
1447
1448 t.Run("ChunkMatches", func(t *testing.T) {
1449 sres := searchForTest(t, b,
1450 query.NewAnd(
1451 &query.Substring{Pattern: "needle"},
1452 &query.Repo{Regexp: regexp.MustCompile("foo")},
1453 ),
1454 chunkOpts,
1455 )
1456
1457 if len(sres.Files) != 0 {
1458 t.Fatalf("got %v, want 0 matches", sres.Files)
1459 }
1460
1461 if sres.Stats.FilesConsidered > 0 {
1462 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1463 }
1464
1465 sres = searchForTest(t, b,
1466 query.NewAnd(
1467 &query.Substring{Pattern: "needle"},
1468 &query.Repo{Regexp: regexp.MustCompile("bla")},
1469 ))
1470 if len(sres.Files) != 1 {
1471 t.Fatalf("got %v, want 1 match", sres.Files)
1472 }
1473 })
1474}
1475
1476func TestMergeMatches(t *testing.T) {
1477 content := []byte("blablabla")
1478 b := testIndexBuilder(t, nil,
1479 Document{Name: "f1", Content: content})
1480
1481 t.Run("LineMatches", func(t *testing.T) {
1482 sres := searchForTest(t, b,
1483 &query.Substring{Pattern: "bla"})
1484 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1485 t.Fatalf("got %v, want 1 match", sres.Files)
1486 }
1487 })
1488
1489 t.Run("ChunkMatches", func(t *testing.T) {
1490 sres := searchForTest(t, b,
1491 &query.Substring{Pattern: "bla"},
1492 chunkOpts,
1493 )
1494 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1495 t.Fatalf("got %v, want 1 match", sres.Files)
1496 }
1497 })
1498}
1499
1500func TestRepoURL(t *testing.T) {
1501 content := []byte("blablabla")
1502 b := testIndexBuilder(t, &Repository{
1503 Name: "name",
1504 URL: "URL",
1505 CommitURLTemplate: "commit",
1506 FileURLTemplate: "file-url",
1507 LineFragmentTemplate: "fragment",
1508 }, Document{Name: "f1", Content: content})
1509
1510 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1511
1512 if sres.RepoURLs["name"] != "file-url" {
1513 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1514 }
1515 if sres.LineFragments["name"] != "fragment" {
1516 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1517 }
1518}
1519
1520func TestRegexpCaseSensitive(t *testing.T) {
1521 content := []byte("bla\nfunc unmarshalGitiles\n")
1522 b := testIndexBuilder(t, nil, Document{
1523 Name: "f1",
1524 Content: content,
1525 })
1526
1527 t.Run("LineMatches", func(t *testing.T) {
1528 res := searchForTest(t, b,
1529 &query.Regexp{
1530 Regexp: mustParseRE("func.*Gitiles"),
1531 CaseSensitive: true,
1532 })
1533
1534 if len(res.Files) != 1 {
1535 t.Fatalf("got %v, want one match", res.Files)
1536 }
1537 })
1538
1539 t.Run("ChunkMatches", func(t *testing.T) {
1540 res := searchForTest(t, b,
1541 &query.Regexp{
1542 Regexp: mustParseRE("func.*Gitiles"),
1543 CaseSensitive: true,
1544 },
1545 chunkOpts,
1546 )
1547
1548 if len(res.Files) != 1 {
1549 t.Fatalf("got %v, want one match", res.Files)
1550 }
1551 })
1552}
1553
1554func TestRegexpCaseFolding(t *testing.T) {
1555 content := []byte("bla\nfunc unmarshalGitiles\n")
1556
1557 b := testIndexBuilder(t, nil,
1558 Document{Name: "f1", Content: content})
1559 res := searchForTest(t, b,
1560 &query.Regexp{
1561 Regexp: mustParseRE("func.*GITILES"),
1562 CaseSensitive: false,
1563 })
1564
1565 if len(res.Files) != 1 {
1566 t.Fatalf("got %v, want one match", res.Files)
1567 }
1568}
1569
1570func TestCaseRegexp(t *testing.T) {
1571 content := []byte("BLABLABLA")
1572 b := testIndexBuilder(t, nil,
1573 Document{Name: "f1", Content: content})
1574
1575 t.Run("LineMatches", func(t *testing.T) {
1576 res := searchForTest(t, b,
1577 &query.Regexp{
1578 Regexp: mustParseRE("[xb][xl][xa]"),
1579 CaseSensitive: true,
1580 })
1581
1582 if len(res.Files) > 0 {
1583 t.Fatalf("got %v, want no matches", res.Files)
1584 }
1585 })
1586
1587 t.Run("ChunkMatches", func(t *testing.T) {
1588 res := searchForTest(t, b,
1589 &query.Regexp{
1590 Regexp: mustParseRE("[xb][xl][xa]"),
1591 CaseSensitive: true,
1592 },
1593 chunkOpts,
1594 )
1595
1596 if len(res.Files) > 0 {
1597 t.Fatalf("got %v, want no matches", res.Files)
1598 }
1599 })
1600}
1601
1602func TestNegativeRegexp(t *testing.T) {
1603 content := []byte("BLABLABLA needle bla")
1604 b := testIndexBuilder(t, nil,
1605 Document{Name: "f1", Content: content})
1606
1607 t.Run("LineMatches", func(t *testing.T) {
1608 res := searchForTest(t, b,
1609 query.NewAnd(
1610 &query.Substring{
1611 Pattern: "needle",
1612 },
1613 &query.Not{
1614 Child: &query.Regexp{
1615 Regexp: mustParseRE(".cs"),
1616 },
1617 }))
1618
1619 if len(res.Files) != 1 {
1620 t.Fatalf("got %v, want 1 match", res.Files)
1621 }
1622 })
1623
1624 t.Run("ChunkMatches", func(t *testing.T) {
1625 res := searchForTest(t, b,
1626 query.NewAnd(
1627 &query.Substring{
1628 Pattern: "needle",
1629 },
1630 &query.Not{
1631 Child: &query.Regexp{
1632 Regexp: mustParseRE(".cs"),
1633 },
1634 },
1635 ),
1636 chunkOpts)
1637
1638 if len(res.Files) != 1 {
1639 t.Fatalf("got %v, want 1 match", res.Files)
1640 }
1641 })
1642}
1643
1644func TestSymbolRank(t *testing.T) {
1645 t.Skip()
1646
1647 content := []byte("func bla() blubxxxxx")
1648 // ----------------01234567890123456789
1649 b := testIndexBuilder(t, nil,
1650 Document{
1651 Name: "f1",
1652 Content: content,
1653 }, Document{
1654 Name: "f2",
1655 Content: content,
1656 Symbols: []DocumentSection{{5, 8}},
1657 }, Document{
1658 Name: "f3",
1659 Content: content,
1660 })
1661
1662 t.Run("LineMatches", func(t *testing.T) {
1663 res := searchForTest(t, b,
1664 &query.Substring{
1665 CaseSensitive: false,
1666 Pattern: "bla",
1667 })
1668
1669 if len(res.Files) != 3 {
1670 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1671 }
1672 if res.Files[0].FileName != "f2" {
1673 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1674 }
1675 })
1676
1677 t.Run("ChunkMatches", func(t *testing.T) {
1678 res := searchForTest(t, b,
1679 &query.Substring{
1680 CaseSensitive: false,
1681 Pattern: "bla",
1682 }, chunkOpts)
1683
1684 if len(res.Files) != 3 {
1685 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1686 }
1687 if res.Files[0].FileName != "f2" {
1688 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1689 }
1690 })
1691}
1692
1693func TestSymbolRankRegexpUTF8(t *testing.T) {
1694 t.Skip()
1695
1696 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1697 content := []byte(prefix +
1698 "func bla() blub")
1699 // ------012345678901234
1700 b := testIndexBuilder(t, nil,
1701 Document{
1702 Name: "f1",
1703 Content: content,
1704 }, Document{
1705 Name: "f2",
1706 Content: content,
1707 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1708 }, Document{
1709 Name: "f3",
1710 Content: content,
1711 })
1712
1713 t.Run("LineMatches", func(t *testing.T) {
1714 res := searchForTest(t, b,
1715 &query.Regexp{
1716 Regexp: mustParseRE("b.a"),
1717 })
1718
1719 if len(res.Files) != 3 {
1720 t.Fatalf("got %#v, want 3 files", res.Files)
1721 }
1722 if res.Files[0].FileName != "f2" {
1723 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1724 }
1725 })
1726
1727 t.Run("ChunjkMatches", func(t *testing.T) {
1728 res := searchForTest(t, b,
1729 &query.Regexp{
1730 Regexp: mustParseRE("b.a"),
1731 }, chunkOpts)
1732
1733 if len(res.Files) != 3 {
1734 t.Fatalf("got %#v, want 3 files", res.Files)
1735 }
1736 if res.Files[0].FileName != "f2" {
1737 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1738 }
1739 })
1740}
1741
1742func TestPartialSymbolRank(t *testing.T) {
1743 t.Skip()
1744
1745 content := []byte("func bla() blub")
1746 // ----------------012345678901234
1747
1748 b := testIndexBuilder(t, nil,
1749 Document{
1750 Name: "f1",
1751 Content: content,
1752 Symbols: []DocumentSection{{4, 9}},
1753 }, Document{
1754 Name: "f2",
1755 Content: content,
1756 Symbols: []DocumentSection{{4, 8}},
1757 }, Document{
1758 Name: "f3",
1759 Content: content,
1760 Symbols: []DocumentSection{{4, 9}},
1761 })
1762
1763 t.Run("LineMatches", func(t *testing.T) {
1764 res := searchForTest(t, b,
1765 &query.Substring{
1766 Pattern: "bla",
1767 })
1768
1769 if len(res.Files) != 3 {
1770 t.Fatalf("got %#v, want 3 files", res.Files)
1771 }
1772 if res.Files[0].FileName != "f2" {
1773 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1774 }
1775 })
1776
1777 t.Run("ChunkMatches", func(t *testing.T) {
1778 res := searchForTest(t, b,
1779 &query.Substring{
1780 Pattern: "bla",
1781 }, chunkOpts)
1782
1783 if len(res.Files) != 3 {
1784 t.Fatalf("got %#v, want 3 files", res.Files)
1785 }
1786 if res.Files[0].FileName != "f2" {
1787 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1788 }
1789 })
1790}
1791
1792func TestNegativeRepo(t *testing.T) {
1793 content := []byte("bla the needle")
1794 // ----------------01234567890123
1795 b := testIndexBuilder(t, &Repository{
1796 Name: "bla",
1797 }, Document{Name: "f1", Content: content})
1798
1799 t.Run("LineMatches", func(t *testing.T) {
1800 sres := searchForTest(t, b,
1801 query.NewAnd(
1802 &query.Substring{Pattern: "needle"},
1803 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1804 ))
1805
1806 if len(sres.Files) != 0 {
1807 t.Fatalf("got %v, want 0 matches", sres.Files)
1808 }
1809 })
1810
1811 t.Run("ChunkMatches", func(t *testing.T) {
1812 sres := searchForTest(t, b,
1813 query.NewAnd(
1814 &query.Substring{Pattern: "needle"},
1815 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1816 ), chunkOpts)
1817
1818 if len(sres.Files) != 0 {
1819 t.Fatalf("got %v, want 0 matches", sres.Files)
1820 }
1821 })
1822}
1823
1824func TestListRepos(t *testing.T) {
1825 content := []byte("bla the needle\n")
1826 // ----------------012345678901234-
1827
1828 t.Run("default and minimal fallback", func(t *testing.T) {
1829 repo := &Repository{
1830 Name: "reponame",
1831 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1832 }
1833 b := testIndexBuilder(t, repo,
1834 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1835 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1836 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1837 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1838
1839 searcher := searcherForTest(t, b)
1840
1841 for _, opts := range []*ListOptions{
1842 nil,
1843 {},
1844 {Field: RepoListFieldRepos},
1845 {Field: RepoListFieldReposMap},
1846 } {
1847 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1848 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1849
1850 res, err := searcher.List(context.Background(), q, opts)
1851 if err != nil {
1852 t.Fatalf("List(%v): %v", q, err)
1853 }
1854
1855 want := &RepoList{
1856 Repos: []*RepoListEntry{{
1857 Repository: *repo,
1858 Stats: RepoStats{
1859 Documents: 4,
1860 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1861 Shards: 1,
1862
1863 NewLinesCount: 4,
1864 DefaultBranchNewLinesCount: 2,
1865 OtherBranchesNewLinesCount: 3,
1866 },
1867 }},
1868 Stats: RepoStats{
1869 Repos: 1,
1870 Documents: 4,
1871 ContentBytes: 68,
1872 Shards: 1,
1873
1874 NewLinesCount: 4,
1875 DefaultBranchNewLinesCount: 2,
1876 OtherBranchesNewLinesCount: 3,
1877 },
1878 }
1879 ignored := []cmp.Option{
1880 cmpopts.EquateEmpty(),
1881 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1882 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1883 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1884 cmpopts.IgnoreFields(Repository{}, "priority"),
1885 }
1886 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1887 t.Fatalf("mismatch (-want +got):\n%s", diff)
1888 }
1889
1890 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1891 res, err = searcher.List(context.Background(), q, nil)
1892 if err != nil {
1893 t.Fatalf("List(%v): %v", q, err)
1894 }
1895 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1896 t.Fatalf("got %v, want 0 matches", res)
1897 }
1898 })
1899 }
1900 })
1901
1902 t.Run("minimal", func(t *testing.T) {
1903 repo := &Repository{
1904 ID: 1234,
1905 Name: "reponame",
1906 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1907 RawConfig: map[string]string{"repoid": "1234"},
1908 }
1909 b := testIndexBuilder(t, repo,
1910 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1911 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1912 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1913 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1914
1915 searcher := searcherForTest(t, b)
1916
1917 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1918 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1919 if err != nil {
1920 t.Fatalf("List(%v): %v", q, err)
1921 }
1922
1923 want := &RepoList{
1924 ReposMap: ReposMap{
1925 repo.ID: {
1926 HasSymbols: repo.HasSymbols,
1927 Branches: repo.Branches,
1928 },
1929 },
1930 Stats: RepoStats{
1931 Repos: 1,
1932 Shards: 1,
1933 Documents: 4,
1934 IndexBytes: 412,
1935 ContentBytes: 68,
1936 NewLinesCount: 4,
1937 DefaultBranchNewLinesCount: 2,
1938 OtherBranchesNewLinesCount: 3,
1939 },
1940 }
1941
1942 ignored := []cmp.Option{
1943 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1944 }
1945 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1946 t.Fatalf("mismatch (-want +got):\n%s", diff)
1947 }
1948
1949 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1950 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1951 if err != nil {
1952 t.Fatalf("List(%v): %v", q, err)
1953 }
1954 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1955 t.Fatalf("got %v, want 0 matches", res)
1956 }
1957 })
1958}
1959
1960func TestListReposByContent(t *testing.T) {
1961 content := []byte("bla the needle")
1962
1963 b := testIndexBuilder(t, &Repository{
1964 Name: "reponame",
1965 },
1966 Document{Name: "f1", Content: content},
1967 Document{Name: "f2", Content: content})
1968
1969 searcher := searcherForTest(t, b)
1970 q := &query.Substring{Pattern: "needle"}
1971 res, err := searcher.List(context.Background(), q, nil)
1972 if err != nil {
1973 t.Fatalf("List(%v): %v", q, err)
1974 }
1975 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1976 t.Fatalf("got %v, want 1 matches", res)
1977 }
1978 if got := res.Repos[0].Stats.Shards; got != 1 {
1979 t.Fatalf("got %d, want 1 shard", got)
1980 }
1981 q = &query.Substring{Pattern: "foo"}
1982 res, err = searcher.List(context.Background(), q, nil)
1983 if err != nil {
1984 t.Fatalf("List(%v): %v", q, err)
1985 }
1986 if len(res.Repos) != 0 {
1987 t.Fatalf("got %v, want 0 matches", res)
1988 }
1989}
1990
1991func TestMetadata(t *testing.T) {
1992 content := []byte("bla the needle")
1993
1994 b := testIndexBuilder(t, &Repository{
1995 Name: "reponame",
1996 }, Document{Name: "f1", Content: content},
1997 Document{Name: "f2", Content: content})
1998
1999 var buf bytes.Buffer
2000 if err := b.Write(&buf); err != nil {
2001 t.Fatal(err)
2002 }
2003 f := &memSeeker{buf.Bytes()}
2004
2005 rd, _, err := ReadMetadata(f)
2006 if err != nil {
2007 t.Fatalf("ReadMetadata: %v", err)
2008 }
2009
2010 if got, want := rd[0].Name, "reponame"; got != want {
2011 t.Fatalf("got %q want %q", got, want)
2012 }
2013}
2014
2015func TestOr(t *testing.T) {
2016 b := testIndexBuilder(t, nil,
2017 Document{Name: "f1", Content: []byte("needle")},
2018 Document{Name: "f2", Content: []byte("banana")})
2019 t.Run("LineMatches", func(t *testing.T) {
2020 sres := searchForTest(t, b, query.NewOr(
2021 &query.Substring{Pattern: "needle"},
2022 &query.Substring{Pattern: "banana"}))
2023
2024 if len(sres.Files) != 2 {
2025 t.Fatalf("got %v, want 2 files", sres.Files)
2026 }
2027 })
2028
2029 t.Run("ChunkMatches", func(t *testing.T) {
2030 sres := searchForTest(t, b, query.NewOr(
2031 &query.Substring{Pattern: "needle"},
2032 &query.Substring{Pattern: "banana"}))
2033
2034 if len(sres.Files) != 2 {
2035 t.Fatalf("got %v, want 2 files", sres.Files)
2036 }
2037 })
2038}
2039
2040func TestFrequency(t *testing.T) {
2041 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2042
2043 b := testIndexBuilder(t, nil,
2044 Document{
2045 Name: "f1",
2046 Content: content,
2047 })
2048
2049 t.Run("LineMatches", func(t *testing.T) {
2050 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2051 if len(sres.Files) != 0 {
2052 t.Errorf("got %v, wanted 0 matches", sres.Files)
2053 }
2054 })
2055
2056 t.Run("ChunkMatches", func(t *testing.T) {
2057 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2058 if len(sres.Files) != 0 {
2059 t.Errorf("got %v, wanted 0 matches", sres.Files)
2060 }
2061 })
2062}
2063
2064func TestMatchNewline(t *testing.T) {
2065 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2066 if err != nil {
2067 t.Fatalf("syntax.Parse: %v", err)
2068 }
2069
2070 content := []byte("pqr\nalex")
2071
2072 b := testIndexBuilder(t, nil,
2073 Document{
2074 Name: "f1",
2075 Content: content,
2076 })
2077
2078 t.Run("LineMatches", func(t *testing.T) {
2079 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2080 if len(sres.Files) != 1 {
2081 t.Errorf("got %v, wanted 1 matches", sres.Files)
2082 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2083 t.Errorf("got match line %q, want %q", l, content)
2084 }
2085 })
2086
2087 t.Run("ChunkMatches", func(t *testing.T) {
2088 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2089 if len(sres.Files) != 1 {
2090 t.Errorf("got %v, wanted 1 matches", sres.Files)
2091 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2092 t.Errorf("got match line %q, want %q", c, content)
2093 }
2094 })
2095}
2096
2097func TestSubRepo(t *testing.T) {
2098 subRepos := map[string]*Repository{
2099 "sub": {
2100 Name: "sub-name",
2101 LineFragmentTemplate: "sub-line",
2102 },
2103 }
2104
2105 content := []byte("pqr\nalex")
2106
2107 b := testIndexBuilder(t, &Repository{
2108 SubRepoMap: subRepos,
2109 }, Document{
2110 Name: "sub/f1",
2111 Content: content,
2112 SubRepositoryPath: "sub",
2113 })
2114
2115 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2116 if len(sres.Files) != 1 {
2117 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2118 }
2119
2120 f := sres.Files[0]
2121 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2122 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2123 }
2124
2125 if sres.LineFragments["sub-name"] != "sub-line" {
2126 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2127 }
2128}
2129
2130func TestSearchEither(t *testing.T) {
2131 b := testIndexBuilder(t, nil,
2132 Document{Name: "f1", Content: []byte("bla needle bla")},
2133 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2134
2135 t.Run("LineMatches", func(t *testing.T) {
2136 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2137 if len(sres.Files) != 2 {
2138 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2139 }
2140
2141 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2142 if len(sres.Files) != 1 {
2143 t.Fatalf("got %v, wanted 1 match", sres.Files)
2144 }
2145
2146 if got, want := sres.Files[0].FileName, "f1"; got != want {
2147 t.Errorf("got %q, want %q", got, want)
2148 }
2149 })
2150
2151 t.Run("ChunkMatches", func(t *testing.T) {
2152 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2153 if len(sres.Files) != 2 {
2154 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2155 }
2156
2157 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2158 if len(sres.Files) != 1 {
2159 t.Fatalf("got %v, wanted 1 match", sres.Files)
2160 }
2161
2162 if got, want := sres.Files[0].FileName, "f1"; got != want {
2163 t.Errorf("got %q, want %q", got, want)
2164 }
2165 })
2166}
2167
2168func TestUnicodeExactMatch(t *testing.T) {
2169 needle := "néédlÉ"
2170 content := []byte("blá blá " + needle + " blâ")
2171
2172 b := testIndexBuilder(t, nil,
2173 Document{Name: "f1", Content: content})
2174
2175 t.Run("LineMatches", func(t *testing.T) {
2176 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2177 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2178 }
2179 })
2180
2181 t.Run("ChunkMatches", func(t *testing.T) {
2182 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2183 if len(res.Files) != 1 {
2184 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2185 }
2186 })
2187}
2188
2189func TestUnicodeCoverContent(t *testing.T) {
2190 needle := "néédlÉ"
2191 content := []byte("blá blá " + needle + " blâ")
2192
2193 b := testIndexBuilder(t, nil,
2194 Document{Name: "f1", Content: content})
2195
2196 t.Run("LineMatches", func(t *testing.T) {
2197 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2198 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2199 }
2200
2201 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2202 if len(res.Files) != 1 {
2203 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2204 }
2205
2206 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2207 t.Errorf("got %d want %d", got, want)
2208 }
2209 })
2210
2211 t.Run("ChunkMatches", func(t *testing.T) {
2212 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2213 if len(res.Files) != 0 {
2214 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2215 }
2216
2217 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2218 if len(res.Files) != 1 {
2219 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2220 }
2221
2222 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2223 want := uint32(strings.Index(string(content), needle))
2224 if got != want {
2225 t.Errorf("got %d want %d", got, want)
2226 }
2227 })
2228}
2229
2230func TestUnicodeNonCoverContent(t *testing.T) {
2231 needle := "nééáádlÉ"
2232 content := []byte("blá blá " + needle + " blâ")
2233
2234 b := testIndexBuilder(t, nil,
2235 Document{Name: "f1", Content: content})
2236
2237 t.Run("LineMatches", func(t *testing.T) {
2238 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2239 if len(res.Files) != 1 {
2240 t.Fatalf("got %v, wanted 1 match", res.Files)
2241 }
2242
2243 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2244 t.Errorf("got %d want %d", got, want)
2245 }
2246 })
2247
2248 t.Run("ChunkMatches", func(t *testing.T) {
2249 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2250 if len(res.Files) != 1 {
2251 t.Fatalf("got %v, wanted 1 match", res.Files)
2252 }
2253
2254 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2255 want := uint32(strings.Index(string(content), needle))
2256 if got != want {
2257 t.Errorf("got %d want %d", got, want)
2258 }
2259 })
2260}
2261
// kelvinCodePoint is the Unicode code point U+212A (KELVIN SIGN), written in
// decimal. The tests in this file use it as a multi-byte rune and as the
// upper-case counterpart of the ASCII letter 'k'.
const kelvinCodePoint = 8490
2263
2264func TestUnicodeVariableLength(t *testing.T) {
2265 lower := 'k'
2266 upper := rune(kelvinCodePoint)
2267
2268 needle := "nee" + string([]rune{lower}) + "eed"
2269 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2270 " ee" + string([]rune{lower}) + "ee" +
2271 " ee" + string([]rune{upper}) + "ee")
2272
2273 t.Run("LineMatches", func(t *testing.T) {
2274 b := testIndexBuilder(t, nil,
2275 Document{Name: "f1", Content: []byte(corpus)})
2276
2277 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2278 if len(res.Files) != 1 {
2279 t.Fatalf("got %v, wanted 1 match", res.Files)
2280 }
2281 })
2282
2283 t.Run("ChunkMatches", func(t *testing.T) {
2284 b := testIndexBuilder(t, nil,
2285 Document{Name: "f1", Content: []byte(corpus)})
2286
2287 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2288 if len(res.Files) != 1 {
2289 t.Fatalf("got %v, wanted 1 match", res.Files)
2290 }
2291 })
2292}
2293
2294func TestUnicodeFileStartOffsets(t *testing.T) {
2295 unicode := "世界"
2296 wat := "waaaaaat"
2297 b := testIndexBuilder(t, nil,
2298 Document{
2299 Name: "f1",
2300 Content: []byte(unicode),
2301 },
2302 Document{
2303 Name: "f2",
2304 Content: []byte(wat),
2305 },
2306 )
2307 q := &query.Substring{Pattern: wat, Content: true}
2308 res := searchForTest(t, b, q)
2309 if len(res.Files) != 1 {
2310 t.Fatalf("got %v, wanted 1 match", res.Files)
2311 }
2312}
2313
2314func TestLongFileUTF8(t *testing.T) {
2315 needle := "neeedle"
2316
2317 // 6 bytes.
2318 unicode := "世界"
2319 content := []byte(strings.Repeat(unicode, 100) + needle)
2320 b := testIndexBuilder(t, nil,
2321 Document{
2322 Name: "f1",
2323 Content: []byte(strings.Repeat("a", 50)),
2324 },
2325 Document{
2326 Name: "f2",
2327 Content: content,
2328 })
2329
2330 t.Run("LineMatches", func(t *testing.T) {
2331 q := &query.Substring{Pattern: needle, Content: true}
2332 res := searchForTest(t, b, q)
2333 if len(res.Files) != 1 {
2334 t.Errorf("got %v, want 1 result", res)
2335 }
2336 })
2337
2338 t.Run("ChunkMatches", func(t *testing.T) {
2339 q := &query.Substring{Pattern: needle, Content: true}
2340 res := searchForTest(t, b, q, chunkOpts)
2341 if len(res.Files) != 1 {
2342 t.Errorf("got %v, want 1 result", res)
2343 }
2344 })
2345}
2346
2347func TestEstimateDocCount(t *testing.T) {
2348 content := []byte("bla needle bla")
2349 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2350 Document{Name: "f1", Content: content},
2351 Document{Name: "f2", Content: content},
2352 )
2353
2354 t.Run("LineMatches", func(t *testing.T) {
2355 if sres := searchForTest(t, b,
2356 query.NewAnd(
2357 &query.Substring{Pattern: "needle"},
2358 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2359 ), SearchOptions{
2360 EstimateDocCount: true,
2361 }); sres.Stats.ShardFilesConsidered != 2 {
2362 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2363 }
2364 if sres := searchForTest(t, b,
2365 query.NewAnd(
2366 &query.Substring{Pattern: "needle"},
2367 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2368 ), SearchOptions{
2369 EstimateDocCount: true,
2370 }); sres.Stats.ShardFilesConsidered != 0 {
2371 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2372 }
2373 })
2374
2375 t.Run("ChunkMatches", func(t *testing.T) {
2376 if sres := searchForTest(t, b,
2377 query.NewAnd(
2378 &query.Substring{Pattern: "needle"},
2379 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2380 ), SearchOptions{
2381 EstimateDocCount: true,
2382 ChunkMatches: true,
2383 }); sres.Stats.ShardFilesConsidered != 2 {
2384 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2385 }
2386 if sres := searchForTest(t, b,
2387 query.NewAnd(
2388 &query.Substring{Pattern: "needle"},
2389 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2390 ), SearchOptions{
2391 EstimateDocCount: true,
2392 ChunkMatches: true,
2393 }); sres.Stats.ShardFilesConsidered != 0 {
2394 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2395 }
2396 })
2397}
2398
2399func TestUTF8CorrectCorpus(t *testing.T) {
2400 needle := "neeedle"
2401
2402 // 6 bytes.
2403 unicode := "世界"
2404 b := testIndexBuilder(t, nil,
2405 Document{
2406 Name: "f1",
2407 Content: []byte(strings.Repeat(unicode, 100)),
2408 },
2409 Document{
2410 Name: "xxxxxneeedle",
2411 Content: []byte("hello"),
2412 })
2413
2414 t.Run("LineMatches", func(t *testing.T) {
2415 q := &query.Substring{Pattern: needle, FileName: true}
2416 res := searchForTest(t, b, q)
2417 if len(res.Files) != 1 {
2418 t.Errorf("got %v, want 1 result", res)
2419 }
2420 })
2421
2422 t.Run("ChunkMatches", func(t *testing.T) {
2423 q := &query.Substring{Pattern: needle, FileName: true}
2424 res := searchForTest(t, b, q, chunkOpts)
2425 if len(res.Files) != 1 {
2426 t.Errorf("got %v, want 1 result", res)
2427 }
2428 })
2429}
2430
2431func TestBuilderStats(t *testing.T) {
2432 b := testIndexBuilder(t, nil,
2433 Document{
2434 Name: "f1",
2435 Content: []byte(strings.Repeat("abcd", 1024)),
2436 })
2437 var buf bytes.Buffer
2438 if err := b.Write(&buf); err != nil {
2439 t.Fatal(err)
2440 }
2441
2442 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2443 t.Errorf("got %d, want %d", got, want)
2444 }
2445}
2446
2447func TestIOStats(t *testing.T) {
2448 b := testIndexBuilder(t, nil,
2449 Document{
2450 Name: "f1",
2451 Content: []byte(strings.Repeat("abcd", 1024)),
2452 })
2453
2454 t.Run("LineMatches", func(t *testing.T) {
2455 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2456 res := searchForTest(t, b, q)
2457
2458 // 4096 (content) + 2 (overhead: newlines or doc sections)
2459 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2460 t.Errorf("got content I/O %d, want %d", got, want)
2461 }
2462
2463 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2464 // delta encoded.
2465 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2466 t.Errorf("got index I/O %d, want %d", got, want)
2467 }
2468 })
2469
2470 t.Run("ChunkMatches", func(t *testing.T) {
2471 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2472 res := searchForTest(t, b, q, chunkOpts)
2473
2474 // 4096 (content) + 2 (overhead: newlines or doc sections)
2475 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2476 t.Errorf("got content I/O %d, want %d", got, want)
2477 }
2478
2479 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2480 // delta encoded.
2481 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2482 t.Errorf("got index I/O %d, want %d", got, want)
2483 }
2484 })
2485}
2486
2487func TestStartLineAnchor(t *testing.T) {
2488 b := testIndexBuilder(t, nil,
2489 Document{
2490 Name: "f1",
2491 Content: []byte(
2492 `hello
2493start of middle of line
2494`),
2495 })
2496
2497 t.Run("LineMatches", func(t *testing.T) {
2498 q, err := query.Parse("^start")
2499 if err != nil {
2500 t.Errorf("parse: %v", err)
2501 }
2502
2503 res := searchForTest(t, b, q)
2504 if len(res.Files) != 1 {
2505 t.Errorf("got %v, want 1 file", res.Files)
2506 }
2507
2508 q, err = query.Parse("^middle")
2509 if err != nil {
2510 t.Errorf("parse: %v", err)
2511 }
2512 res = searchForTest(t, b, q)
2513 if len(res.Files) != 0 {
2514 t.Errorf("got %v, want 0 files", res.Files)
2515 }
2516 })
2517
2518 t.Run("ChunkMatches", func(t *testing.T) {
2519 q, err := query.Parse("^start")
2520 if err != nil {
2521 t.Errorf("parse: %v", err)
2522 }
2523
2524 res := searchForTest(t, b, q, chunkOpts)
2525 if len(res.Files) != 1 {
2526 t.Errorf("got %v, want 1 file", res.Files)
2527 }
2528
2529 q, err = query.Parse("^middle")
2530 if err != nil {
2531 t.Errorf("parse: %v", err)
2532 }
2533 res = searchForTest(t, b, q, chunkOpts)
2534 if len(res.Files) != 0 {
2535 t.Errorf("got %v, want 0 files", res.Files)
2536 }
2537 })
2538}
2539
2540func TestAndOrUnicode(t *testing.T) {
2541 q, err := query.Parse("orange.*apple")
2542 if err != nil {
2543 t.Errorf("parse: %v", err)
2544 }
2545 finalQ := query.NewAnd(q,
2546 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2547 query.NewOr(&query.Branch{Pattern: "master"}))))
2548
2549 b := testIndexBuilder(t, &Repository{
2550 Name: "name",
2551 Branches: []RepositoryBranch{{"master", "master-version"}},
2552 }, Document{
2553 Name: "f2",
2554 Content: []byte("orange\u2318apple"),
2555 // --------------0123456 78901
2556 Branches: []string{"master"},
2557 })
2558
2559 t.Run("LineMatches", func(t *testing.T) {
2560 res := searchForTest(t, b, finalQ)
2561 if len(res.Files) != 1 {
2562 t.Errorf("got %v, want 1 result", res.Files)
2563 }
2564 })
2565
2566 t.Run("ChunkMatches", func(t *testing.T) {
2567 res := searchForTest(t, b, finalQ, chunkOpts)
2568 if len(res.Files) != 1 {
2569 t.Errorf("got %v, want 1 result", res.Files)
2570 }
2571 })
2572}
2573
2574func TestAndShort(t *testing.T) {
2575 content := []byte("bla needle at orange bla")
2576 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2577 Document{Name: "f1", Content: content},
2578 Document{Name: "f2", Content: []byte("xx at xx")},
2579 Document{Name: "f3", Content: []byte("yy orange xx")},
2580 )
2581
2582 q := query.NewAnd(&query.Substring{Pattern: "at"},
2583 &query.Substring{Pattern: "orange"})
2584
2585 t.Run("LineMatches", func(t *testing.T) {
2586 res := searchForTest(t, b, q)
2587 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2588 t.Errorf("got %v, want 1 result", res.Files)
2589 }
2590 })
2591
2592 t.Run("ChunkMatches", func(t *testing.T) {
2593 res := searchForTest(t, b, q, chunkOpts)
2594 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2595 t.Errorf("got %v, want 1 result", res.Files)
2596 }
2597 })
2598}
2599
2600func TestNoCollectRegexpSubstring(t *testing.T) {
2601 content := []byte("bla final bla\nfoo final, foo")
2602 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2603 Document{Name: "f1", Content: content},
2604 )
2605
2606 q := &query.Regexp{
2607 Regexp: mustParseRE("final[,.]"),
2608 }
2609
2610 t.Run("LineMatches", func(t *testing.T) {
2611 res := searchForTest(t, b, q)
2612 if len(res.Files) != 1 {
2613 t.Fatalf("got %v, want 1 result", res.Files)
2614 }
2615 if f := res.Files[0]; len(f.LineMatches) != 1 {
2616 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2617 }
2618 })
2619
2620 t.Run("ChunkMatches", func(t *testing.T) {
2621 res := searchForTest(t, b, q, chunkOpts)
2622 if len(res.Files) != 1 {
2623 t.Fatalf("got %v, want 1 result", res.Files)
2624 }
2625 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2626 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2627 }
2628 })
2629}
2630
2631func printLineMatches(ms []LineMatch) string {
2632 var ss []string
2633 for _, m := range ms {
2634 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2635 }
2636
2637 return strings.Join(ss, ", ")
2638}
2639
2640func TestLang(t *testing.T) {
2641 content := []byte("bla needle bla")
2642 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2643 Document{Name: "f1", Content: content},
2644 Document{Name: "f2", Language: "java", Content: content},
2645 Document{Name: "f3", Language: "cpp", Content: content},
2646 )
2647
2648 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2649 &query.Language{Language: "cpp"})
2650
2651 t.Run("LineMatches", func(t *testing.T) {
2652 res := searchForTest(t, b, q)
2653 if len(res.Files) != 1 {
2654 t.Fatalf("got %v, want 1 result in f3", res.Files)
2655 }
2656 f := res.Files[0]
2657 if f.FileName != "f3" || f.Language != "cpp" {
2658 t.Fatalf("got %v, want 1 match with language cpp", f)
2659 }
2660 })
2661
2662 t.Run("ChunkMatches", func(t *testing.T) {
2663 res := searchForTest(t, b, q, chunkOpts)
2664 if len(res.Files) != 1 {
2665 t.Fatalf("got %v, want 1 result in f3", res.Files)
2666 }
2667 f := res.Files[0]
2668 if f.FileName != "f3" || f.Language != "cpp" {
2669 t.Fatalf("got %v, want 1 match with language cpp", f)
2670 }
2671 })
2672}
2673
2674func TestLangShortcut(t *testing.T) {
2675 content := []byte("bla needle bla")
2676 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2677 Document{Name: "f2", Language: "java", Content: content},
2678 Document{Name: "f3", Language: "cpp", Content: content},
2679 )
2680
2681 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2682 &query.Language{Language: "fortran"})
2683
2684 t.Run("LineMatches", func(t *testing.T) {
2685 res := searchForTest(t, b, q)
2686 if len(res.Files) != 0 {
2687 t.Fatalf("got %v, want 0 results", res.Files)
2688 }
2689 if res.Stats.IndexBytesLoaded > 0 {
2690 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2691 }
2692 })
2693
2694 t.Run("ChunkMatches", func(t *testing.T) {
2695 res := searchForTest(t, b, q, chunkOpts)
2696 if len(res.Files) != 0 {
2697 t.Fatalf("got %v, want 0 results", res.Files)
2698 }
2699 if res.Stats.IndexBytesLoaded > 0 {
2700 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2701 }
2702 })
2703}
2704
2705func TestNoTextMatchAtoms(t *testing.T) {
2706 content := []byte("bla needle bla")
2707 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2708 Document{Name: "f1", Content: content},
2709 Document{Name: "f2", Language: "java", Content: content},
2710 Document{Name: "f3", Language: "cpp", Content: content},
2711 )
2712 q := query.NewAnd(&query.Language{Language: "java"})
2713 t.Run("LineMatches", func(t *testing.T) {
2714 res := searchForTest(t, b, q)
2715 if len(res.Files) != 1 {
2716 t.Fatalf("got %v, want 1 result in f3", res.Files)
2717 }
2718 })
2719
2720 t.Run("ChunkMatches", func(t *testing.T) {
2721 res := searchForTest(t, b, q, chunkOpts)
2722 if len(res.Files) != 1 {
2723 t.Fatalf("got %v, want 1 result in f3", res.Files)
2724 }
2725 })
2726}
2727
2728func TestNoPositiveAtoms(t *testing.T) {
2729 content := []byte("bla needle bla")
2730 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2731 Document{Name: "f1", Content: content},
2732 Document{Name: "f2", Content: content},
2733 )
2734
2735 q := query.NewAnd(
2736 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2737 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2738 t.Run("LineMatches", func(t *testing.T) {
2739 res := searchForTest(t, b, q)
2740 if len(res.Files) != 2 {
2741 t.Fatalf("got %v, want 2 results in f3", res.Files)
2742 }
2743 })
2744 t.Run("ChunkMatches", func(t *testing.T) {
2745 res := searchForTest(t, b, q, chunkOpts)
2746 if len(res.Files) != 2 {
2747 t.Fatalf("got %v, want 2 results in f3", res.Files)
2748 }
2749 })
2750}
2751
2752func TestSymbolBoundaryStart(t *testing.T) {
2753 content := []byte("start\nbla bla\nend")
2754 // ----------------012345-67890123-456
2755
2756 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2757 Document{
2758 Name: "f1",
2759 Content: content,
2760 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2761 },
2762 )
2763 q := &query.Symbol{
2764 Expr: &query.Substring{Pattern: "start"},
2765 }
2766 t.Run("LineMatches", func(t *testing.T) {
2767 res := searchForTest(t, b, q)
2768 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2769 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2770 }
2771 m := res.Files[0].LineMatches[0].LineFragments[0]
2772 if m.Offset != 0 {
2773 t.Fatalf("got offset %d want 0", m.Offset)
2774 }
2775 })
2776
2777 t.Run("ChunkMatches", func(t *testing.T) {
2778 res := searchForTest(t, b, q, chunkOpts)
2779 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2780 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2781 }
2782 m := res.Files[0].ChunkMatches[0].Ranges[0]
2783 if m.Start.ByteOffset != 0 {
2784 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2785 }
2786 })
2787}
2788
2789func TestSymbolBoundaryEnd(t *testing.T) {
2790 content := []byte("start\nbla bla\nend")
2791 // ----------------012345-67890123-456
2792
2793 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2794 Document{
2795 Name: "f1",
2796 Content: content,
2797 Symbols: []DocumentSection{{14, 17}},
2798 },
2799 )
2800 q := &query.Symbol{
2801 Expr: &query.Substring{Pattern: "end"},
2802 }
2803 t.Run("LineMatches", func(t *testing.T) {
2804 res := searchForTest(t, b, q)
2805 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2806 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2807 }
2808 m := res.Files[0].LineMatches[0].LineFragments[0]
2809 if m.Offset != 14 {
2810 t.Fatalf("got offset %d want 0", m.Offset)
2811 }
2812 })
2813
2814 t.Run("ChunkMatches", func(t *testing.T) {
2815 res := searchForTest(t, b, q, chunkOpts)
2816 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2817 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2818 }
2819 m := res.Files[0].ChunkMatches[0].Ranges[0]
2820 if m.Start.ByteOffset != 14 {
2821 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2822 }
2823 })
2824}
2825
2826func TestSymbolSubstring(t *testing.T) {
2827 content := []byte("bla\nsymblabla\nbla")
2828 // ----------------0123-4567890123-456
2829
2830 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2831 Document{
2832 Name: "f1",
2833 Content: content,
2834 Symbols: []DocumentSection{{4, 12}},
2835 },
2836 )
2837 q := &query.Symbol{
2838 Expr: &query.Substring{Pattern: "bla"},
2839 }
2840 t.Run("LineMatches", func(t *testing.T) {
2841 res := searchForTest(t, b, q)
2842 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2843 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2844 }
2845 m := res.Files[0].LineMatches[0].LineFragments[0]
2846 if m.Offset != 7 || m.MatchLength != 3 {
2847 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2848 }
2849 })
2850
2851 t.Run("ChunkMatches", func(t *testing.T) {
2852 res := searchForTest(t, b, q, chunkOpts)
2853 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2854 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2855 }
2856 m := res.Files[0].ChunkMatches[0].Ranges[0]
2857 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2858 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2859 }
2860 })
2861}
2862
2863func TestSymbolSubstringExact(t *testing.T) {
2864 content := []byte("bla\nsym\nbla\nsym\nasymb")
2865 // ----------------0123-4567-890123456-78901
2866
2867 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2868 Document{
2869 Name: "f1",
2870 Content: content,
2871 Symbols: []DocumentSection{{4, 7}},
2872 },
2873 )
2874 q := &query.Symbol{
2875 Expr: &query.Substring{Pattern: "sym"},
2876 }
2877 t.Run("LineMatches", func(t *testing.T) {
2878 res := searchForTest(t, b, q)
2879 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2880 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2881 }
2882 m := res.Files[0].LineMatches[0].LineFragments[0]
2883 if m.Offset != 4 {
2884 t.Fatalf("got offset %d, want 7", m.Offset)
2885 }
2886 })
2887
2888 t.Run("ChunkMatches", func(t *testing.T) {
2889 res := searchForTest(t, b, q, chunkOpts)
2890 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2891 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2892 }
2893 m := res.Files[0].ChunkMatches[0].Ranges[0]
2894 if m.Start.ByteOffset != 4 {
2895 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2896 }
2897 })
2898}
2899
2900func TestSymbolRegexpExact(t *testing.T) {
2901 content := []byte("blah\nbla\nbl")
2902 // ----------------01234-5678-90
2903
2904 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2905 Document{
2906 Name: "f1",
2907 Content: content,
2908 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2909 },
2910 )
2911 q := &query.Symbol{
2912 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2913 }
2914 t.Run("LineMatches", func(t *testing.T) {
2915 res := searchForTest(t, b, q)
2916 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2917 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2918 }
2919 m := res.Files[0].LineMatches[0].LineFragments[0]
2920 if m.Offset != 5 {
2921 t.Fatalf("got offset %d, want 5", m.Offset)
2922 }
2923 })
2924
2925 t.Run("ChunkMatches", func(t *testing.T) {
2926 res := searchForTest(t, b, q, chunkOpts)
2927 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2928 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2929 }
2930 m := res.Files[0].ChunkMatches[0].Ranges[0]
2931 if m.Start.ByteOffset != 5 {
2932 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2933 }
2934 })
2935}
2936
2937func TestSymbolRegexpPartial(t *testing.T) {
2938 content := []byte("abcdef")
2939 // ----------------012345
2940
2941 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2942 Document{
2943 Name: "f1",
2944 Content: content,
2945 Symbols: []DocumentSection{{0, 6}},
2946 },
2947 )
2948 q := &query.Symbol{
2949 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2950 }
2951 t.Run("LineMatches", func(t *testing.T) {
2952 res := searchForTest(t, b, q)
2953 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2954 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2955 }
2956 m := res.Files[0].LineMatches[0].LineFragments[0]
2957 if m.Offset != 1 {
2958 t.Fatalf("got offset %d, want 1", m.Offset)
2959 }
2960 if m.MatchLength != 3 {
2961 t.Fatalf("got match length %d, want 3", m.MatchLength)
2962 }
2963 })
2964
2965 t.Run("ChunkMatches", func(t *testing.T) {
2966 res := searchForTest(t, b, q, chunkOpts)
2967 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2968 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2969 }
2970 m := res.Files[0].ChunkMatches[0].Ranges[0]
2971 if m.Start.ByteOffset != 1 {
2972 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2973 }
2974 if m.End.ByteOffset != 4 {
2975 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2976 }
2977 })
2978}
2979
2980func TestSymbolRegexpAll(t *testing.T) {
2981 docs := []Document{
2982 {
2983 Name: "f1",
2984 Content: []byte("Hello Zoekt"),
2985 // --------------01234567890
2986 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2987 },
2988 {
2989 Name: "f2",
2990 Content: []byte("Second Zoekt Third"),
2991 // --------------012345678901234567
2992 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2993 },
2994 }
2995
2996 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2997 q := &query.Symbol{
2998 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2999 }
3000 t.Run("LineMatches", func(t *testing.T) {
3001 res := searchForTest(t, b, q)
3002 if len(res.Files) != len(docs) {
3003 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3004 }
3005 for i, want := range docs {
3006 got := res.Files[i].LineMatches[0].LineFragments
3007 if len(got) != len(want.Symbols) {
3008 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3009 }
3010
3011 for j, sec := range want.Symbols {
3012 if sec.Start != got[j].Offset {
3013 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3014 }
3015 }
3016 }
3017 })
3018
3019 t.Run("ChunkMatches", func(t *testing.T) {
3020 res := searchForTest(t, b, q, chunkOpts)
3021 if len(res.Files) != len(docs) {
3022 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3023 }
3024 for i, want := range docs {
3025 got := res.Files[i].ChunkMatches[0].Ranges
3026 if len(got) != len(want.Symbols) {
3027 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3028 }
3029
3030 for j, sec := range want.Symbols {
3031 if sec.Start != uint32(got[j].Start.ByteOffset) {
3032 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3033 }
3034 }
3035 }
3036 })
3037}
3038
3039func TestHitIterTerminate(t *testing.T) {
3040 // contrived input: trigram frequencies forces selecting abc +
3041 // def for the distance iteration. There is no match, so this
3042 // will advance the compressedPostingIterator to beyond the
3043 // end.
3044 content := []byte("abc bcdbcd cdecde abcabc def efg")
3045 b := testIndexBuilder(t, nil,
3046 Document{
3047 Name: "f1",
3048 Content: content,
3049 },
3050 )
3051
3052 t.Run("LineMatches", func(t *testing.T) {
3053 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3054 })
3055
3056 t.Run("ChunkMatches", func(t *testing.T) {
3057 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3058 })
3059}
3060
3061func TestDistanceHitIterBailLast(t *testing.T) {
3062 content := []byte("AST AST AST UASH")
3063 b := testIndexBuilder(t, nil,
3064 Document{
3065 Name: "f1",
3066 Content: content,
3067 },
3068 )
3069 t.Run("LineMatches", func(t *testing.T) {
3070 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3071 if len(res.Files) != 0 {
3072 t.Fatalf("got %v, want no results", res.Files)
3073 }
3074 })
3075
3076 t.Run("LineMatches", func(t *testing.T) {
3077 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3078 if len(res.Files) != 0 {
3079 t.Fatalf("got %v, want no results", res.Files)
3080 }
3081 })
3082}
3083
3084func TestDocumentSectionRuneBoundary(t *testing.T) {
3085 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3086 b, err := NewIndexBuilder(nil)
3087 if err != nil {
3088 t.Fatalf("NewIndexBuilder: %v", err)
3089 }
3090
3091 for i, sec := range []DocumentSection{
3092 {2, 6},
3093 {3, 7},
3094 } {
3095 if err := b.Add(Document{
3096 Name: "f1",
3097 Content: []byte(content),
3098 Symbols: []DocumentSection{sec},
3099 }); err == nil {
3100 t.Errorf("%d: Add succeeded", i)
3101 }
3102 }
3103}
3104
3105func TestUnicodeQuery(t *testing.T) {
3106 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3107 b := testIndexBuilder(t, nil,
3108 Document{
3109 Name: "f1",
3110 Content: []byte(content),
3111 },
3112 )
3113
3114 q := &query.Substring{Pattern: content}
3115
3116 t.Run("LineMatches", func(t *testing.T) {
3117 res := searchForTest(t, b, q)
3118 if len(res.Files) != 1 {
3119 t.Fatalf("want 1 match, got %v", res.Files)
3120 }
3121
3122 f := res.Files[0]
3123 if len(f.LineMatches) != 1 {
3124 t.Fatalf("want 1 line, got %v", f.LineMatches)
3125 }
3126 l := f.LineMatches[0]
3127
3128 if len(l.LineFragments) != 1 {
3129 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3130 }
3131 fr := l.LineFragments[0]
3132 if fr.MatchLength != len(content) {
3133 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3134 }
3135 })
3136
3137 t.Run("ChunkMatches", func(t *testing.T) {
3138 res := searchForTest(t, b, q, chunkOpts)
3139 if len(res.Files) != 1 {
3140 t.Fatalf("want 1 match, got %v", res.Files)
3141 }
3142
3143 f := res.Files[0]
3144 if len(f.ChunkMatches) != 1 {
3145 t.Fatalf("want 1 line, got %v", f.LineMatches)
3146 }
3147 cm := f.ChunkMatches[0]
3148
3149 if len(cm.Ranges) != 1 {
3150 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3151 }
3152 rr := cm.Ranges[0]
3153 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3154 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3155 }
3156 })
3157}
3158
3159func TestSkipInvalidContent(t *testing.T) {
3160 for _, content := range []string{
3161 // Binary
3162 "abc def \x00 abc",
3163 } {
3164
3165 b, err := NewIndexBuilder(nil)
3166 if err != nil {
3167 t.Fatalf("NewIndexBuilder: %v", err)
3168 }
3169
3170 if err := b.Add(Document{
3171 Name: "f1",
3172 Content: []byte(content),
3173 }); err != nil {
3174 t.Fatal(err)
3175 }
3176
3177 t.Run("LineMatches", func(t *testing.T) {
3178 q := &query.Substring{Pattern: "abc def"}
3179 res := searchForTest(t, b, q)
3180 if len(res.Files) != 0 {
3181 t.Fatalf("got %v, want no results", res.Files)
3182 }
3183
3184 q = &query.Substring{Pattern: "NOT-INDEXED"}
3185 res = searchForTest(t, b, q)
3186 if len(res.Files) != 1 {
3187 t.Fatalf("got %v, want 1 result", res.Files)
3188 }
3189 })
3190
3191 t.Run("ChunkMatches", func(t *testing.T) {
3192 q := &query.Substring{Pattern: "abc def"}
3193 res := searchForTest(t, b, q, chunkOpts)
3194 if len(res.Files) != 0 {
3195 t.Fatalf("got %v, want no results", res.Files)
3196 }
3197
3198 q = &query.Substring{Pattern: "NOT-INDEXED"}
3199 res = searchForTest(t, b, q, chunkOpts)
3200 if len(res.Files) != 1 {
3201 t.Fatalf("got %v, want 1 result", res.Files)
3202 }
3203 })
3204 }
3205}
3206
3207func TestDocChecker(t *testing.T) {
3208 docChecker := DocChecker{}
3209
3210 // Test valid and invalid text
3211 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3212 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3213 t.Errorf("Check(%q): %v", text, err)
3214 }
3215 }
3216 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3217 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3218 t.Errorf("Check(%q) succeeded", text)
3219 }
3220 }
3221
3222 // Test valid and invalid text with an allowed large file
3223 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3224 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3225 t.Errorf("Check(%q): %v", text, err)
3226 }
3227 }
3228 for _, text := range []string{"zero\x00byte", "xx"} {
3229 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3230 t.Errorf("Check(%q) succeeded", text)
3231 }
3232 }
3233}
3234
3235func TestLineAnd(t *testing.T) {
3236 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3237 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3238 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3239 Document{Name: "f3", Content: []byte("banana grape")},
3240 )
3241 pattern := "(apple)(?-s:.)*?(banana)"
3242 r, _ := syntax.Parse(pattern, syntax.Perl)
3243
3244 q := query.Regexp{
3245 Regexp: r,
3246 Content: true,
3247 }
3248 t.Run("LineMatches", func(t *testing.T) {
3249 res := searchForTest(t, b, &q)
3250 wantRegexpCount := 1
3251 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3252 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3253 }
3254 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3255 t.Errorf("got %v, want 1 result", res.Files)
3256 }
3257 })
3258
3259 t.Run("ChunkMatches", func(t *testing.T) {
3260 res := searchForTest(t, b, &q, chunkOpts)
3261 wantRegexpCount := 1
3262 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3263 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3264 }
3265 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3266 t.Errorf("got %v, want 1 result", res.Files)
3267 }
3268 })
3269}
3270
3271func TestLineAndFileName(t *testing.T) {
3272 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3273 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3274 Document{Name: "f2", Content: []byte("apple banana\norange")},
3275 Document{Name: "apple banana", Content: []byte("banana grape")},
3276 )
3277 pattern := "(apple)(?-s:.)*?(banana)"
3278 r, _ := syntax.Parse(pattern, syntax.Perl)
3279
3280 q := query.Regexp{
3281 Regexp: r,
3282 FileName: true,
3283 }
3284 t.Run("LineMatches", func(t *testing.T) {
3285 res := searchForTest(t, b, &q)
3286 wantRegexpCount := 1
3287 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3288 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3289 }
3290 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3291 t.Errorf("got %v, want 1 result", res.Files)
3292 }
3293 })
3294
3295 t.Run("ChunkMatches", func(t *testing.T) {
3296 res := searchForTest(t, b, &q, chunkOpts)
3297 wantRegexpCount := 1
3298 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3299 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3300 }
3301 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3302 t.Errorf("got %v, want 1 result", res.Files)
3303 }
3304 })
3305}
3306
3307func TestMultiLineRegex(t *testing.T) {
3308 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3309 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3310 Document{Name: "f2", Content: []byte("apple orange")},
3311 Document{Name: "f3", Content: []byte("grape apple")},
3312 )
3313 pattern := "(apple).*?[[:space:]].*?(grape)"
3314 r, _ := syntax.Parse(pattern, syntax.Perl)
3315
3316 q := query.Regexp{
3317 Regexp: r,
3318 }
3319 t.Run("LineMatches", func(t *testing.T) {
3320 res := searchForTest(t, b, &q)
3321 wantRegexpCount := 2
3322 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3323 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3324 }
3325 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3326 t.Errorf("got %v, want 1 result", res.Files)
3327 }
3328 if l := len(res.Files[0].LineMatches); l != 2 {
3329 t.Errorf("got %v, want 2 line matches", l)
3330 }
3331 })
3332
3333 t.Run("ChunkMatches", func(t *testing.T) {
3334 res := searchForTest(t, b, &q, chunkOpts)
3335 wantRegexpCount := 2
3336 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3337 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3338 }
3339 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3340 t.Errorf("got %v, want 1 result", res.Files)
3341 }
3342 if l := len(res.Files[0].ChunkMatches); l != 1 {
3343 t.Errorf("got %v, want 1 chunk matches", l)
3344 }
3345 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3346 t.Errorf("got %v, want 1 chunk ranges", l)
3347 }
3348 })
3349}
3350
3351func TestSearchTypeFileName(t *testing.T) {
3352 b := testIndexBuilder(t, &Repository{
3353 Name: "reponame",
3354 },
3355 Document{Name: "f1", Content: []byte("bla the needle")},
3356 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3357 // -----------------------------------012345678901234567890-123456
3358 )
3359
3360 t.Run("LineMatches", func(t *testing.T) {
3361 wantSingleMatch := func(res *SearchResult, want string) {
3362 t.Helper()
3363 fmatches := res.Files
3364 if len(fmatches) != 1 {
3365 t.Errorf("got %v, want 1 matches", len(fmatches))
3366 return
3367 }
3368 if len(fmatches[0].LineMatches) != 1 {
3369 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3370 return
3371 }
3372 var got string
3373 if fmatches[0].LineMatches[0].FileName {
3374 got = fmatches[0].FileName
3375 } else {
3376 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3377 }
3378
3379 if got != want {
3380 t.Errorf("got %s, want %s", got, want)
3381 }
3382 }
3383
3384 // Only return the later match in the second file
3385 res := searchForTest(t, b, query.NewAnd(
3386 &query.Type{
3387 Type: query.TypeFileName,
3388 Child: &query.Substring{Pattern: "needle"},
3389 },
3390 &query.Substring{Pattern: "file"}))
3391 wantSingleMatch(res, "f2:8")
3392
3393 // Only return a filename result
3394 res = searchForTest(t, b,
3395 &query.Type{
3396 Type: query.TypeFileName,
3397 Child: &query.Substring{Pattern: "file"},
3398 })
3399 wantSingleMatch(res, "f2")
3400 })
3401
3402 t.Run("ChunkMatches", func(t *testing.T) {
3403 wantSingleMatch := func(res *SearchResult, want string) {
3404 t.Helper()
3405 fmatches := res.Files
3406 if len(fmatches) != 1 {
3407 t.Errorf("got %v, want 1 matches", len(fmatches))
3408 return
3409 }
3410 if len(fmatches[0].ChunkMatches) != 1 {
3411 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3412 return
3413 }
3414 var got string
3415 if fmatches[0].ChunkMatches[0].FileName {
3416 got = fmatches[0].FileName
3417 } else {
3418 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3419 }
3420
3421 if got != want {
3422 t.Errorf("got %s, want %s", got, want)
3423 }
3424 }
3425
3426 // Only return the later match in the second file
3427 res := searchForTest(t, b, query.NewAnd(
3428 &query.Type{
3429 Type: query.TypeFileName,
3430 Child: &query.Substring{Pattern: "needle"},
3431 },
3432 &query.Substring{Pattern: "file"}),
3433 chunkOpts,
3434 )
3435 wantSingleMatch(res, "f2:8")
3436
3437 // Only return a filename result
3438 res = searchForTest(t, b,
3439 &query.Type{
3440 Type: query.TypeFileName,
3441 Child: &query.Substring{Pattern: "file"},
3442 },
3443 chunkOpts,
3444 )
3445 wantSingleMatch(res, "f2")
3446 })
3447}
3448
3449func TestSearchTypeLanguage(t *testing.T) {
3450 b := testIndexBuilder(t, &Repository{
3451 Name: "reponame",
3452 },
3453 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3454 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3455 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3456 )
3457
3458 t.Log(b.languageMap)
3459
3460 t.Run("LineMatches", func(t *testing.T) {
3461 wantSingleMatch := func(res *SearchResult, want string) {
3462 t.Helper()
3463 fmatches := res.Files
3464 if len(fmatches) != 1 {
3465 t.Errorf("got %v, want 1 matches", len(fmatches))
3466 return
3467 }
3468 if len(fmatches[0].LineMatches) != 1 {
3469 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3470 return
3471 }
3472 var got string
3473 if fmatches[0].LineMatches[0].FileName {
3474 got = fmatches[0].FileName
3475 } else {
3476 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3477 }
3478
3479 if got != want {
3480 t.Errorf("got %s, want %s", got, want)
3481 }
3482 }
3483
3484 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3485 wantSingleMatch(res, "apex.cls")
3486
3487 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3488 wantSingleMatch(res, "tex.cls")
3489
3490 res = searchForTest(t, b, &query.Language{Language: "C"})
3491 wantSingleMatch(res, "hello.h")
3492
3493 // test fallback language search by pretending it's an older index version
3494 res = searchForTest(t, b, &query.Language{Language: "C++"})
3495 if len(res.Files) != 0 {
3496 t.Errorf("got %d results for C++, want 0", len(res.Files))
3497 }
3498
3499 b.featureVersion = 11 // force fallback
3500 res = searchForTest(t, b, &query.Language{Language: "C++"})
3501 wantSingleMatch(res, "hello.h")
3502 })
3503
3504 t.Run("ChunkMatches", func(t *testing.T) {
3505 wantSingleMatch := func(res *SearchResult, want string) {
3506 t.Helper()
3507 fmatches := res.Files
3508 if len(fmatches) != 1 {
3509 t.Errorf("got %v, want 1 matches", len(fmatches))
3510 return
3511 }
3512 if len(fmatches[0].ChunkMatches) != 1 {
3513 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3514 return
3515 }
3516 var got string
3517 if fmatches[0].ChunkMatches[0].FileName {
3518 got = fmatches[0].FileName
3519 } else {
3520 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3521 }
3522
3523 if got != want {
3524 t.Errorf("got %s, want %s", got, want)
3525 }
3526 }
3527
3528 b.featureVersion = FeatureVersion // reset feature version
3529 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3530 wantSingleMatch(res, "apex.cls")
3531
3532 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3533 wantSingleMatch(res, "tex.cls")
3534
3535 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3536 wantSingleMatch(res, "hello.h")
3537
3538 // test fallback language search by pretending it's an older index version
3539 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3540 if len(res.Files) != 0 {
3541 t.Errorf("got %d results for C++, want 0", len(res.Files))
3542 }
3543
3544 b.featureVersion = 11 // force fallback
3545 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3546 wantSingleMatch(res, "hello.h")
3547 })
3548}
3549
3550func TestStats(t *testing.T) {
3551 ignored := []cmp.Option{
3552 cmpopts.EquateEmpty(),
3553 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3554 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3555 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3556 }
3557
3558 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3559 searcher := searcherForTest(t, b)
3560 indexdata := searcher.(*indexData)
3561 return indexdata.repoListEntry
3562 }
3563
3564 t.Run("one empty repo", func(t *testing.T) {
3565 b := testIndexBuilder(t, nil)
3566 got := repoListEntries(b)
3567 want := []RepoListEntry{
3568 {
3569 Stats: RepoStats{
3570 Repos: 0,
3571 Shards: 1,
3572 Documents: 0,
3573 IndexBytes: 20,
3574 ContentBytes: 0,
3575 NewLinesCount: 0,
3576 DefaultBranchNewLinesCount: 0,
3577 OtherBranchesNewLinesCount: 0,
3578 },
3579 },
3580 }
3581
3582 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3583 t.Fatalf("mismatch (-want +got):\n%s", diff)
3584 }
3585
3586 })
3587
3588 t.Run("one simple shard", func(t *testing.T) {
3589 b := testIndexBuilder(t, nil,
3590 Document{Name: "doc 0", Content: []byte("content 0")},
3591 Document{Name: "doc 1", Content: []byte("content 1")},
3592 )
3593 got := repoListEntries(b)
3594 want := []RepoListEntry{
3595 {
3596 Stats: RepoStats{
3597 Repos: 0,
3598 Shards: 1,
3599 Documents: 2,
3600 IndexBytes: 224,
3601 ContentBytes: 28,
3602 NewLinesCount: 0,
3603 DefaultBranchNewLinesCount: 0,
3604 OtherBranchesNewLinesCount: 0,
3605 },
3606 },
3607 }
3608
3609 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3610 t.Fatalf("mismatch (-want +got):\n%s", diff)
3611 }
3612
3613 })
3614
3615 t.Run("one compound shard", func(t *testing.T) {
3616 b := testIndexBuilderCompound(t,
3617 []*Repository{
3618 {Name: "repo 0"},
3619 {Name: "repo 1"},
3620 },
3621 [][]Document{
3622 {
3623 {Name: "doc 0", Content: []byte("content 0")},
3624 {Name: "doc 1", Content: []byte("content 1")},
3625 },
3626 {
3627 {Name: "doc 2", Content: []byte("content 2")},
3628 {Name: "doc 3", Content: []byte("content 3")},
3629 },
3630 },
3631 )
3632 got := repoListEntries(b)
3633 want := []RepoListEntry{
3634 {
3635 Stats: RepoStats{
3636 Repos: 0,
3637 Shards: 1,
3638 Documents: 2,
3639 IndexBytes: 180,
3640 ContentBytes: 28,
3641 NewLinesCount: 0,
3642 DefaultBranchNewLinesCount: 0,
3643 OtherBranchesNewLinesCount: 0,
3644 },
3645 },
3646 {
3647 Stats: RepoStats{
3648 Repos: 0,
3649 Shards: 1,
3650 Documents: 2,
3651 IndexBytes: 180,
3652 ContentBytes: 28,
3653 NewLinesCount: 0,
3654 DefaultBranchNewLinesCount: 0,
3655 OtherBranchesNewLinesCount: 0,
3656 },
3657 },
3658 }
3659
3660 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3661 t.Fatalf("mismatch (-want +got):\n%s", diff)
3662 }
3663 })
3664
3665 t.Run("compound shard with empty repos", func(t *testing.T) {
3666 b := testIndexBuilderCompound(t,
3667 []*Repository{
3668 {Name: "repo 0"},
3669 {Name: "repo 1"},
3670 {Name: "repo 2"},
3671 {Name: "repo 3"},
3672 {Name: "repo 4"},
3673 },
3674 [][]Document{
3675 {{Name: "doc 0", Content: []byte("content 0")}},
3676 nil,
3677 {{Name: "doc 1", Content: []byte("content 1")}},
3678 nil,
3679 nil,
3680 },
3681 )
3682 got := repoListEntries(b)
3683
3684 entryEmpty := RepoListEntry{Stats: RepoStats{
3685 Shards: 1,
3686 Documents: 0,
3687 ContentBytes: 0,
3688 }}
3689 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3690 Shards: 1,
3691 Documents: 1,
3692 ContentBytes: 14,
3693 }}
3694
3695 want := []RepoListEntry{
3696 entryNonEmpty,
3697 entryEmpty,
3698 entryNonEmpty,
3699 entryEmpty,
3700 entryEmpty,
3701 }
3702
3703 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3704 t.Fatalf("mismatch (-want +got):\n%s", diff)
3705 }
3706
3707 })
3708}
3709
3710// This tests the frequent pattern "\bLITERAL\b".
3711func TestWordSearch(t *testing.T) {
3712 content := []byte("needle the bla")
3713 // ----------------01234567890123
3714
3715 b := testIndexBuilder(t, nil,
3716 Document{
3717 Name: "f1",
3718 Content: content,
3719 })
3720
3721 t.Run("LineMatches", func(t *testing.T) {
3722 sres := searchForTest(t, b,
3723 &query.Regexp{
3724 Regexp: mustParseRE("\\bthe\\b"),
3725 CaseSensitive: true,
3726 Content: true,
3727 })
3728
3729 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3730 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3731 }
3732
3733 if sres.Stats.RegexpsConsidered != 0 {
3734 t.Fatal("expected regexp to be skipped")
3735 }
3736
3737 got := sres.Files[0].LineMatches[0]
3738 want := LineMatch{
3739 LineFragments: []LineFragmentMatch{{
3740 LineOffset: 7,
3741 Offset: 7,
3742 MatchLength: 3,
3743 }},
3744 Line: content,
3745 FileName: false,
3746 LineNumber: 1,
3747 LineStart: 0,
3748 LineEnd: 14,
3749 }
3750
3751 if !reflect.DeepEqual(got, want) {
3752 t.Errorf("got %#v, want %#v", got, want)
3753 }
3754 })
3755
3756 t.Run("ChunkMatches", func(t *testing.T) {
3757 sres := searchForTest(t, b,
3758 &query.Regexp{
3759 Regexp: mustParseRE("\\bthe\\b"),
3760 CaseSensitive: true,
3761 }, chunkOpts)
3762
3763 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3764 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3765 }
3766
3767 if sres.Stats.RegexpsConsidered != 0 {
3768 t.Fatal("expected regexp to be skipped")
3769 }
3770
3771 got := sres.Files[0].ChunkMatches[0]
3772 want := ChunkMatch{
3773 Content: content,
3774 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3775 Ranges: []Range{{
3776 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3777 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3778 }},
3779 }
3780
3781 if diff := cmp.Diff(want, got); diff != "" {
3782 t.Fatal(diff)
3783 }
3784 })
3785}