fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 }
42 r.Files[i].Checksum = nil
43 r.Files[i].Debug = ""
44 }
45}
46
47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
48 t.Helper()
49
50 b, err := NewIndexBuilder(repo)
51 if err != nil {
52 t.Fatalf("NewIndexBuilder: %v", err)
53 }
54
55 for i, d := range docs {
56 if err := b.Add(d); err != nil {
57 t.Fatalf("Add %d: %v", i, err)
58 }
59 }
60
61 return b
62}
63
64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
65 t.Helper()
66
67 b := newIndexBuilder()
68 b.indexFormatVersion = NextIndexFormatVersion
69
70 if len(repos) != len(docs) {
71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
72 }
73
74 for i, repo := range repos {
75 if err := b.setRepository(repo); err != nil {
76 t.Fatal(err)
77 }
78 for j, d := range docs[i] {
79 if err := b.Add(d); err != nil {
80 t.Fatalf("Add %d %d: %v", i, j, err)
81 }
82 }
83 }
84
85 return b
86}
87
88func TestBoundary(t *testing.T) {
89 b := testIndexBuilder(t, nil,
90 Document{Name: "f1", Content: []byte("x the")},
91 Document{Name: "f1", Content: []byte("reader")})
92 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
93 if len(res.Files) > 0 {
94 t.Fatalf("got %v, want no matches", res.Files)
95 }
96}
97
98func TestDocSectionInvalid(t *testing.T) {
99 b, err := NewIndexBuilder(nil)
100 if err != nil {
101 t.Fatalf("NewIndexBuilder: %v", err)
102 }
103 doc := Document{
104 Name: "f1",
105 Content: []byte("01234567890123"),
106 Symbols: []DocumentSection{{5, 8}, {7, 9}},
107 }
108
109 if err := b.Add(doc); err == nil {
110 t.Errorf("overlapping doc sections should fail")
111 }
112
113 doc = Document{
114 Name: "f1",
115 Content: []byte("01234567890123"),
116 Symbols: []DocumentSection{{0, 20}},
117 }
118
119 if err := b.Add(doc); err == nil {
120 t.Errorf("doc sections beyond EOF should fail")
121 }
122}
123
124func TestBasic(t *testing.T) {
125 b := testIndexBuilder(t, nil,
126 Document{
127 Name: "f2",
128 Content: []byte("to carry water in the no later bla"),
129 // --------------0123456789012345678901234567890123
130 })
131
132 t.Run("LineMatch", func(t *testing.T) {
133 res := searchForTest(t, b, &query.Substring{
134 Pattern: "water",
135 CaseSensitive: true,
136 })
137 fmatches := res.Files
138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
139 t.Fatalf("got %v, want 1 matches", fmatches)
140 }
141
142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
143 want := "f2:9"
144 if got != want {
145 t.Errorf("1: got %s, want %s", got, want)
146 }
147 })
148
149 t.Run("ChunkMatch", func(t *testing.T) {
150 res := searchForTest(t, b, &query.Substring{
151 Pattern: "water",
152 CaseSensitive: true,
153 }, chunkOpts)
154 fmatches := res.Files
155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
156 t.Fatalf("got %v, want 1 matches", fmatches)
157 }
158
159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
160 want := "f2:9"
161 if got != want {
162 t.Errorf("1: got %s, want %s", got, want)
163 }
164 })
165}
166
167func TestEmptyIndex(t *testing.T) {
168 b := testIndexBuilder(t, nil)
169 searcher := searcherForTest(t, b)
170
171 var opts SearchOptions
172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
173 t.Fatalf("Search: %v", err)
174 }
175
176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
177 t.Fatalf("List: %v", err)
178 }
179
180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
181 t.Fatalf("Search: %v", err)
182 }
183}
184
// memSeeker serves index reads from an in-memory byte slice. searcherForTest
// wraps a serialized index in it and hands it to NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for this in-memory source.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op: there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing slice starting at offset off. The
// returned slice aliases the backing data; it is not a copy.
//
// A request that reaches past the end of the data yields an error instead of
// a slice-bounds panic. The end offset is computed in 64 bits because off+sz
// can wrap around in uint32 arithmetic.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memSeeker: read of %d bytes at offset %d is out of bounds (size %d)", sz, off, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the backing slice in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
201
202func TestNewlines(t *testing.T) {
203 b := testIndexBuilder(t, nil,
204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
205 // ---------------------------------------------012345-678901-234
206
207 t.Run("LineMatches", func(t *testing.T) {
208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
209
210 matches := sres.Files
211 want := []FileMatch{{
212 FileName: "filename",
213 LineMatches: []LineMatch{{
214 LineFragments: []LineFragmentMatch{{
215 Offset: 8,
216 LineOffset: 2,
217 MatchLength: 3,
218 }},
219 Line: []byte("line2"),
220 LineStart: 6,
221 LineEnd: 11,
222 LineNumber: 2,
223 }},
224 }}
225
226 if !reflect.DeepEqual(matches, want) {
227 t.Errorf("got %v, want %v", matches, want)
228 }
229 })
230
231 t.Run("ChunkMatches", func(t *testing.T) {
232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
233
234 matches := sres.Files
235 want := []FileMatch{{
236 FileName: "filename",
237 ChunkMatches: []ChunkMatch{{
238 Content: []byte("line2"),
239 ContentStart: Location{
240 ByteOffset: 6,
241 LineNumber: 2,
242 Column: 1,
243 },
244 Ranges: []Range{{
245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
247 }},
248 }},
249 }}
250
251 if diff := cmp.Diff(want, matches); diff != "" {
252 t.Fatal(diff)
253 }
254 })
255}
256
257// A result spanning multiple lines should have LineMatches that only cover
258// single lines.
259func TestQueryNewlines(t *testing.T) {
260 text := "line1\nline2\nbla"
261 b := testIndexBuilder(t, nil,
262 Document{Name: "filename", Content: []byte(text)})
263
264 t.Run("LineMatches", func(t *testing.T) {
265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
266 matches := sres.Files
267 if len(matches) != 1 {
268 t.Fatalf("got %d file matches, want exactly one", len(matches))
269 }
270 m := matches[0]
271 if len(m.LineMatches) != 2 {
272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
273 }
274 })
275
276 t.Run("ChunkMatches", func(t *testing.T) {
277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
278 matches := sres.Files
279 if len(matches) != 1 {
280 t.Fatalf("got %d file matches, want exactly one", len(matches))
281 }
282 m := matches[0]
283 if len(m.ChunkMatches) != 1 {
284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
285 }
286 })
287}
288
289var chunkOpts = SearchOptions{ChunkMatches: true}
290
291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
292 searcher := searcherForTest(t, b)
293 var opts SearchOptions
294 if len(o) > 0 {
295 opts = o[0]
296 }
297 res, err := searcher.Search(context.Background(), q, &opts)
298 if err != nil {
299 t.Fatalf("Search(%s): %v", q, err)
300 }
301 clearScores(res)
302 return res
303}
304
305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
306 var buf bytes.Buffer
307 if err := b.Write(&buf); err != nil {
308 t.Fatal(err)
309 }
310 f := &memSeeker{buf.Bytes()}
311
312 searcher, err := NewSearcher(f)
313 if err != nil {
314 t.Fatalf("NewSearcher: %v", err)
315 }
316
317 return searcher
318}
319
320func TestCaseFold(t *testing.T) {
321 b := testIndexBuilder(t, nil,
322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
323 // -----------------------------------012345678901234
324 )
325 t.Run("LineMatches", func(t *testing.T) {
326 sres := searchForTest(t, b, &query.Substring{
327 Pattern: "bananas",
328 CaseSensitive: true,
329 })
330 matches := sres.Files
331 if len(matches) != 0 {
332 t.Errorf("foldcase: got %#v, want 0 matches", matches)
333 }
334
335 sres = searchForTest(t, b,
336 &query.Substring{
337 Pattern: "BaNaNAS",
338 CaseSensitive: true,
339 })
340 matches = sres.Files
341 if len(matches) != 1 {
342 t.Errorf("no foldcase: got %v, want 1 matches", matches)
343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
344 t.Errorf("foldcase: got %v, want offsets 7", matches)
345 }
346 })
347
348 t.Run("ChunkMatches", func(t *testing.T) {
349 sres := searchForTest(t, b, &query.Substring{
350 Pattern: "bananas",
351 CaseSensitive: true,
352 }, chunkOpts)
353 matches := sres.Files
354 if len(matches) != 0 {
355 t.Errorf("foldcase: got %#v, want 0 matches", matches)
356 }
357
358 sres = searchForTest(t, b,
359 &query.Substring{
360 Pattern: "BaNaNAS",
361 CaseSensitive: true,
362 })
363 matches = sres.Files
364 if len(matches) != 1 {
365 t.Errorf("no foldcase: got %v, want 1 matches", matches)
366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
367 t.Errorf("foldcase: got %v, want offsets 7", matches)
368 }
369 })
370}
371
372func TestSearchStats(t *testing.T) {
373 ctx := context.Background()
374 searcher := searcherForTest(t, testIndexBuilder(t, nil,
375 Document{Name: "f1", Content: []byte("x banana y")},
376 Document{Name: "f2", Content: []byte("x apple y")},
377 Document{Name: "f3", Content: []byte("x banana apple y")},
378 // -----------------------------------0123456789012345
379 ))
380
381 andQuery := query.NewAnd(
382 &query.Substring{
383 Pattern: "banana",
384 },
385 &query.Substring{
386 Pattern: "apple",
387 },
388 )
389
390 t.Run("LineMatches", func(t *testing.T) {
391 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
392 if err != nil {
393 t.Fatal(err)
394 }
395 matches := sres.Files
396 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
397 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
398 }
399
400 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
401 t.Fatalf("got %#v, want offsets 2,9", matches)
402 }
403 })
404 t.Run("ChunkMatches", func(t *testing.T) {
405 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
406 if err != nil {
407 t.Fatal(err)
408 }
409 matches := sres.Files
410 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
411 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
412 }
413
414 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
415 t.Fatalf("got %#v, want offsets 2,9", matches)
416 }
417 })
418 t.Run("Stats", func(t *testing.T) {
419 cases := []struct {
420 Name string
421 Q query.Q
422 Want Stats
423 }{{
424 Name: "and-query",
425 Q: andQuery,
426 Want: Stats{
427 FilesLoaded: 1,
428 ContentBytesLoaded: 18,
429 IndexBytesLoaded: 8,
430 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
431 NgramLookups: 104,
432 MatchCount: 2,
433 FileCount: 1,
434 FilesConsidered: 2,
435 ShardsScanned: 1,
436 },
437 }, {
438 Name: "one-trigram",
439 Q: &query.Substring{
440 Pattern: "a y",
441 Content: true,
442 CaseSensitive: true,
443 },
444 Want: Stats{
445 ContentBytesLoaded: 12,
446 IndexBytesLoaded: 1,
447 FileCount: 1,
448 FilesConsidered: 1,
449 FilesLoaded: 1,
450 ShardsScanned: 1,
451 MatchCount: 1,
452 NgramMatches: 1,
453 NgramLookups: 2, // once to lookup frequency then again to access posting list.
454 },
455 }, {
456 Name: "one-trigram-case-insensitive",
457 Q: &query.Substring{
458 Pattern: "a y",
459 Content: true,
460 },
461 Want: Stats{
462 ContentBytesLoaded: 12,
463 IndexBytesLoaded: 1,
464 FileCount: 1,
465 FilesConsidered: 1,
466 FilesLoaded: 1,
467 ShardsScanned: 1,
468 MatchCount: 1,
469 NgramMatches: 1,
470 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
471 },
472 }, {
473 Name: "one-trigram-pruned",
474 Q: &query.Substring{
475 Pattern: "foo",
476 Content: true,
477 CaseSensitive: true,
478 },
479 Want: Stats{
480 ShardsSkippedFilter: 1,
481 NgramLookups: 1, // only had to lookup once
482 },
483 }, {
484 Name: "one-trigram-branch-pruned",
485 Q: query.NewAnd(
486 &query.Substring{
487 Pattern: "foo",
488 Content: true,
489 CaseSensitive: true,
490 },
491 &query.Substring{
492 Pattern: "a y",
493 Content: true,
494 CaseSensitive: true,
495 },
496 ),
497 Want: Stats{
498 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
499 ShardsSkippedFilter: 1,
500 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
501 },
502 }}
503
504 for _, tc := range cases {
505 t.Run(tc.Name, func(t *testing.T) {
506 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
507 if err != nil {
508 t.Fatal(err)
509 }
510 if diff := cmp.Diff(tc.Want, sres.Stats); diff != "" {
511 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
512 }
513 })
514 }
515
516 })
517}
518
519func TestAndNegateSearch(t *testing.T) {
520 b := testIndexBuilder(t, nil,
521 Document{Name: "f1", Content: []byte("x banana y")},
522 // -----------------------------------0123456789
523 Document{Name: "f4", Content: []byte("x banana apple y")})
524
525 t.Run("LineMatches", func(t *testing.T) {
526 sres := searchForTest(t, b, query.NewAnd(
527 &query.Substring{
528 Pattern: "banana",
529 },
530 &query.Not{Child: &query.Substring{
531 Pattern: "apple",
532 }}))
533
534 matches := sres.Files
535
536 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
537 t.Fatalf("got %v, want 1 match", matches)
538 }
539 if matches[0].FileName != "f1" {
540 t.Fatalf("got match %#v, want FileName: f1", matches[0])
541 }
542 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
543 t.Fatalf("got %v, want offset 2", matches)
544 }
545 })
546
547 t.Run("ChunkMatches", func(t *testing.T) {
548 sres := searchForTest(t, b,
549 query.NewAnd(
550 &query.Substring{
551 Pattern: "banana",
552 },
553 &query.Not{Child: &query.Substring{
554 Pattern: "apple",
555 }},
556 ),
557 chunkOpts,
558 )
559
560 matches := sres.Files
561
562 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
563 t.Fatalf("got %v, want 1 match", matches)
564 }
565 if matches[0].FileName != "f1" {
566 t.Fatalf("got match %#v, want FileName: f1", matches[0])
567 }
568 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
569 t.Fatalf("got %v, want offset 2", matches)
570 }
571 })
572}
573
574func TestNegativeMatchesOnlyShortcut(t *testing.T) {
575 b := testIndexBuilder(t, nil,
576 Document{Name: "f1", Content: []byte("x banana y")},
577 Document{Name: "f2", Content: []byte("x appelmoes y")},
578 Document{Name: "f3", Content: []byte("x appelmoes y")},
579 Document{Name: "f3", Content: []byte("x appelmoes y")})
580
581 t.Run("LineMatches", func(t *testing.T) {
582 sres := searchForTest(t, b, query.NewAnd(
583 &query.Substring{
584 Pattern: "banana",
585 },
586 &query.Not{Child: &query.Substring{
587 Pattern: "appel",
588 }}))
589
590 if sres.Stats.FilesConsidered != 1 {
591 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
592 }
593 })
594
595 t.Run("ChunkMatches", func(t *testing.T) {
596 sres := searchForTest(t, b, query.NewAnd(
597 &query.Substring{
598 Pattern: "banana",
599 },
600 &query.Not{Child: &query.Substring{
601 Pattern: "appel",
602 }}), chunkOpts)
603
604 if sres.Stats.FilesConsidered != 1 {
605 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
606 }
607 })
608}
609
610func TestFileSearch(t *testing.T) {
611 b := testIndexBuilder(t, nil,
612 Document{Name: "banzana", Content: []byte("x orange y")},
613 // -------------0123456
614 Document{Name: "banana", Content: []byte("x apple y")},
615 // -------------012345
616 )
617
618 t.Run("LineMatches", func(t *testing.T) {
619 sres := searchForTest(t, b, &query.Substring{
620 Pattern: "anan",
621 FileName: true,
622 })
623
624 matches := sres.Files
625 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
626 t.Fatalf("got %v, want 1 match", matches)
627 }
628
629 got := matches[0].LineMatches[0]
630 want := LineMatch{
631 Line: []byte("banana"),
632 LineFragments: []LineFragmentMatch{{
633 Offset: 1,
634 LineOffset: 1,
635 MatchLength: 4,
636 }},
637 FileName: true,
638 }
639
640 if !reflect.DeepEqual(got, want) {
641 t.Errorf("got %#v, want %#v", got, want)
642 }
643 })
644
645 t.Run("ChunkMatches", func(t *testing.T) {
646 sres := searchForTest(t, b, &query.Substring{
647 Pattern: "anan",
648 FileName: true,
649 }, chunkOpts)
650
651 matches := sres.Files
652 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
653 t.Fatalf("got %v, want 1 match", matches)
654 }
655
656 got := matches[0].ChunkMatches[0]
657 want := ChunkMatch{
658 Content: []byte("banana"),
659 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
660 Ranges: []Range{{
661 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
662 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
663 }},
664 FileName: true,
665 }
666
667 if diff := cmp.Diff(want, got); diff != "" {
668 t.Fatal(diff)
669 }
670 })
671
672 t.Run("FileNameSet", func(t *testing.T) {
673 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
674
675 matches := sres.Files
676 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
677 t.Fatalf("got %v, want 1 match", matches)
678 }
679
680 got := matches[0].ChunkMatches[0]
681 want := ChunkMatch{
682 Content: []byte("banana"),
683 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
684 Ranges: []Range{{
685 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
686 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
687 }},
688 FileName: true,
689 }
690
691 if diff := cmp.Diff(want, got); diff != "" {
692 t.Fatal(diff)
693 }
694 })
695}
696
697func TestFileCase(t *testing.T) {
698 b := testIndexBuilder(t, nil,
699 Document{Name: "BANANA", Content: []byte("x orange y")})
700
701 t.Run("LineMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Substring{
703 Pattern: "banana",
704 FileName: true,
705 })
706
707 matches := sres.Files
708 if len(matches) != 1 || matches[0].FileName != "BANANA" {
709 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
710 }
711 })
712
713 t.Run("ChunkMatches", func(t *testing.T) {
714 sres := searchForTest(t, b, &query.Substring{
715 Pattern: "banana",
716 FileName: true,
717 }, chunkOpts)
718
719 matches := sres.Files
720 if len(matches) != 1 || matches[0].FileName != "BANANA" {
721 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
722 }
723 })
724}
725
726func TestFileRegexpSearchBruteForce(t *testing.T) {
727 b := testIndexBuilder(t, nil,
728 Document{Name: "banzana", Content: []byte("x orange y")},
729 Document{Name: "banana", Content: []byte("x apple y")},
730 )
731 t.Run("LineMatches", func(t *testing.T) {
732 sres := searchForTest(t, b, &query.Regexp{
733 Regexp: mustParseRE("[qn][zx]"),
734 FileName: true,
735 })
736
737 matches := sres.Files
738 if len(matches) != 1 || matches[0].FileName != "banzana" {
739 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
740 }
741 })
742 t.Run("LineMatches", func(t *testing.T) {
743 sres := searchForTest(t, b, &query.Regexp{
744 Regexp: mustParseRE("[qn][zx]"),
745 FileName: true,
746 }, chunkOpts)
747
748 matches := sres.Files
749 if len(matches) != 1 || matches[0].FileName != "banzana" {
750 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
751 }
752 })
753}
754
755func TestFileRegexpSearchShortString(t *testing.T) {
756 b := testIndexBuilder(t, nil,
757 Document{Name: "banana.py", Content: []byte("x orange y")})
758
759 t.Run("LineMatches", func(t *testing.T) {
760 sres := searchForTest(t, b, &query.Regexp{
761 Regexp: mustParseRE("ana.py"),
762 FileName: true,
763 })
764
765 matches := sres.Files
766 if len(matches) != 1 || matches[0].FileName != "banana.py" {
767 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
768 }
769 })
770
771 t.Run("ChunkMatches", func(t *testing.T) {
772 sres := searchForTest(t, b, &query.Regexp{
773 Regexp: mustParseRE("ana.py"),
774 FileName: true,
775 }, chunkOpts)
776
777 matches := sres.Files
778 if len(matches) != 1 || matches[0].FileName != "banana.py" {
779 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
780 }
781 })
782}
783
784func TestFileSubstringSearchBruteForce(t *testing.T) {
785 b := testIndexBuilder(t, nil,
786 Document{Name: "BANZANA", Content: []byte("x orange y")},
787 Document{Name: "banana", Content: []byte("x apple y")})
788
789 q := &query.Substring{
790 Pattern: "z",
791 FileName: true,
792 }
793
794 t.Run("LineMatches", func(t *testing.T) {
795 res := searchForTest(t, b, q)
796 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
797 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
798 }
799 })
800
801 t.Run("ChunkMatches", func(t *testing.T) {
802 res := searchForTest(t, b, q, chunkOpts)
803 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
804 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
805 }
806 })
807}
808
809func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
810 b := testIndexBuilder(t, nil,
811 Document{Name: "BANZANA", Content: []byte("x orange y")},
812 Document{Name: "bananaq", Content: []byte("x apple y")})
813
814 q := &query.Substring{
815 Pattern: "q",
816 FileName: true,
817 }
818 t.Run("LineMatches", func(t *testing.T) {
819 res := searchForTest(t, b, q)
820 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
821 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
822 }
823 })
824
825 t.Run("LineMatches", func(t *testing.T) {
826 res := searchForTest(t, b, q, chunkOpts)
827 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
828 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
829 }
830 })
831}
832
833func TestSearchMatchAll(t *testing.T) {
834 b := testIndexBuilder(t, nil,
835 Document{Name: "banzana", Content: []byte("x orange y")},
836 Document{Name: "banana", Content: []byte("x apple y")})
837
838 t.Run("LineMatches", func(t *testing.T) {
839 sres := searchForTest(t, b, &query.Const{Value: true})
840 matches := sres.Files
841 if len(matches) != 2 {
842 t.Fatalf("got %v, want 2 matches", matches)
843 }
844 })
845
846 t.Run("ChunkMatches", func(t *testing.T) {
847 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
848 matches := sres.Files
849 if len(matches) != 2 {
850 t.Fatalf("got %v, want 2 matches", matches)
851 }
852 })
853}
854
855func TestSearchNewline(t *testing.T) {
856 b := testIndexBuilder(t, nil,
857 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
858
859 t.Run("LineMatches", func(t *testing.T) {
860 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
861
862 // Just check that we don't crash.
863
864 matches := sres.Files
865 if len(matches) != 1 {
866 t.Fatalf("got %v, want 1 matches", matches)
867 }
868 })
869
870 t.Run("ChunkMatches", func(t *testing.T) {
871 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
872
873 // Just check that we don't crash.
874
875 matches := sres.Files
876 if len(matches) != 1 {
877 t.Fatalf("got %v, want 1 matches", matches)
878 }
879 })
880}
881
882func TestSearchMatchAllRegexp(t *testing.T) {
883 b := testIndexBuilder(t, nil,
884 Document{Name: "banzana", Content: []byte("abcd")},
885 Document{Name: "banana", Content: []byte("pqrs")})
886
887 t.Run("LineMatches", func(t *testing.T) {
888 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
889
890 matches := sres.Files
891 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
892 t.Fatalf("got %v, want 2 matches", matches)
893 }
894 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
895 t.Fatalf("want 4 chars in every file, got %#v", matches)
896 }
897
898 })
899
900 t.Run("ChunkMatches", func(t *testing.T) {
901 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
902
903 matches := sres.Files
904 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
905 t.Fatalf("got %v, want 2 matches", matches)
906 }
907 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
908 t.Fatalf("want 4 chars in every file, got %#v", matches)
909 }
910
911 })
912}
913
914func TestFileRestriction(t *testing.T) {
915 b := testIndexBuilder(t, nil,
916 Document{Name: "banana1", Content: []byte("x orange y")},
917 Document{Name: "banana2", Content: []byte("x apple y")},
918 Document{Name: "orange", Content: []byte("x apple z")})
919
920 t.Run("LineMatches", func(t *testing.T) {
921 sres := searchForTest(t, b, query.NewAnd(
922 &query.Substring{
923 Pattern: "banana",
924 FileName: true,
925 },
926 &query.Substring{
927 Pattern: "apple",
928 }))
929
930 matches := sres.Files
931 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
932 t.Fatalf("got %v, want 1 match", matches)
933 }
934
935 match := matches[0].LineMatches[0]
936 got := string(match.Line)
937 want := "x apple y"
938 if got != want {
939 t.Errorf("got match %#v, want line %q", match, want)
940 }
941 })
942
943 t.Run("ChunkMatches", func(t *testing.T) {
944 sres := searchForTest(t, b, query.NewAnd(
945 &query.Substring{
946 Pattern: "banana",
947 FileName: true,
948 },
949 &query.Substring{
950 Pattern: "apple",
951 }), chunkOpts)
952
953 matches := sres.Files
954 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
955 t.Fatalf("got %v, want 1 match", matches)
956 }
957
958 match := matches[0].ChunkMatches[0]
959 got := string(match.Content)
960 want := "x apple y"
961 if got != want {
962 t.Errorf("got match %#v, want line %q", match, want)
963 }
964 })
965}
966
967func TestFileNameBoundary(t *testing.T) {
968 b := testIndexBuilder(t, nil,
969 Document{Name: "banana2", Content: []byte("x apple y")},
970 Document{Name: "helpers.go", Content: []byte("x apple y")},
971 Document{Name: "foo", Content: []byte("x apple y")})
972
973 t.Run("LineMatches", func(t *testing.T) {
974 sres := searchForTest(t, b, &query.Substring{
975 Pattern: "helpers.go",
976 FileName: true,
977 })
978
979 matches := sres.Files
980 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
981 t.Fatalf("got %v, want 1 match", matches)
982 }
983 })
984
985 t.Run("ChunkMatches", func(t *testing.T) {
986 sres := searchForTest(t, b, &query.Substring{
987 Pattern: "helpers.go",
988 FileName: true,
989 }, chunkOpts)
990
991 matches := sres.Files
992 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
993 t.Fatalf("got %v, want 1 match", matches)
994 }
995 })
996}
997
998func TestDocumentOrder(t *testing.T) {
999 var docs []Document
1000 for i := 0; i < 3; i++ {
1001 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1002 }
1003
1004 b := testIndexBuilder(t, nil, docs...)
1005
1006 t.Run("LineMatches", func(t *testing.T) {
1007 sres := searchForTest(t, b, query.NewAnd(
1008 &query.Substring{
1009 Pattern: "needle",
1010 }))
1011
1012 want := []string{"f0", "f1", "f2"}
1013 var got []string
1014 for _, f := range sres.Files {
1015 got = append(got, f.FileName)
1016 }
1017 if !reflect.DeepEqual(got, want) {
1018 t.Fatalf("got %v, want %v", got, want)
1019 }
1020 })
1021
1022 t.Run("ChunkMatches", func(t *testing.T) {
1023 sres := searchForTest(t, b,
1024 query.NewAnd(&query.Substring{
1025 Pattern: "needle",
1026 }),
1027 chunkOpts,
1028 )
1029
1030 want := []string{"f0", "f1", "f2"}
1031 var got []string
1032 for _, f := range sres.Files {
1033 got = append(got, f.FileName)
1034 }
1035 if !reflect.DeepEqual(got, want) {
1036 t.Fatalf("got %v, want %v", got, want)
1037 }
1038 })
1039}
1040
1041func TestBranchMask(t *testing.T) {
1042 b := testIndexBuilder(t, &Repository{
1043 Branches: []RepositoryBranch{
1044 {"master", "v-master"},
1045 {"stable", "v-stable"},
1046 {"bonzai", "v-bonzai"},
1047 },
1048 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1049 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1050 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1051 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1052 )
1053
1054 t.Run("LineMatches", func(t *testing.T) {
1055 sres := searchForTest(t, b, query.NewAnd(
1056 &query.Substring{
1057 Pattern: "needle",
1058 },
1059 &query.Branch{
1060 Pattern: "table",
1061 }))
1062
1063 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1064 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1065 }
1066
1067 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1068 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1069 }
1070 })
1071
1072 t.Run("ChunkMatches", func(t *testing.T) {
1073 sres := searchForTest(t, b, query.NewAnd(
1074 &query.Substring{
1075 Pattern: "needle",
1076 },
1077 &query.Branch{
1078 Pattern: "table",
1079 }),
1080 chunkOpts,
1081 )
1082
1083 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1084 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1085 }
1086
1087 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1088 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1089 }
1090 })
1091}
1092
1093func TestBranchLimit(t *testing.T) {
1094 for limit := 64; limit <= 65; limit++ {
1095 r := &Repository{}
1096 for i := 0; i < limit; i++ {
1097 s := fmt.Sprintf("b%d", i)
1098 r.Branches = append(r.Branches, RepositoryBranch{
1099 s, "v-" + s,
1100 })
1101 }
1102 _, err := NewIndexBuilder(r)
1103 if limit == 64 && err != nil {
1104 t.Fatalf("NewIndexBuilder: %v", err)
1105 } else if limit == 65 && err == nil {
1106 t.Fatalf("NewIndexBuilder succeeded")
1107 }
1108 }
1109}
1110
1111func TestBranchReport(t *testing.T) {
1112 branches := []string{"stable", "master"}
1113 b := testIndexBuilder(t, &Repository{
1114 Branches: []RepositoryBranch{
1115 {"stable", "vs"},
1116 {"master", "vm"},
1117 },
1118 },
1119 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1120
1121 t.Run("LineMatches", func(t *testing.T) {
1122 sres := searchForTest(t, b, &query.Substring{
1123 Pattern: "needle",
1124 })
1125 if len(sres.Files) != 1 {
1126 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1127 }
1128
1129 f := sres.Files[0]
1130 if !reflect.DeepEqual(f.Branches, branches) {
1131 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1132 }
1133 })
1134
1135 t.Run("ChunkMatches", func(t *testing.T) {
1136 sres := searchForTest(t, b, &query.Substring{
1137 Pattern: "needle",
1138 }, chunkOpts)
1139 if len(sres.Files) != 1 {
1140 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1141 }
1142
1143 f := sres.Files[0]
1144 if !reflect.DeepEqual(f.Branches, branches) {
1145 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1146 }
1147 })
1148
1149}
1150
1151func TestBranchVersions(t *testing.T) {
1152 b := testIndexBuilder(t, &Repository{
1153 Branches: []RepositoryBranch{
1154 {"stable", "v-stable"},
1155 {"master", "v-master"},
1156 },
1157 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1158
1159 t.Run("LineMatches", func(t *testing.T) {
1160 sres := searchForTest(t, b, &query.Substring{
1161 Pattern: "needle",
1162 })
1163 if len(sres.Files) != 1 {
1164 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1165 }
1166
1167 f := sres.Files[0]
1168 if f.Version != "v-master" {
1169 t.Fatalf("got file %#v, want version 'v-master'", f)
1170 }
1171 })
1172
1173 t.Run("ChunkMatches", func(t *testing.T) {
1174 sres := searchForTest(t, b, &query.Substring{
1175 Pattern: "needle",
1176 }, chunkOpts)
1177 if len(sres.Files) != 1 {
1178 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1179 }
1180
1181 f := sres.Files[0]
1182 if f.Version != "v-master" {
1183 t.Fatalf("got file %#v, want version 'v-master'", f)
1184 }
1185 })
1186}
1187
// mustParseRE parses s using Perl syntax flags and returns the parsed
// expression. It panics with the parse error if s is not a valid pattern.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}

	panic(err)
}
1196
1197func TestRegexp(t *testing.T) {
1198 content := []byte("needle the bla")
1199 // ----------------01234567890123
1200
1201 b := testIndexBuilder(t, nil,
1202 Document{
1203 Name: "f1",
1204 Content: content,
1205 })
1206
1207 t.Run("LineMatches", func(t *testing.T) {
1208 sres := searchForTest(t, b,
1209 &query.Regexp{
1210 Regexp: mustParseRE("dle.*bla"),
1211 })
1212
1213 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1214 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1215 }
1216
1217 got := sres.Files[0].LineMatches[0]
1218 want := LineMatch{
1219 LineFragments: []LineFragmentMatch{{
1220 LineOffset: 3,
1221 Offset: 3,
1222 MatchLength: 11,
1223 }},
1224 Line: content,
1225 FileName: false,
1226 LineNumber: 1,
1227 LineStart: 0,
1228 LineEnd: 14,
1229 }
1230
1231 if !reflect.DeepEqual(got, want) {
1232 t.Errorf("got %#v, want %#v", got, want)
1233 }
1234 })
1235
1236 t.Run("ChunkMatches", func(t *testing.T) {
1237 sres := searchForTest(t, b,
1238 &query.Regexp{
1239 Regexp: mustParseRE("dle.*bla"),
1240 }, chunkOpts)
1241
1242 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1243 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1244 }
1245
1246 got := sres.Files[0].ChunkMatches[0]
1247 want := ChunkMatch{
1248 Content: content,
1249 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1250 Ranges: []Range{{
1251 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1252 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1253 }},
1254 }
1255
1256 if diff := cmp.Diff(want, got); diff != "" {
1257 t.Fatal(diff)
1258 }
1259 })
1260}
1261
1262func TestRegexpFile(t *testing.T) {
1263 content := []byte("needle the bla")
1264
1265 name := "let's play: find the mussel"
1266 b := testIndexBuilder(t, nil,
1267 Document{Name: name, Content: content},
1268 Document{Name: "play.txt", Content: content})
1269
1270 t.Run("LineMatches", func(t *testing.T) {
1271 sres := searchForTest(t, b,
1272 &query.Regexp{
1273 Regexp: mustParseRE("play.*mussel"),
1274 FileName: true,
1275 })
1276
1277 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1278 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1279 }
1280
1281 if sres.Files[0].FileName != name {
1282 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1283 }
1284 })
1285
1286 t.Run("ChunkMatches", func(t *testing.T) {
1287 sres := searchForTest(t, b,
1288 &query.Regexp{
1289 Regexp: mustParseRE("play.*mussel"),
1290 FileName: true,
1291 }, chunkOpts)
1292
1293 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1295 }
1296
1297 if sres.Files[0].FileName != name {
1298 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1299 }
1300 })
1301}
1302
1303func TestRegexpOrder(t *testing.T) {
1304 content := []byte("bla the needle")
1305 // ----------------01234567890123
1306
1307 b := testIndexBuilder(t, nil,
1308 Document{Name: "f1", Content: content})
1309
1310 t.Run("LineMatches", func(t *testing.T) {
1311 sres := searchForTest(t, b,
1312 &query.Regexp{
1313 Regexp: mustParseRE("dle.*bla"),
1314 })
1315
1316 if len(sres.Files) != 0 {
1317 t.Fatalf("got %v, want 0 matches", sres.Files)
1318 }
1319 })
1320
1321 t.Run("ChunkMatches", func(t *testing.T) {
1322 sres := searchForTest(t, b,
1323 &query.Regexp{
1324 Regexp: mustParseRE("dle.*bla"),
1325 })
1326
1327 if len(sres.Files) != 0 {
1328 t.Fatalf("got %v, want 0 matches", sres.Files)
1329 }
1330 })
1331}
1332
1333func TestRepoName(t *testing.T) {
1334 content := []byte("bla the needle")
1335 // ----------------01234567890123
1336
1337 b := testIndexBuilder(t, &Repository{Name: "bla"},
1338 Document{Name: "f1", Content: content})
1339
1340 t.Run("LineMatches", func(t *testing.T) {
1341 sres := searchForTest(t, b,
1342 query.NewAnd(
1343 &query.Substring{Pattern: "needle"},
1344 &query.Repo{Regexp: regexp.MustCompile("foo")},
1345 ))
1346
1347 if len(sres.Files) != 0 {
1348 t.Fatalf("got %v, want 0 matches", sres.Files)
1349 }
1350
1351 if sres.Stats.FilesConsidered > 0 {
1352 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1353 }
1354
1355 sres = searchForTest(t, b,
1356 query.NewAnd(
1357 &query.Substring{Pattern: "needle"},
1358 &query.Repo{Regexp: regexp.MustCompile("bla")},
1359 ))
1360 if len(sres.Files) != 1 {
1361 t.Fatalf("got %v, want 1 match", sres.Files)
1362 }
1363 })
1364
1365 t.Run("ChunkMatches", func(t *testing.T) {
1366 sres := searchForTest(t, b,
1367 query.NewAnd(
1368 &query.Substring{Pattern: "needle"},
1369 &query.Repo{Regexp: regexp.MustCompile("foo")},
1370 ),
1371 chunkOpts,
1372 )
1373
1374 if len(sres.Files) != 0 {
1375 t.Fatalf("got %v, want 0 matches", sres.Files)
1376 }
1377
1378 if sres.Stats.FilesConsidered > 0 {
1379 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1380 }
1381
1382 sres = searchForTest(t, b,
1383 query.NewAnd(
1384 &query.Substring{Pattern: "needle"},
1385 &query.Repo{Regexp: regexp.MustCompile("bla")},
1386 ))
1387 if len(sres.Files) != 1 {
1388 t.Fatalf("got %v, want 1 match", sres.Files)
1389 }
1390 })
1391}
1392
1393func TestMergeMatches(t *testing.T) {
1394 content := []byte("blablabla")
1395 b := testIndexBuilder(t, nil,
1396 Document{Name: "f1", Content: content})
1397
1398 t.Run("LineMatches", func(t *testing.T) {
1399 sres := searchForTest(t, b,
1400 &query.Substring{Pattern: "bla"})
1401 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1402 t.Fatalf("got %v, want 1 match", sres.Files)
1403 }
1404 })
1405
1406 t.Run("ChunkMatches", func(t *testing.T) {
1407 sres := searchForTest(t, b,
1408 &query.Substring{Pattern: "bla"},
1409 chunkOpts,
1410 )
1411 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1412 t.Fatalf("got %v, want 1 match", sres.Files)
1413 }
1414 })
1415}
1416
1417func TestRepoURL(t *testing.T) {
1418 content := []byte("blablabla")
1419 b := testIndexBuilder(t, &Repository{
1420 Name: "name",
1421 URL: "URL",
1422 CommitURLTemplate: "commit",
1423 FileURLTemplate: "file-url",
1424 LineFragmentTemplate: "fragment",
1425 }, Document{Name: "f1", Content: content})
1426
1427 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1428
1429 if sres.RepoURLs["name"] != "file-url" {
1430 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1431 }
1432 if sres.LineFragments["name"] != "fragment" {
1433 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1434 }
1435}
1436
1437func TestRegexpCaseSensitive(t *testing.T) {
1438 content := []byte("bla\nfunc unmarshalGitiles\n")
1439 b := testIndexBuilder(t, nil, Document{
1440 Name: "f1",
1441 Content: content,
1442 })
1443
1444 t.Run("LineMatches", func(t *testing.T) {
1445 res := searchForTest(t, b,
1446 &query.Regexp{
1447 Regexp: mustParseRE("func.*Gitiles"),
1448 CaseSensitive: true,
1449 })
1450
1451 if len(res.Files) != 1 {
1452 t.Fatalf("got %v, want one match", res.Files)
1453 }
1454 })
1455
1456 t.Run("ChunkMatches", func(t *testing.T) {
1457 res := searchForTest(t, b,
1458 &query.Regexp{
1459 Regexp: mustParseRE("func.*Gitiles"),
1460 CaseSensitive: true,
1461 },
1462 chunkOpts,
1463 )
1464
1465 if len(res.Files) != 1 {
1466 t.Fatalf("got %v, want one match", res.Files)
1467 }
1468 })
1469}
1470
1471func TestRegexpCaseFolding(t *testing.T) {
1472 content := []byte("bla\nfunc unmarshalGitiles\n")
1473
1474 b := testIndexBuilder(t, nil,
1475 Document{Name: "f1", Content: content})
1476 res := searchForTest(t, b,
1477 &query.Regexp{
1478 Regexp: mustParseRE("func.*GITILES"),
1479 CaseSensitive: false,
1480 })
1481
1482 if len(res.Files) != 1 {
1483 t.Fatalf("got %v, want one match", res.Files)
1484 }
1485}
1486
1487func TestCaseRegexp(t *testing.T) {
1488 content := []byte("BLABLABLA")
1489 b := testIndexBuilder(t, nil,
1490 Document{Name: "f1", Content: content})
1491
1492 t.Run("LineMatches", func(t *testing.T) {
1493 res := searchForTest(t, b,
1494 &query.Regexp{
1495 Regexp: mustParseRE("[xb][xl][xa]"),
1496 CaseSensitive: true,
1497 })
1498
1499 if len(res.Files) > 0 {
1500 t.Fatalf("got %v, want no matches", res.Files)
1501 }
1502 })
1503
1504 t.Run("ChunkMatches", func(t *testing.T) {
1505 res := searchForTest(t, b,
1506 &query.Regexp{
1507 Regexp: mustParseRE("[xb][xl][xa]"),
1508 CaseSensitive: true,
1509 },
1510 chunkOpts,
1511 )
1512
1513 if len(res.Files) > 0 {
1514 t.Fatalf("got %v, want no matches", res.Files)
1515 }
1516 })
1517}
1518
1519func TestNegativeRegexp(t *testing.T) {
1520 content := []byte("BLABLABLA needle bla")
1521 b := testIndexBuilder(t, nil,
1522 Document{Name: "f1", Content: content})
1523
1524 t.Run("LineMatches", func(t *testing.T) {
1525 res := searchForTest(t, b,
1526 query.NewAnd(
1527 &query.Substring{
1528 Pattern: "needle",
1529 },
1530 &query.Not{
1531 Child: &query.Regexp{
1532 Regexp: mustParseRE(".cs"),
1533 },
1534 }))
1535
1536 if len(res.Files) != 1 {
1537 t.Fatalf("got %v, want 1 match", res.Files)
1538 }
1539 })
1540
1541 t.Run("ChunkMatches", func(t *testing.T) {
1542 res := searchForTest(t, b,
1543 query.NewAnd(
1544 &query.Substring{
1545 Pattern: "needle",
1546 },
1547 &query.Not{
1548 Child: &query.Regexp{
1549 Regexp: mustParseRE(".cs"),
1550 },
1551 },
1552 ),
1553 chunkOpts)
1554
1555 if len(res.Files) != 1 {
1556 t.Fatalf("got %v, want 1 match", res.Files)
1557 }
1558 })
1559}
1560
1561func TestSymbolRank(t *testing.T) {
1562 t.Skip()
1563
1564 content := []byte("func bla() blubxxxxx")
1565 // ----------------01234567890123456789
1566 b := testIndexBuilder(t, nil,
1567 Document{
1568 Name: "f1",
1569 Content: content,
1570 }, Document{
1571 Name: "f2",
1572 Content: content,
1573 Symbols: []DocumentSection{{5, 8}},
1574 }, Document{
1575 Name: "f3",
1576 Content: content,
1577 })
1578
1579 t.Run("LineMatches", func(t *testing.T) {
1580 res := searchForTest(t, b,
1581 &query.Substring{
1582 CaseSensitive: false,
1583 Pattern: "bla",
1584 })
1585
1586 if len(res.Files) != 3 {
1587 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1588 }
1589 if res.Files[0].FileName != "f2" {
1590 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1591 }
1592 })
1593
1594 t.Run("ChunkMatches", func(t *testing.T) {
1595 res := searchForTest(t, b,
1596 &query.Substring{
1597 CaseSensitive: false,
1598 Pattern: "bla",
1599 }, chunkOpts)
1600
1601 if len(res.Files) != 3 {
1602 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1603 }
1604 if res.Files[0].FileName != "f2" {
1605 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1606 }
1607 })
1608}
1609
1610func TestSymbolRankRegexpUTF8(t *testing.T) {
1611 t.Skip()
1612
1613 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1614 content := []byte(prefix +
1615 "func bla() blub")
1616 // ------012345678901234
1617 b := testIndexBuilder(t, nil,
1618 Document{
1619 Name: "f1",
1620 Content: content,
1621 }, Document{
1622 Name: "f2",
1623 Content: content,
1624 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1625 }, Document{
1626 Name: "f3",
1627 Content: content,
1628 })
1629
1630 t.Run("LineMatches", func(t *testing.T) {
1631 res := searchForTest(t, b,
1632 &query.Regexp{
1633 Regexp: mustParseRE("b.a"),
1634 })
1635
1636 if len(res.Files) != 3 {
1637 t.Fatalf("got %#v, want 3 files", res.Files)
1638 }
1639 if res.Files[0].FileName != "f2" {
1640 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1641 }
1642 })
1643
1644 t.Run("ChunjkMatches", func(t *testing.T) {
1645 res := searchForTest(t, b,
1646 &query.Regexp{
1647 Regexp: mustParseRE("b.a"),
1648 }, chunkOpts)
1649
1650 if len(res.Files) != 3 {
1651 t.Fatalf("got %#v, want 3 files", res.Files)
1652 }
1653 if res.Files[0].FileName != "f2" {
1654 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1655 }
1656 })
1657}
1658
1659func TestPartialSymbolRank(t *testing.T) {
1660 t.Skip()
1661
1662 content := []byte("func bla() blub")
1663 // ----------------012345678901234
1664
1665 b := testIndexBuilder(t, nil,
1666 Document{
1667 Name: "f1",
1668 Content: content,
1669 Symbols: []DocumentSection{{4, 9}},
1670 }, Document{
1671 Name: "f2",
1672 Content: content,
1673 Symbols: []DocumentSection{{4, 8}},
1674 }, Document{
1675 Name: "f3",
1676 Content: content,
1677 Symbols: []DocumentSection{{4, 9}},
1678 })
1679
1680 t.Run("LineMatches", func(t *testing.T) {
1681 res := searchForTest(t, b,
1682 &query.Substring{
1683 Pattern: "bla",
1684 })
1685
1686 if len(res.Files) != 3 {
1687 t.Fatalf("got %#v, want 3 files", res.Files)
1688 }
1689 if res.Files[0].FileName != "f2" {
1690 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1691 }
1692 })
1693
1694 t.Run("ChunkMatches", func(t *testing.T) {
1695 res := searchForTest(t, b,
1696 &query.Substring{
1697 Pattern: "bla",
1698 }, chunkOpts)
1699
1700 if len(res.Files) != 3 {
1701 t.Fatalf("got %#v, want 3 files", res.Files)
1702 }
1703 if res.Files[0].FileName != "f2" {
1704 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1705 }
1706 })
1707}
1708
1709func TestNegativeRepo(t *testing.T) {
1710 content := []byte("bla the needle")
1711 // ----------------01234567890123
1712 b := testIndexBuilder(t, &Repository{
1713 Name: "bla",
1714 }, Document{Name: "f1", Content: content})
1715
1716 t.Run("LineMatches", func(t *testing.T) {
1717 sres := searchForTest(t, b,
1718 query.NewAnd(
1719 &query.Substring{Pattern: "needle"},
1720 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1721 ))
1722
1723 if len(sres.Files) != 0 {
1724 t.Fatalf("got %v, want 0 matches", sres.Files)
1725 }
1726 })
1727
1728 t.Run("ChunkMatches", func(t *testing.T) {
1729 sres := searchForTest(t, b,
1730 query.NewAnd(
1731 &query.Substring{Pattern: "needle"},
1732 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1733 ), chunkOpts)
1734
1735 if len(sres.Files) != 0 {
1736 t.Fatalf("got %v, want 0 matches", sres.Files)
1737 }
1738 })
1739}
1740
1741func TestListRepos(t *testing.T) {
1742 content := []byte("bla the needle\n")
1743 // ----------------012345678901234-
1744
1745 t.Run("default and minimal fallback", func(t *testing.T) {
1746 repo := &Repository{
1747 Name: "reponame",
1748 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1749 }
1750 b := testIndexBuilder(t, repo,
1751 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1752 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1753 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1754 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1755
1756 searcher := searcherForTest(t, b)
1757
1758 for _, opts := range []*ListOptions{
1759 nil,
1760 {Minimal: false},
1761 {Minimal: true},
1762 } {
1763 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1764 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1765
1766 res, err := searcher.List(context.Background(), q, opts)
1767 if err != nil {
1768 t.Fatalf("List(%v): %v", q, err)
1769 }
1770
1771 want := &RepoList{
1772 Repos: []*RepoListEntry{{
1773 Repository: *repo,
1774 Stats: RepoStats{
1775 Documents: 4,
1776 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1777 Shards: 1,
1778
1779 NewLinesCount: 4,
1780 DefaultBranchNewLinesCount: 2,
1781 OtherBranchesNewLinesCount: 3,
1782 },
1783 }},
1784 Stats: RepoStats{
1785 Repos: 1,
1786 Documents: 4,
1787 ContentBytes: 68,
1788 Shards: 1,
1789
1790 NewLinesCount: 4,
1791 DefaultBranchNewLinesCount: 2,
1792 OtherBranchesNewLinesCount: 3,
1793 },
1794 }
1795 ignored := []cmp.Option{
1796 cmpopts.EquateEmpty(),
1797 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1798 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1799 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1800 cmpopts.IgnoreFields(Repository{}, "priority"),
1801 }
1802 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1803 t.Fatalf("mismatch (-want +got):\n%s", diff)
1804 }
1805
1806 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1807 res, err = searcher.List(context.Background(), q, nil)
1808 if err != nil {
1809 t.Fatalf("List(%v): %v", q, err)
1810 }
1811 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1812 t.Fatalf("got %v, want 0 matches", res)
1813 }
1814 })
1815 }
1816 })
1817
1818 t.Run("minimal", func(t *testing.T) {
1819 repo := &Repository{
1820 ID: 1234,
1821 Name: "reponame",
1822 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1823 RawConfig: map[string]string{"repoid": "1234"},
1824 }
1825 b := testIndexBuilder(t, repo,
1826 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1827 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1828 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1829 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1830
1831 searcher := searcherForTest(t, b)
1832
1833 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1834 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1835 if err != nil {
1836 t.Fatalf("List(%v): %v", q, err)
1837 }
1838
1839 want := &RepoList{
1840 Minimal: map[uint32]*MinimalRepoListEntry{
1841 repo.ID: {
1842 HasSymbols: repo.HasSymbols,
1843 Branches: repo.Branches,
1844 },
1845 },
1846 Stats: RepoStats{
1847 Repos: 1,
1848 Shards: 1,
1849 Documents: 4,
1850 IndexBytes: 412,
1851 ContentBytes: 68,
1852 NewLinesCount: 4,
1853 DefaultBranchNewLinesCount: 2,
1854 OtherBranchesNewLinesCount: 3,
1855 },
1856 }
1857
1858 ignored := []cmp.Option{
1859 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1860 }
1861 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1862 t.Fatalf("mismatch (-want +got):\n%s", diff)
1863 }
1864
1865 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1866 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1867 if err != nil {
1868 t.Fatalf("List(%v): %v", q, err)
1869 }
1870 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1871 t.Fatalf("got %v, want 0 matches", res)
1872 }
1873 })
1874}
1875
1876func TestListReposByContent(t *testing.T) {
1877 content := []byte("bla the needle")
1878
1879 b := testIndexBuilder(t, &Repository{
1880 Name: "reponame",
1881 },
1882 Document{Name: "f1", Content: content},
1883 Document{Name: "f2", Content: content})
1884
1885 searcher := searcherForTest(t, b)
1886 q := &query.Substring{Pattern: "needle"}
1887 res, err := searcher.List(context.Background(), q, nil)
1888 if err != nil {
1889 t.Fatalf("List(%v): %v", q, err)
1890 }
1891 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1892 t.Fatalf("got %v, want 1 matches", res)
1893 }
1894 if got := res.Repos[0].Stats.Shards; got != 1 {
1895 t.Fatalf("got %d, want 1 shard", got)
1896 }
1897 q = &query.Substring{Pattern: "foo"}
1898 res, err = searcher.List(context.Background(), q, nil)
1899 if err != nil {
1900 t.Fatalf("List(%v): %v", q, err)
1901 }
1902 if len(res.Repos) != 0 {
1903 t.Fatalf("got %v, want 0 matches", res)
1904 }
1905}
1906
1907func TestMetadata(t *testing.T) {
1908 content := []byte("bla the needle")
1909
1910 b := testIndexBuilder(t, &Repository{
1911 Name: "reponame",
1912 }, Document{Name: "f1", Content: content},
1913 Document{Name: "f2", Content: content})
1914
1915 var buf bytes.Buffer
1916 if err := b.Write(&buf); err != nil {
1917 t.Fatal(err)
1918 }
1919 f := &memSeeker{buf.Bytes()}
1920
1921 rd, _, err := ReadMetadata(f)
1922 if err != nil {
1923 t.Fatalf("ReadMetadata: %v", err)
1924 }
1925
1926 if got, want := rd[0].Name, "reponame"; got != want {
1927 t.Fatalf("got %q want %q", got, want)
1928 }
1929}
1930
1931func TestOr(t *testing.T) {
1932 b := testIndexBuilder(t, nil,
1933 Document{Name: "f1", Content: []byte("needle")},
1934 Document{Name: "f2", Content: []byte("banana")})
1935 t.Run("LineMatches", func(t *testing.T) {
1936 sres := searchForTest(t, b, query.NewOr(
1937 &query.Substring{Pattern: "needle"},
1938 &query.Substring{Pattern: "banana"}))
1939
1940 if len(sres.Files) != 2 {
1941 t.Fatalf("got %v, want 2 files", sres.Files)
1942 }
1943 })
1944
1945 t.Run("ChunkMatches", func(t *testing.T) {
1946 sres := searchForTest(t, b, query.NewOr(
1947 &query.Substring{Pattern: "needle"},
1948 &query.Substring{Pattern: "banana"}))
1949
1950 if len(sres.Files) != 2 {
1951 t.Fatalf("got %v, want 2 files", sres.Files)
1952 }
1953 })
1954}
1955
1956func TestFrequency(t *testing.T) {
1957 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
1958
1959 b := testIndexBuilder(t, nil,
1960 Document{
1961 Name: "f1",
1962 Content: content,
1963 })
1964
1965 t.Run("LineMatches", func(t *testing.T) {
1966 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
1967 if len(sres.Files) != 0 {
1968 t.Errorf("got %v, wanted 0 matches", sres.Files)
1969 }
1970 })
1971
1972 t.Run("ChunkMatches", func(t *testing.T) {
1973 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
1974 if len(sres.Files) != 0 {
1975 t.Errorf("got %v, wanted 0 matches", sres.Files)
1976 }
1977 })
1978}
1979
1980func TestMatchNewline(t *testing.T) {
1981 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
1982 if err != nil {
1983 t.Fatalf("syntax.Parse: %v", err)
1984 }
1985
1986 content := []byte("pqr\nalex")
1987
1988 b := testIndexBuilder(t, nil,
1989 Document{
1990 Name: "f1",
1991 Content: content,
1992 })
1993
1994 t.Run("LineMatches", func(t *testing.T) {
1995 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
1996 if len(sres.Files) != 1 {
1997 t.Errorf("got %v, wanted 1 matches", sres.Files)
1998 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
1999 t.Errorf("got match line %q, want %q", l, content)
2000 }
2001 })
2002
2003 t.Run("ChunkMatches", func(t *testing.T) {
2004 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2005 if len(sres.Files) != 1 {
2006 t.Errorf("got %v, wanted 1 matches", sres.Files)
2007 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2008 t.Errorf("got match line %q, want %q", c, content)
2009 }
2010 })
2011}
2012
2013func TestSubRepo(t *testing.T) {
2014 subRepos := map[string]*Repository{
2015 "sub": {
2016 Name: "sub-name",
2017 LineFragmentTemplate: "sub-line",
2018 },
2019 }
2020
2021 content := []byte("pqr\nalex")
2022
2023 b := testIndexBuilder(t, &Repository{
2024 SubRepoMap: subRepos,
2025 }, Document{
2026 Name: "sub/f1",
2027 Content: content,
2028 SubRepositoryPath: "sub",
2029 })
2030
2031 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2032 if len(sres.Files) != 1 {
2033 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2034 }
2035
2036 f := sres.Files[0]
2037 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2038 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2039 }
2040
2041 if sres.LineFragments["sub-name"] != "sub-line" {
2042 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2043 }
2044}
2045
2046func TestSearchEither(t *testing.T) {
2047 b := testIndexBuilder(t, nil,
2048 Document{Name: "f1", Content: []byte("bla needle bla")},
2049 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2050
2051 t.Run("LineMatches", func(t *testing.T) {
2052 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2053 if len(sres.Files) != 2 {
2054 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2055 }
2056
2057 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2058 if len(sres.Files) != 1 {
2059 t.Fatalf("got %v, wanted 1 match", sres.Files)
2060 }
2061
2062 if got, want := sres.Files[0].FileName, "f1"; got != want {
2063 t.Errorf("got %q, want %q", got, want)
2064 }
2065 })
2066
2067 t.Run("ChunkMatches", func(t *testing.T) {
2068 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2069 if len(sres.Files) != 2 {
2070 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2071 }
2072
2073 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2074 if len(sres.Files) != 1 {
2075 t.Fatalf("got %v, wanted 1 match", sres.Files)
2076 }
2077
2078 if got, want := sres.Files[0].FileName, "f1"; got != want {
2079 t.Errorf("got %q, want %q", got, want)
2080 }
2081 })
2082}
2083
2084func TestUnicodeExactMatch(t *testing.T) {
2085 needle := "néédlÉ"
2086 content := []byte("blá blá " + needle + " blâ")
2087
2088 b := testIndexBuilder(t, nil,
2089 Document{Name: "f1", Content: content})
2090
2091 t.Run("LineMatches", func(t *testing.T) {
2092 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2093 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2094 }
2095 })
2096
2097 t.Run("ChunkMatches", func(t *testing.T) {
2098 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2099 if len(res.Files) != 1 {
2100 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2101 }
2102 })
2103}
2104
2105func TestUnicodeCoverContent(t *testing.T) {
2106 needle := "néédlÉ"
2107 content := []byte("blá blá " + needle + " blâ")
2108
2109 b := testIndexBuilder(t, nil,
2110 Document{Name: "f1", Content: content})
2111
2112 t.Run("LineMatches", func(t *testing.T) {
2113 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2114 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2115 }
2116
2117 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2118 if len(res.Files) != 1 {
2119 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2120 }
2121
2122 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2123 t.Errorf("got %d want %d", got, want)
2124 }
2125 })
2126
2127 t.Run("ChunkMatches", func(t *testing.T) {
2128 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2129 if len(res.Files) != 0 {
2130 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2131 }
2132
2133 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2134 if len(res.Files) != 1 {
2135 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2136 }
2137
2138 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2139 want := uint32(strings.Index(string(content), needle))
2140 if got != want {
2141 t.Errorf("got %d want %d", got, want)
2142 }
2143 })
2144}
2145
2146func TestUnicodeNonCoverContent(t *testing.T) {
2147 needle := "nééáádlÉ"
2148 content := []byte("blá blá " + needle + " blâ")
2149
2150 b := testIndexBuilder(t, nil,
2151 Document{Name: "f1", Content: content})
2152
2153 t.Run("LineMatches", func(t *testing.T) {
2154 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2155 if len(res.Files) != 1 {
2156 t.Fatalf("got %v, wanted 1 match", res.Files)
2157 }
2158
2159 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2160 t.Errorf("got %d want %d", got, want)
2161 }
2162 })
2163
2164 t.Run("ChunkMatches", func(t *testing.T) {
2165 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2166 if len(res.Files) != 1 {
2167 t.Fatalf("got %v, wanted 1 match", res.Files)
2168 }
2169
2170 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2171 want := uint32(strings.Index(string(content), needle))
2172 if got != want {
2173 t.Errorf("got %d want %d", got, want)
2174 }
2175 })
2176}
2177
// kelvinCodePoint is U+212A (KELVIN SIGN). The tests in this file pair it
// with ASCII 'k', which is one byte in UTF-8 where the Kelvin sign is three.
const kelvinCodePoint = 0x212A
2179
2180func TestUnicodeVariableLength(t *testing.T) {
2181 lower := 'k'
2182 upper := rune(kelvinCodePoint)
2183
2184 needle := "nee" + string([]rune{lower}) + "eed"
2185 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2186 " ee" + string([]rune{lower}) + "ee" +
2187 " ee" + string([]rune{upper}) + "ee")
2188
2189 t.Run("LineMatches", func(t *testing.T) {
2190 b := testIndexBuilder(t, nil,
2191 Document{Name: "f1", Content: []byte(corpus)})
2192
2193 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2194 if len(res.Files) != 1 {
2195 t.Fatalf("got %v, wanted 1 match", res.Files)
2196 }
2197 })
2198
2199 t.Run("ChunkMatches", func(t *testing.T) {
2200 b := testIndexBuilder(t, nil,
2201 Document{Name: "f1", Content: []byte(corpus)})
2202
2203 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2204 if len(res.Files) != 1 {
2205 t.Fatalf("got %v, wanted 1 match", res.Files)
2206 }
2207 })
2208}
2209
2210func TestUnicodeFileStartOffsets(t *testing.T) {
2211 unicode := "世界"
2212 wat := "waaaaaat"
2213 b := testIndexBuilder(t, nil,
2214 Document{
2215 Name: "f1",
2216 Content: []byte(unicode),
2217 },
2218 Document{
2219 Name: "f2",
2220 Content: []byte(wat),
2221 },
2222 )
2223 q := &query.Substring{Pattern: wat, Content: true}
2224 res := searchForTest(t, b, q)
2225 if len(res.Files) != 1 {
2226 t.Fatalf("got %v, wanted 1 match", res.Files)
2227 }
2228}
2229
2230func TestLongFileUTF8(t *testing.T) {
2231 needle := "neeedle"
2232
2233 // 6 bytes.
2234 unicode := "世界"
2235 content := []byte(strings.Repeat(unicode, 100) + needle)
2236 b := testIndexBuilder(t, nil,
2237 Document{
2238 Name: "f1",
2239 Content: []byte(strings.Repeat("a", 50)),
2240 },
2241 Document{
2242 Name: "f2",
2243 Content: content,
2244 })
2245
2246 t.Run("LineMatches", func(t *testing.T) {
2247 q := &query.Substring{Pattern: needle, Content: true}
2248 res := searchForTest(t, b, q)
2249 if len(res.Files) != 1 {
2250 t.Errorf("got %v, want 1 result", res)
2251 }
2252 })
2253
2254 t.Run("ChunkMatches", func(t *testing.T) {
2255 q := &query.Substring{Pattern: needle, Content: true}
2256 res := searchForTest(t, b, q, chunkOpts)
2257 if len(res.Files) != 1 {
2258 t.Errorf("got %v, want 1 result", res)
2259 }
2260 })
2261}
2262
2263func TestEstimateDocCount(t *testing.T) {
2264 content := []byte("bla needle bla")
2265 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2266 Document{Name: "f1", Content: content},
2267 Document{Name: "f2", Content: content},
2268 )
2269
2270 t.Run("LineMatches", func(t *testing.T) {
2271 if sres := searchForTest(t, b,
2272 query.NewAnd(
2273 &query.Substring{Pattern: "needle"},
2274 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2275 ), SearchOptions{
2276 EstimateDocCount: true,
2277 }); sres.Stats.ShardFilesConsidered != 2 {
2278 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2279 }
2280 if sres := searchForTest(t, b,
2281 query.NewAnd(
2282 &query.Substring{Pattern: "needle"},
2283 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2284 ), SearchOptions{
2285 EstimateDocCount: true,
2286 }); sres.Stats.ShardFilesConsidered != 0 {
2287 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2288 }
2289 })
2290
2291 t.Run("ChunkMatches", func(t *testing.T) {
2292 if sres := searchForTest(t, b,
2293 query.NewAnd(
2294 &query.Substring{Pattern: "needle"},
2295 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2296 ), SearchOptions{
2297 EstimateDocCount: true,
2298 ChunkMatches: true,
2299 }); sres.Stats.ShardFilesConsidered != 2 {
2300 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2301 }
2302 if sres := searchForTest(t, b,
2303 query.NewAnd(
2304 &query.Substring{Pattern: "needle"},
2305 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2306 ), SearchOptions{
2307 EstimateDocCount: true,
2308 ChunkMatches: true,
2309 }); sres.Stats.ShardFilesConsidered != 0 {
2310 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2311 }
2312 })
2313}
2314
2315func TestUTF8CorrectCorpus(t *testing.T) {
2316 needle := "neeedle"
2317
2318 // 6 bytes.
2319 unicode := "世界"
2320 b := testIndexBuilder(t, nil,
2321 Document{
2322 Name: "f1",
2323 Content: []byte(strings.Repeat(unicode, 100)),
2324 },
2325 Document{
2326 Name: "xxxxxneeedle",
2327 Content: []byte("hello"),
2328 })
2329
2330 t.Run("LineMatches", func(t *testing.T) {
2331 q := &query.Substring{Pattern: needle, FileName: true}
2332 res := searchForTest(t, b, q)
2333 if len(res.Files) != 1 {
2334 t.Errorf("got %v, want 1 result", res)
2335 }
2336 })
2337
2338 t.Run("ChunkMatches", func(t *testing.T) {
2339 q := &query.Substring{Pattern: needle, FileName: true}
2340 res := searchForTest(t, b, q, chunkOpts)
2341 if len(res.Files) != 1 {
2342 t.Errorf("got %v, want 1 result", res)
2343 }
2344 })
2345}
2346
2347func TestBuilderStats(t *testing.T) {
2348 b := testIndexBuilder(t, nil,
2349 Document{
2350 Name: "f1",
2351 Content: []byte(strings.Repeat("abcd", 1024)),
2352 })
2353 var buf bytes.Buffer
2354 if err := b.Write(&buf); err != nil {
2355 t.Fatal(err)
2356 }
2357
2358 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2359 t.Errorf("got %d, want %d", got, want)
2360 }
2361}
2362
2363func TestIOStats(t *testing.T) {
2364 b := testIndexBuilder(t, nil,
2365 Document{
2366 Name: "f1",
2367 Content: []byte(strings.Repeat("abcd", 1024)),
2368 })
2369
2370 t.Run("LineMatches", func(t *testing.T) {
2371 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2372 res := searchForTest(t, b, q)
2373
2374 // 4096 (content) + 2 (overhead: newlines or doc sections)
2375 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2376 t.Errorf("got content I/O %d, want %d", got, want)
2377 }
2378
2379 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2380 // delta encoded.
2381 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2382 t.Errorf("got index I/O %d, want %d", got, want)
2383 }
2384 })
2385
2386 t.Run("ChunkMatches", func(t *testing.T) {
2387 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2388 res := searchForTest(t, b, q, chunkOpts)
2389
2390 // 4096 (content) + 2 (overhead: newlines or doc sections)
2391 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2392 t.Errorf("got content I/O %d, want %d", got, want)
2393 }
2394
2395 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2396 // delta encoded.
2397 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2398 t.Errorf("got index I/O %d, want %d", got, want)
2399 }
2400 })
2401}
2402
2403func TestStartLineAnchor(t *testing.T) {
2404 b := testIndexBuilder(t, nil,
2405 Document{
2406 Name: "f1",
2407 Content: []byte(
2408 `hello
2409start of middle of line
2410`),
2411 })
2412
2413 t.Run("LineMatches", func(t *testing.T) {
2414 q, err := query.Parse("^start")
2415 if err != nil {
2416 t.Errorf("parse: %v", err)
2417 }
2418
2419 res := searchForTest(t, b, q)
2420 if len(res.Files) != 1 {
2421 t.Errorf("got %v, want 1 file", res.Files)
2422 }
2423
2424 q, err = query.Parse("^middle")
2425 if err != nil {
2426 t.Errorf("parse: %v", err)
2427 }
2428 res = searchForTest(t, b, q)
2429 if len(res.Files) != 0 {
2430 t.Errorf("got %v, want 0 files", res.Files)
2431 }
2432 })
2433
2434 t.Run("ChunkMatches", func(t *testing.T) {
2435 q, err := query.Parse("^start")
2436 if err != nil {
2437 t.Errorf("parse: %v", err)
2438 }
2439
2440 res := searchForTest(t, b, q, chunkOpts)
2441 if len(res.Files) != 1 {
2442 t.Errorf("got %v, want 1 file", res.Files)
2443 }
2444
2445 q, err = query.Parse("^middle")
2446 if err != nil {
2447 t.Errorf("parse: %v", err)
2448 }
2449 res = searchForTest(t, b, q, chunkOpts)
2450 if len(res.Files) != 0 {
2451 t.Errorf("got %v, want 0 files", res.Files)
2452 }
2453 })
2454}
2455
2456func TestAndOrUnicode(t *testing.T) {
2457 q, err := query.Parse("orange.*apple")
2458 if err != nil {
2459 t.Errorf("parse: %v", err)
2460 }
2461 finalQ := query.NewAnd(q,
2462 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2463 query.NewOr(&query.Branch{Pattern: "master"}))))
2464
2465 b := testIndexBuilder(t, &Repository{
2466 Name: "name",
2467 Branches: []RepositoryBranch{{"master", "master-version"}},
2468 }, Document{
2469 Name: "f2",
2470 Content: []byte("orange\u2318apple"),
2471 // --------------0123456 78901
2472 Branches: []string{"master"},
2473 })
2474
2475 t.Run("LineMatches", func(t *testing.T) {
2476 res := searchForTest(t, b, finalQ)
2477 if len(res.Files) != 1 {
2478 t.Errorf("got %v, want 1 result", res.Files)
2479 }
2480 })
2481
2482 t.Run("ChunkMatches", func(t *testing.T) {
2483 res := searchForTest(t, b, finalQ, chunkOpts)
2484 if len(res.Files) != 1 {
2485 t.Errorf("got %v, want 1 result", res.Files)
2486 }
2487 })
2488}
2489
2490func TestAndShort(t *testing.T) {
2491 content := []byte("bla needle at orange bla")
2492 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2493 Document{Name: "f1", Content: content},
2494 Document{Name: "f2", Content: []byte("xx at xx")},
2495 Document{Name: "f3", Content: []byte("yy orange xx")},
2496 )
2497
2498 q := query.NewAnd(&query.Substring{Pattern: "at"},
2499 &query.Substring{Pattern: "orange"})
2500
2501 t.Run("LineMatches", func(t *testing.T) {
2502 res := searchForTest(t, b, q)
2503 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2504 t.Errorf("got %v, want 1 result", res.Files)
2505 }
2506 })
2507
2508 t.Run("ChunkMatches", func(t *testing.T) {
2509 res := searchForTest(t, b, q, chunkOpts)
2510 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2511 t.Errorf("got %v, want 1 result", res.Files)
2512 }
2513 })
2514}
2515
2516func TestNoCollectRegexpSubstring(t *testing.T) {
2517 content := []byte("bla final bla\nfoo final, foo")
2518 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2519 Document{Name: "f1", Content: content},
2520 )
2521
2522 q := &query.Regexp{
2523 Regexp: mustParseRE("final[,.]"),
2524 }
2525
2526 t.Run("LineMatches", func(t *testing.T) {
2527 res := searchForTest(t, b, q)
2528 if len(res.Files) != 1 {
2529 t.Fatalf("got %v, want 1 result", res.Files)
2530 }
2531 if f := res.Files[0]; len(f.LineMatches) != 1 {
2532 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2533 }
2534 })
2535
2536 t.Run("ChunkMatches", func(t *testing.T) {
2537 res := searchForTest(t, b, q, chunkOpts)
2538 if len(res.Files) != 1 {
2539 t.Fatalf("got %v, want 1 result", res.Files)
2540 }
2541 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2542 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2543 }
2544 })
2545}
2546
2547func printLineMatches(ms []LineMatch) string {
2548 var ss []string
2549 for _, m := range ms {
2550 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2551 }
2552
2553 return strings.Join(ss, ", ")
2554}
2555
2556func TestLang(t *testing.T) {
2557 content := []byte("bla needle bla")
2558 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2559 Document{Name: "f1", Content: content},
2560 Document{Name: "f2", Language: "java", Content: content},
2561 Document{Name: "f3", Language: "cpp", Content: content},
2562 )
2563
2564 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2565 &query.Language{Language: "cpp"})
2566
2567 t.Run("LineMatches", func(t *testing.T) {
2568 res := searchForTest(t, b, q)
2569 if len(res.Files) != 1 {
2570 t.Fatalf("got %v, want 1 result in f3", res.Files)
2571 }
2572 f := res.Files[0]
2573 if f.FileName != "f3" || f.Language != "cpp" {
2574 t.Fatalf("got %v, want 1 match with language cpp", f)
2575 }
2576 })
2577
2578 t.Run("ChunkMatches", func(t *testing.T) {
2579 res := searchForTest(t, b, q, chunkOpts)
2580 if len(res.Files) != 1 {
2581 t.Fatalf("got %v, want 1 result in f3", res.Files)
2582 }
2583 f := res.Files[0]
2584 if f.FileName != "f3" || f.Language != "cpp" {
2585 t.Fatalf("got %v, want 1 match with language cpp", f)
2586 }
2587 })
2588}
2589
2590func TestLangShortcut(t *testing.T) {
2591 content := []byte("bla needle bla")
2592 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2593 Document{Name: "f2", Language: "java", Content: content},
2594 Document{Name: "f3", Language: "cpp", Content: content},
2595 )
2596
2597 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2598 &query.Language{Language: "fortran"})
2599
2600 t.Run("LineMatches", func(t *testing.T) {
2601 res := searchForTest(t, b, q)
2602 if len(res.Files) != 0 {
2603 t.Fatalf("got %v, want 0 results", res.Files)
2604 }
2605 if res.Stats.IndexBytesLoaded > 0 {
2606 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2607 }
2608 })
2609
2610 t.Run("ChunkMatches", func(t *testing.T) {
2611 res := searchForTest(t, b, q, chunkOpts)
2612 if len(res.Files) != 0 {
2613 t.Fatalf("got %v, want 0 results", res.Files)
2614 }
2615 if res.Stats.IndexBytesLoaded > 0 {
2616 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2617 }
2618 })
2619}
2620
2621func TestNoTextMatchAtoms(t *testing.T) {
2622 content := []byte("bla needle bla")
2623 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2624 Document{Name: "f1", Content: content},
2625 Document{Name: "f2", Language: "java", Content: content},
2626 Document{Name: "f3", Language: "cpp", Content: content},
2627 )
2628 q := query.NewAnd(&query.Language{Language: "java"})
2629 t.Run("LineMatches", func(t *testing.T) {
2630 res := searchForTest(t, b, q)
2631 if len(res.Files) != 1 {
2632 t.Fatalf("got %v, want 1 result in f3", res.Files)
2633 }
2634 })
2635
2636 t.Run("ChunkMatches", func(t *testing.T) {
2637 res := searchForTest(t, b, q, chunkOpts)
2638 if len(res.Files) != 1 {
2639 t.Fatalf("got %v, want 1 result in f3", res.Files)
2640 }
2641 })
2642}
2643
2644func TestNoPositiveAtoms(t *testing.T) {
2645 content := []byte("bla needle bla")
2646 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2647 Document{Name: "f1", Content: content},
2648 Document{Name: "f2", Content: content},
2649 )
2650
2651 q := query.NewAnd(
2652 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2653 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2654 t.Run("LineMatches", func(t *testing.T) {
2655 res := searchForTest(t, b, q)
2656 if len(res.Files) != 2 {
2657 t.Fatalf("got %v, want 2 results in f3", res.Files)
2658 }
2659 })
2660 t.Run("ChunkMatches", func(t *testing.T) {
2661 res := searchForTest(t, b, q, chunkOpts)
2662 if len(res.Files) != 2 {
2663 t.Fatalf("got %v, want 2 results in f3", res.Files)
2664 }
2665 })
2666}
2667
2668func TestSymbolBoundaryStart(t *testing.T) {
2669 content := []byte("start\nbla bla\nend")
2670 // ----------------012345-67890123-456
2671
2672 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2673 Document{
2674 Name: "f1",
2675 Content: content,
2676 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2677 },
2678 )
2679 q := &query.Symbol{
2680 Expr: &query.Substring{Pattern: "start"},
2681 }
2682 t.Run("LineMatches", func(t *testing.T) {
2683 res := searchForTest(t, b, q)
2684 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2685 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2686 }
2687 m := res.Files[0].LineMatches[0].LineFragments[0]
2688 if m.Offset != 0 {
2689 t.Fatalf("got offset %d want 0", m.Offset)
2690 }
2691 })
2692
2693 t.Run("ChunkMatches", func(t *testing.T) {
2694 res := searchForTest(t, b, q, chunkOpts)
2695 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2696 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2697 }
2698 m := res.Files[0].ChunkMatches[0].Ranges[0]
2699 if m.Start.ByteOffset != 0 {
2700 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2701 }
2702 })
2703}
2704
2705func TestSymbolBoundaryEnd(t *testing.T) {
2706 content := []byte("start\nbla bla\nend")
2707 // ----------------012345-67890123-456
2708
2709 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2710 Document{
2711 Name: "f1",
2712 Content: content,
2713 Symbols: []DocumentSection{{14, 17}},
2714 },
2715 )
2716 q := &query.Symbol{
2717 Expr: &query.Substring{Pattern: "end"},
2718 }
2719 t.Run("LineMatches", func(t *testing.T) {
2720 res := searchForTest(t, b, q)
2721 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2722 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2723 }
2724 m := res.Files[0].LineMatches[0].LineFragments[0]
2725 if m.Offset != 14 {
2726 t.Fatalf("got offset %d want 0", m.Offset)
2727 }
2728 })
2729
2730 t.Run("ChunkMatches", func(t *testing.T) {
2731 res := searchForTest(t, b, q, chunkOpts)
2732 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2733 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2734 }
2735 m := res.Files[0].ChunkMatches[0].Ranges[0]
2736 if m.Start.ByteOffset != 14 {
2737 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2738 }
2739 })
2740}
2741
2742func TestSymbolSubstring(t *testing.T) {
2743 content := []byte("bla\nsymblabla\nbla")
2744 // ----------------0123-4567890123-456
2745
2746 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2747 Document{
2748 Name: "f1",
2749 Content: content,
2750 Symbols: []DocumentSection{{4, 12}},
2751 },
2752 )
2753 q := &query.Symbol{
2754 Expr: &query.Substring{Pattern: "bla"},
2755 }
2756 t.Run("LineMatches", func(t *testing.T) {
2757 res := searchForTest(t, b, q)
2758 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2759 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2760 }
2761 m := res.Files[0].LineMatches[0].LineFragments[0]
2762 if m.Offset != 7 || m.MatchLength != 3 {
2763 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2764 }
2765 })
2766
2767 t.Run("ChunkMatches", func(t *testing.T) {
2768 res := searchForTest(t, b, q, chunkOpts)
2769 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2770 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2771 }
2772 m := res.Files[0].ChunkMatches[0].Ranges[0]
2773 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2774 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2775 }
2776 })
2777}
2778
2779func TestSymbolSubstringExact(t *testing.T) {
2780 content := []byte("bla\nsym\nbla\nsym\nasymb")
2781 // ----------------0123-4567-890123456-78901
2782
2783 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2784 Document{
2785 Name: "f1",
2786 Content: content,
2787 Symbols: []DocumentSection{{4, 7}},
2788 },
2789 )
2790 q := &query.Symbol{
2791 Expr: &query.Substring{Pattern: "sym"},
2792 }
2793 t.Run("LineMatches", func(t *testing.T) {
2794 res := searchForTest(t, b, q)
2795 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2796 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2797 }
2798 m := res.Files[0].LineMatches[0].LineFragments[0]
2799 if m.Offset != 4 {
2800 t.Fatalf("got offset %d, want 7", m.Offset)
2801 }
2802 })
2803
2804 t.Run("ChunkMatches", func(t *testing.T) {
2805 res := searchForTest(t, b, q, chunkOpts)
2806 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2807 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2808 }
2809 m := res.Files[0].ChunkMatches[0].Ranges[0]
2810 if m.Start.ByteOffset != 4 {
2811 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2812 }
2813 })
2814}
2815
2816func TestSymbolRegexpExact(t *testing.T) {
2817 content := []byte("blah\nbla\nbl")
2818 // ----------------01234-5678-90
2819
2820 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2821 Document{
2822 Name: "f1",
2823 Content: content,
2824 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2825 },
2826 )
2827 q := &query.Symbol{
2828 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2829 }
2830 t.Run("LineMatches", func(t *testing.T) {
2831 res := searchForTest(t, b, q)
2832 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2833 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2834 }
2835 m := res.Files[0].LineMatches[0].LineFragments[0]
2836 if m.Offset != 5 {
2837 t.Fatalf("got offset %d, want 5", m.Offset)
2838 }
2839 })
2840
2841 t.Run("ChunkMatches", func(t *testing.T) {
2842 res := searchForTest(t, b, q, chunkOpts)
2843 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2844 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2845 }
2846 m := res.Files[0].ChunkMatches[0].Ranges[0]
2847 if m.Start.ByteOffset != 5 {
2848 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2849 }
2850 })
2851}
2852
2853func TestSymbolRegexpPartial(t *testing.T) {
2854 content := []byte("abcdef")
2855 // ----------------012345
2856
2857 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2858 Document{
2859 Name: "f1",
2860 Content: content,
2861 Symbols: []DocumentSection{{0, 6}},
2862 },
2863 )
2864 q := &query.Symbol{
2865 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2866 }
2867 t.Run("LineMatches", func(t *testing.T) {
2868 res := searchForTest(t, b, q)
2869 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2870 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2871 }
2872 m := res.Files[0].LineMatches[0].LineFragments[0]
2873 if m.Offset != 1 {
2874 t.Fatalf("got offset %d, want 1", m.Offset)
2875 }
2876 if m.MatchLength != 3 {
2877 t.Fatalf("got match length %d, want 3", m.MatchLength)
2878 }
2879 })
2880
2881 t.Run("ChunkMatches", func(t *testing.T) {
2882 res := searchForTest(t, b, q, chunkOpts)
2883 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2884 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2885 }
2886 m := res.Files[0].ChunkMatches[0].Ranges[0]
2887 if m.Start.ByteOffset != 1 {
2888 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2889 }
2890 if m.End.ByteOffset != 4 {
2891 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2892 }
2893 })
2894}
2895
2896func TestSymbolRegexpAll(t *testing.T) {
2897 docs := []Document{
2898 {
2899 Name: "f1",
2900 Content: []byte("Hello Zoekt"),
2901 // --------------01234567890
2902 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2903 },
2904 {
2905 Name: "f2",
2906 Content: []byte("Second Zoekt Third"),
2907 // --------------012345678901234567
2908 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2909 },
2910 }
2911
2912 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2913 q := &query.Symbol{
2914 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2915 }
2916 t.Run("LineMatches", func(t *testing.T) {
2917 res := searchForTest(t, b, q)
2918 if len(res.Files) != len(docs) {
2919 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2920 }
2921 for i, want := range docs {
2922 got := res.Files[i].LineMatches[0].LineFragments
2923 if len(got) != len(want.Symbols) {
2924 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2925 }
2926
2927 for j, sec := range want.Symbols {
2928 if sec.Start != got[j].Offset {
2929 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
2930 }
2931 }
2932 }
2933 })
2934
2935 t.Run("ChunkMatches", func(t *testing.T) {
2936 res := searchForTest(t, b, q, chunkOpts)
2937 if len(res.Files) != len(docs) {
2938 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2939 }
2940 for i, want := range docs {
2941 got := res.Files[i].ChunkMatches[0].Ranges
2942 if len(got) != len(want.Symbols) {
2943 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2944 }
2945
2946 for j, sec := range want.Symbols {
2947 if sec.Start != uint32(got[j].Start.ByteOffset) {
2948 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
2949 }
2950 }
2951 }
2952 })
2953}
2954
2955func TestHitIterTerminate(t *testing.T) {
2956 // contrived input: trigram frequencies forces selecting abc +
2957 // def for the distance iteration. There is no match, so this
2958 // will advance the compressedPostingIterator to beyond the
2959 // end.
2960 content := []byte("abc bcdbcd cdecde abcabc def efg")
2961 b := testIndexBuilder(t, nil,
2962 Document{
2963 Name: "f1",
2964 Content: content,
2965 },
2966 )
2967
2968 t.Run("LineMatches", func(t *testing.T) {
2969 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
2970 })
2971
2972 t.Run("ChunkMatches", func(t *testing.T) {
2973 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
2974 })
2975}
2976
2977func TestDistanceHitIterBailLast(t *testing.T) {
2978 content := []byte("AST AST AST UASH")
2979 b := testIndexBuilder(t, nil,
2980 Document{
2981 Name: "f1",
2982 Content: content,
2983 },
2984 )
2985 t.Run("LineMatches", func(t *testing.T) {
2986 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
2987 if len(res.Files) != 0 {
2988 t.Fatalf("got %v, want no results", res.Files)
2989 }
2990 })
2991
2992 t.Run("LineMatches", func(t *testing.T) {
2993 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
2994 if len(res.Files) != 0 {
2995 t.Fatalf("got %v, want no results", res.Files)
2996 }
2997 })
2998}
2999
3000func TestDocumentSectionRuneBoundary(t *testing.T) {
3001 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3002 b, err := NewIndexBuilder(nil)
3003 if err != nil {
3004 t.Fatalf("NewIndexBuilder: %v", err)
3005 }
3006
3007 for i, sec := range []DocumentSection{
3008 {2, 6},
3009 {3, 7},
3010 } {
3011 if err := b.Add(Document{
3012 Name: "f1",
3013 Content: []byte(content),
3014 Symbols: []DocumentSection{sec},
3015 }); err == nil {
3016 t.Errorf("%d: Add succeeded", i)
3017 }
3018 }
3019}
3020
3021func TestUnicodeQuery(t *testing.T) {
3022 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3023 b := testIndexBuilder(t, nil,
3024 Document{
3025 Name: "f1",
3026 Content: []byte(content),
3027 },
3028 )
3029
3030 q := &query.Substring{Pattern: content}
3031
3032 t.Run("LineMatches", func(t *testing.T) {
3033 res := searchForTest(t, b, q)
3034 if len(res.Files) != 1 {
3035 t.Fatalf("want 1 match, got %v", res.Files)
3036 }
3037
3038 f := res.Files[0]
3039 if len(f.LineMatches) != 1 {
3040 t.Fatalf("want 1 line, got %v", f.LineMatches)
3041 }
3042 l := f.LineMatches[0]
3043
3044 if len(l.LineFragments) != 1 {
3045 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3046 }
3047 fr := l.LineFragments[0]
3048 if fr.MatchLength != len(content) {
3049 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3050 }
3051 })
3052
3053 t.Run("ChunkMatches", func(t *testing.T) {
3054 res := searchForTest(t, b, q, chunkOpts)
3055 if len(res.Files) != 1 {
3056 t.Fatalf("want 1 match, got %v", res.Files)
3057 }
3058
3059 f := res.Files[0]
3060 if len(f.ChunkMatches) != 1 {
3061 t.Fatalf("want 1 line, got %v", f.LineMatches)
3062 }
3063 cm := f.ChunkMatches[0]
3064
3065 if len(cm.Ranges) != 1 {
3066 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3067 }
3068 rr := cm.Ranges[0]
3069 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3070 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3071 }
3072 })
3073}
3074
3075func TestSkipInvalidContent(t *testing.T) {
3076 for _, content := range []string{
3077 // Binary
3078 "abc def \x00 abc",
3079 } {
3080
3081 b, err := NewIndexBuilder(nil)
3082 if err != nil {
3083 t.Fatalf("NewIndexBuilder: %v", err)
3084 }
3085
3086 if err := b.Add(Document{
3087 Name: "f1",
3088 Content: []byte(content),
3089 }); err != nil {
3090 t.Fatal(err)
3091 }
3092
3093 t.Run("LineMatches", func(t *testing.T) {
3094 q := &query.Substring{Pattern: "abc def"}
3095 res := searchForTest(t, b, q)
3096 if len(res.Files) != 0 {
3097 t.Fatalf("got %v, want no results", res.Files)
3098 }
3099
3100 q = &query.Substring{Pattern: "NOT-INDEXED"}
3101 res = searchForTest(t, b, q)
3102 if len(res.Files) != 1 {
3103 t.Fatalf("got %v, want 1 result", res.Files)
3104 }
3105 })
3106
3107 t.Run("ChunkMatches", func(t *testing.T) {
3108 q := &query.Substring{Pattern: "abc def"}
3109 res := searchForTest(t, b, q, chunkOpts)
3110 if len(res.Files) != 0 {
3111 t.Fatalf("got %v, want no results", res.Files)
3112 }
3113
3114 q = &query.Substring{Pattern: "NOT-INDEXED"}
3115 res = searchForTest(t, b, q, chunkOpts)
3116 if len(res.Files) != 1 {
3117 t.Fatalf("got %v, want 1 result", res.Files)
3118 }
3119 })
3120 }
3121}
3122
3123func TestCheckText(t *testing.T) {
3124 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3125 if err := CheckText([]byte(text), 20000); err != nil {
3126 t.Errorf("CheckText(%q): %v", text, err)
3127 }
3128 }
3129 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3130 if err := CheckText([]byte(text), 15); err == nil {
3131 t.Errorf("CheckText(%q) succeeded", text)
3132 }
3133 }
3134}
3135
3136func TestLineAnd(t *testing.T) {
3137 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3138 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3139 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3140 Document{Name: "f3", Content: []byte("banana grape")},
3141 )
3142 pattern := "(apple)(?-s:.)*?(banana)"
3143 r, _ := syntax.Parse(pattern, syntax.Perl)
3144
3145 q := query.Regexp{
3146 Regexp: r,
3147 Content: true,
3148 }
3149 t.Run("LineMatches", func(t *testing.T) {
3150 res := searchForTest(t, b, &q)
3151 wantRegexpCount := 1
3152 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3153 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3154 }
3155 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3156 t.Errorf("got %v, want 1 result", res.Files)
3157 }
3158 })
3159
3160 t.Run("ChunkMatches", func(t *testing.T) {
3161 res := searchForTest(t, b, &q, chunkOpts)
3162 wantRegexpCount := 1
3163 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3164 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3165 }
3166 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3167 t.Errorf("got %v, want 1 result", res.Files)
3168 }
3169 })
3170}
3171
3172func TestLineAndFileName(t *testing.T) {
3173 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3174 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3175 Document{Name: "f2", Content: []byte("apple banana\norange")},
3176 Document{Name: "apple banana", Content: []byte("banana grape")},
3177 )
3178 pattern := "(apple)(?-s:.)*?(banana)"
3179 r, _ := syntax.Parse(pattern, syntax.Perl)
3180
3181 q := query.Regexp{
3182 Regexp: r,
3183 FileName: true,
3184 }
3185 t.Run("LineMatches", func(t *testing.T) {
3186 res := searchForTest(t, b, &q)
3187 wantRegexpCount := 1
3188 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3189 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3190 }
3191 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3192 t.Errorf("got %v, want 1 result", res.Files)
3193 }
3194 })
3195
3196 t.Run("ChunkMatches", func(t *testing.T) {
3197 res := searchForTest(t, b, &q, chunkOpts)
3198 wantRegexpCount := 1
3199 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3200 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3201 }
3202 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3203 t.Errorf("got %v, want 1 result", res.Files)
3204 }
3205 })
3206}
3207
3208func TestMultiLineRegex(t *testing.T) {
3209 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3210 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3211 Document{Name: "f2", Content: []byte("apple orange")},
3212 Document{Name: "f3", Content: []byte("grape apple")},
3213 )
3214 pattern := "(apple).*?[[:space:]].*?(grape)"
3215 r, _ := syntax.Parse(pattern, syntax.Perl)
3216
3217 q := query.Regexp{
3218 Regexp: r,
3219 }
3220 t.Run("LineMatches", func(t *testing.T) {
3221 res := searchForTest(t, b, &q)
3222 wantRegexpCount := 2
3223 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3224 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3225 }
3226 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3227 t.Errorf("got %v, want 1 result", res.Files)
3228 }
3229 if l := len(res.Files[0].LineMatches); l != 2 {
3230 t.Errorf("got %v, want 2 line matches", l)
3231 }
3232 })
3233
3234 t.Run("ChunkMatches", func(t *testing.T) {
3235 res := searchForTest(t, b, &q, chunkOpts)
3236 wantRegexpCount := 2
3237 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3238 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3239 }
3240 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3241 t.Errorf("got %v, want 1 result", res.Files)
3242 }
3243 if l := len(res.Files[0].ChunkMatches); l != 1 {
3244 t.Errorf("got %v, want 1 chunk matches", l)
3245 }
3246 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3247 t.Errorf("got %v, want 1 chunk ranges", l)
3248 }
3249 })
3250}
3251
3252func TestSearchTypeFileName(t *testing.T) {
3253 b := testIndexBuilder(t, &Repository{
3254 Name: "reponame",
3255 },
3256 Document{Name: "f1", Content: []byte("bla the needle")},
3257 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3258 // -----------------------------------012345678901234567890-123456
3259 )
3260
3261 t.Run("LineMatches", func(t *testing.T) {
3262 wantSingleMatch := func(res *SearchResult, want string) {
3263 t.Helper()
3264 fmatches := res.Files
3265 if len(fmatches) != 1 {
3266 t.Errorf("got %v, want 1 matches", len(fmatches))
3267 return
3268 }
3269 if len(fmatches[0].LineMatches) != 1 {
3270 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3271 return
3272 }
3273 var got string
3274 if fmatches[0].LineMatches[0].FileName {
3275 got = fmatches[0].FileName
3276 } else {
3277 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3278 }
3279
3280 if got != want {
3281 t.Errorf("got %s, want %s", got, want)
3282 }
3283 }
3284
3285 // Only return the later match in the second file
3286 res := searchForTest(t, b, query.NewAnd(
3287 &query.Type{
3288 Type: query.TypeFileName,
3289 Child: &query.Substring{Pattern: "needle"},
3290 },
3291 &query.Substring{Pattern: "file"}))
3292 wantSingleMatch(res, "f2:8")
3293
3294 // Only return a filename result
3295 res = searchForTest(t, b,
3296 &query.Type{
3297 Type: query.TypeFileName,
3298 Child: &query.Substring{Pattern: "file"},
3299 })
3300 wantSingleMatch(res, "f2")
3301 })
3302
3303 t.Run("ChunkMatches", func(t *testing.T) {
3304 wantSingleMatch := func(res *SearchResult, want string) {
3305 t.Helper()
3306 fmatches := res.Files
3307 if len(fmatches) != 1 {
3308 t.Errorf("got %v, want 1 matches", len(fmatches))
3309 return
3310 }
3311 if len(fmatches[0].ChunkMatches) != 1 {
3312 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3313 return
3314 }
3315 var got string
3316 if fmatches[0].ChunkMatches[0].FileName {
3317 got = fmatches[0].FileName
3318 } else {
3319 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3320 }
3321
3322 if got != want {
3323 t.Errorf("got %s, want %s", got, want)
3324 }
3325 }
3326
3327 // Only return the later match in the second file
3328 res := searchForTest(t, b, query.NewAnd(
3329 &query.Type{
3330 Type: query.TypeFileName,
3331 Child: &query.Substring{Pattern: "needle"},
3332 },
3333 &query.Substring{Pattern: "file"}),
3334 chunkOpts,
3335 )
3336 wantSingleMatch(res, "f2:8")
3337
3338 // Only return a filename result
3339 res = searchForTest(t, b,
3340 &query.Type{
3341 Type: query.TypeFileName,
3342 Child: &query.Substring{Pattern: "file"},
3343 },
3344 chunkOpts,
3345 )
3346 wantSingleMatch(res, "f2")
3347 })
3348}
3349
3350func TestSearchTypeLanguage(t *testing.T) {
3351 b := testIndexBuilder(t, &Repository{
3352 Name: "reponame",
3353 },
3354 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3355 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3356 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3357 )
3358
3359 t.Log(b.languageMap)
3360
3361 t.Run("LineMatches", func(t *testing.T) {
3362 wantSingleMatch := func(res *SearchResult, want string) {
3363 t.Helper()
3364 fmatches := res.Files
3365 if len(fmatches) != 1 {
3366 t.Errorf("got %v, want 1 matches", len(fmatches))
3367 return
3368 }
3369 if len(fmatches[0].LineMatches) != 1 {
3370 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3371 return
3372 }
3373 var got string
3374 if fmatches[0].LineMatches[0].FileName {
3375 got = fmatches[0].FileName
3376 } else {
3377 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3378 }
3379
3380 if got != want {
3381 t.Errorf("got %s, want %s", got, want)
3382 }
3383 }
3384
3385 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3386 wantSingleMatch(res, "apex.cls")
3387
3388 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3389 wantSingleMatch(res, "tex.cls")
3390
3391 res = searchForTest(t, b, &query.Language{Language: "C"})
3392 wantSingleMatch(res, "hello.h")
3393
3394 // test fallback language search by pretending it's an older index version
3395 res = searchForTest(t, b, &query.Language{Language: "C++"})
3396 if len(res.Files) != 0 {
3397 t.Errorf("got %d results for C++, want 0", len(res.Files))
3398 }
3399
3400 b.featureVersion = 11 // force fallback
3401 res = searchForTest(t, b, &query.Language{Language: "C++"})
3402 wantSingleMatch(res, "hello.h")
3403 })
3404
3405 t.Run("ChunkMatches", func(t *testing.T) {
3406 wantSingleMatch := func(res *SearchResult, want string) {
3407 t.Helper()
3408 fmatches := res.Files
3409 if len(fmatches) != 1 {
3410 t.Errorf("got %v, want 1 matches", len(fmatches))
3411 return
3412 }
3413 if len(fmatches[0].ChunkMatches) != 1 {
3414 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3415 return
3416 }
3417 var got string
3418 if fmatches[0].ChunkMatches[0].FileName {
3419 got = fmatches[0].FileName
3420 } else {
3421 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3422 }
3423
3424 if got != want {
3425 t.Errorf("got %s, want %s", got, want)
3426 }
3427 }
3428
3429 b.featureVersion = FeatureVersion // reset feature version
3430 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3431 wantSingleMatch(res, "apex.cls")
3432
3433 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3434 wantSingleMatch(res, "tex.cls")
3435
3436 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3437 wantSingleMatch(res, "hello.h")
3438
3439 // test fallback language search by pretending it's an older index version
3440 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3441 if len(res.Files) != 0 {
3442 t.Errorf("got %d results for C++, want 0", len(res.Files))
3443 }
3444
3445 b.featureVersion = 11 // force fallback
3446 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3447 wantSingleMatch(res, "hello.h")
3448 })
3449}
3450
3451func TestStats(t *testing.T) {
3452 ignored := []cmp.Option{
3453 cmpopts.EquateEmpty(),
3454 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3455 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3456 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3457 }
3458
3459 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3460 searcher := searcherForTest(t, b)
3461 indexdata := searcher.(*indexData)
3462 return indexdata.repoListEntry
3463 }
3464
3465 t.Run("one empty repo", func(t *testing.T) {
3466 b := testIndexBuilder(t, nil)
3467 got := repoListEntries(b)
3468 want := []RepoListEntry{
3469 {
3470 Stats: RepoStats{
3471 Repos: 0,
3472 Shards: 1,
3473 Documents: 0,
3474 IndexBytes: 20,
3475 ContentBytes: 0,
3476 NewLinesCount: 0,
3477 DefaultBranchNewLinesCount: 0,
3478 OtherBranchesNewLinesCount: 0,
3479 },
3480 },
3481 }
3482
3483 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3484 t.Fatalf("mismatch (-want +got):\n%s", diff)
3485 }
3486
3487 })
3488
3489 t.Run("one simple shard", func(t *testing.T) {
3490 b := testIndexBuilder(t, nil,
3491 Document{Name: "doc 0", Content: []byte("content 0")},
3492 Document{Name: "doc 1", Content: []byte("content 1")},
3493 )
3494 got := repoListEntries(b)
3495 want := []RepoListEntry{
3496 {
3497 Stats: RepoStats{
3498 Repos: 0,
3499 Shards: 1,
3500 Documents: 2,
3501 IndexBytes: 224,
3502 ContentBytes: 28,
3503 NewLinesCount: 0,
3504 DefaultBranchNewLinesCount: 0,
3505 OtherBranchesNewLinesCount: 0,
3506 },
3507 },
3508 }
3509
3510 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3511 t.Fatalf("mismatch (-want +got):\n%s", diff)
3512 }
3513
3514 })
3515
3516 t.Run("one compound shard", func(t *testing.T) {
3517 b := testIndexBuilderCompound(t,
3518 []*Repository{
3519 {Name: "repo 0"},
3520 {Name: "repo 1"},
3521 },
3522 [][]Document{
3523 {
3524 {Name: "doc 0", Content: []byte("content 0")},
3525 {Name: "doc 1", Content: []byte("content 1")},
3526 },
3527 {
3528 {Name: "doc 2", Content: []byte("content 2")},
3529 {Name: "doc 3", Content: []byte("content 3")},
3530 },
3531 },
3532 )
3533 got := repoListEntries(b)
3534 want := []RepoListEntry{
3535 {
3536 Stats: RepoStats{
3537 Repos: 0,
3538 Shards: 1,
3539 Documents: 2,
3540 IndexBytes: 180,
3541 ContentBytes: 28,
3542 NewLinesCount: 0,
3543 DefaultBranchNewLinesCount: 0,
3544 OtherBranchesNewLinesCount: 0,
3545 },
3546 },
3547 {
3548 Stats: RepoStats{
3549 Repos: 0,
3550 Shards: 1,
3551 Documents: 2,
3552 IndexBytes: 180,
3553 ContentBytes: 28,
3554 NewLinesCount: 0,
3555 DefaultBranchNewLinesCount: 0,
3556 OtherBranchesNewLinesCount: 0,
3557 },
3558 },
3559 }
3560
3561 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3562 t.Fatalf("mismatch (-want +got):\n%s", diff)
3563 }
3564 })
3565
3566 t.Run("compound shard with empty repos", func(t *testing.T) {
3567 b := testIndexBuilderCompound(t,
3568 []*Repository{
3569 {Name: "repo 0"},
3570 {Name: "repo 1"},
3571 {Name: "repo 2"},
3572 {Name: "repo 3"},
3573 {Name: "repo 4"},
3574 },
3575 [][]Document{
3576 {{Name: "doc 0", Content: []byte("content 0")}},
3577 nil,
3578 {{Name: "doc 1", Content: []byte("content 1")}},
3579 nil,
3580 nil,
3581 },
3582 )
3583 got := repoListEntries(b)
3584
3585 entryEmpty := RepoListEntry{Stats: RepoStats{
3586 Shards: 1,
3587 Documents: 0,
3588 ContentBytes: 0,
3589 }}
3590 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3591 Shards: 1,
3592 Documents: 1,
3593 ContentBytes: 14,
3594 }}
3595
3596 want := []RepoListEntry{
3597 entryNonEmpty,
3598 entryEmpty,
3599 entryNonEmpty,
3600 entryEmpty,
3601 entryEmpty,
3602 }
3603
3604 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3605 t.Fatalf("mismatch (-want +got):\n%s", diff)
3606 }
3607
3608 })
3609}
3610
3611// This tests the frequent pattern "\bLITERAL\b".
3612func TestWordSearch(t *testing.T) {
3613 content := []byte("needle the bla")
3614 // ----------------01234567890123
3615
3616 b := testIndexBuilder(t, nil,
3617 Document{
3618 Name: "f1",
3619 Content: content,
3620 })
3621
3622 t.Run("LineMatches", func(t *testing.T) {
3623 sres := searchForTest(t, b,
3624 &query.Regexp{
3625 Regexp: mustParseRE("\\bthe\\b"),
3626 CaseSensitive: true,
3627 Content: true,
3628 })
3629
3630 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3631 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3632 }
3633
3634 if sres.Stats.RegexpsConsidered != 0 {
3635 t.Fatal("expected regexp to be skipped")
3636 }
3637
3638 got := sres.Files[0].LineMatches[0]
3639 want := LineMatch{
3640 LineFragments: []LineFragmentMatch{{
3641 LineOffset: 7,
3642 Offset: 7,
3643 MatchLength: 3,
3644 }},
3645 Line: content,
3646 FileName: false,
3647 LineNumber: 1,
3648 LineStart: 0,
3649 LineEnd: 14,
3650 }
3651
3652 if !reflect.DeepEqual(got, want) {
3653 t.Errorf("got %#v, want %#v", got, want)
3654 }
3655 })
3656
3657 t.Run("ChunkMatches", func(t *testing.T) {
3658 sres := searchForTest(t, b,
3659 &query.Regexp{
3660 Regexp: mustParseRE("\\bthe\\b"),
3661 CaseSensitive: true,
3662 }, chunkOpts)
3663
3664 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3665 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3666 }
3667
3668 if sres.Stats.RegexpsConsidered != 0 {
3669 t.Fatal("expected regexp to be skipped")
3670 }
3671
3672 got := sres.Files[0].ChunkMatches[0]
3673 want := ChunkMatch{
3674 Content: content,
3675 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3676 Ranges: []Range{{
3677 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3678 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3679 }},
3680 }
3681
3682 if diff := cmp.Diff(want, got); diff != "" {
3683 t.Fatal(diff)
3684 }
3685 })
3686}