fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 }
42 r.Files[i].Checksum = nil
43 r.Files[i].Debug = ""
44 }
45}
46
47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
48 t.Helper()
49
50 b, err := NewIndexBuilder(repo)
51 if err != nil {
52 t.Fatalf("NewIndexBuilder: %v", err)
53 }
54
55 for i, d := range docs {
56 if err := b.Add(d); err != nil {
57 t.Fatalf("Add %d: %v", i, err)
58 }
59 }
60
61 return b
62}
63
64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
65 t.Helper()
66
67 b := newIndexBuilder()
68 b.indexFormatVersion = NextIndexFormatVersion
69
70 if len(repos) != len(docs) {
71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
72 }
73
74 for i, repo := range repos {
75 if err := b.setRepository(repo); err != nil {
76 t.Fatal(err)
77 }
78 for j, d := range docs[i] {
79 if err := b.Add(d); err != nil {
80 t.Fatalf("Add %d %d: %v", i, j, err)
81 }
82 }
83 }
84
85 return b
86}
87
88func TestBoundary(t *testing.T) {
89 b := testIndexBuilder(t, nil,
90 Document{Name: "f1", Content: []byte("x the")},
91 Document{Name: "f1", Content: []byte("reader")})
92 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
93 if len(res.Files) > 0 {
94 t.Fatalf("got %v, want no matches", res.Files)
95 }
96}
97
98func TestDocSectionInvalid(t *testing.T) {
99 b, err := NewIndexBuilder(nil)
100 if err != nil {
101 t.Fatalf("NewIndexBuilder: %v", err)
102 }
103 doc := Document{
104 Name: "f1",
105 Content: []byte("01234567890123"),
106 Symbols: []DocumentSection{{5, 8}, {7, 9}},
107 }
108
109 if err := b.Add(doc); err == nil {
110 t.Errorf("overlapping doc sections should fail")
111 }
112
113 doc = Document{
114 Name: "f1",
115 Content: []byte("01234567890123"),
116 Symbols: []DocumentSection{{0, 20}},
117 }
118
119 if err := b.Add(doc); err == nil {
120 t.Errorf("doc sections beyond EOF should fail")
121 }
122}
123
124func TestBasic(t *testing.T) {
125 b := testIndexBuilder(t, nil,
126 Document{
127 Name: "f2",
128 Content: []byte("to carry water in the no later bla"),
129 // --------------0123456789012345678901234567890123
130 })
131
132 t.Run("LineMatch", func(t *testing.T) {
133 res := searchForTest(t, b, &query.Substring{
134 Pattern: "water",
135 CaseSensitive: true,
136 })
137 fmatches := res.Files
138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
139 t.Fatalf("got %v, want 1 matches", fmatches)
140 }
141
142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
143 want := "f2:9"
144 if got != want {
145 t.Errorf("1: got %s, want %s", got, want)
146 }
147 })
148
149 t.Run("ChunkMatch", func(t *testing.T) {
150 res := searchForTest(t, b, &query.Substring{
151 Pattern: "water",
152 CaseSensitive: true,
153 }, chunkOpts)
154 fmatches := res.Files
155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
156 t.Fatalf("got %v, want 1 matches", fmatches)
157 }
158
159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
160 want := "f2:9"
161 if got != want {
162 t.Errorf("1: got %s, want %s", got, want)
163 }
164 })
165}
166
167func TestEmptyIndex(t *testing.T) {
168 b := testIndexBuilder(t, nil)
169 searcher := searcherForTest(t, b)
170
171 var opts SearchOptions
172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
173 t.Fatalf("Search: %v", err)
174 }
175
176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
177 t.Fatalf("List: %v", err)
178 }
179
180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
181 t.Fatalf("Search: %v", err)
182 }
183}
184
// memSeeker serves reads from an in-memory byte slice. The tests hand it to
// NewSearcher so an index written to a buffer can be searched without
// touching disk.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for this in-memory source.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at offset off. The returned
// slice aliases the underlying data. A request that reaches past the end of
// the data yields an error instead of a slice-bounds panic; the end offset is
// computed in 64 bits so that off+sz cannot wrap around.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read of %d bytes at offset %d exceeds size %d", sz, off, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the total number of bytes held.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
201
202func TestNewlines(t *testing.T) {
203 b := testIndexBuilder(t, nil,
204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
205 // ---------------------------------------------012345-678901-234
206
207 t.Run("LineMatches", func(t *testing.T) {
208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
209
210 matches := sres.Files
211 want := []FileMatch{{
212 FileName: "filename",
213 LineMatches: []LineMatch{{
214 LineFragments: []LineFragmentMatch{{
215 Offset: 8,
216 LineOffset: 2,
217 MatchLength: 3,
218 }},
219 Line: []byte("line2"),
220 LineStart: 6,
221 LineEnd: 11,
222 LineNumber: 2,
223 }},
224 }}
225
226 if !reflect.DeepEqual(matches, want) {
227 t.Errorf("got %v, want %v", matches, want)
228 }
229 })
230
231 t.Run("ChunkMatches", func(t *testing.T) {
232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
233
234 matches := sres.Files
235 want := []FileMatch{{
236 FileName: "filename",
237 ChunkMatches: []ChunkMatch{{
238 Content: []byte("line2"),
239 ContentStart: Location{
240 ByteOffset: 6,
241 LineNumber: 2,
242 Column: 1,
243 },
244 Ranges: []Range{{
245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
247 }},
248 }},
249 }}
250
251 if diff := cmp.Diff(want, matches); diff != "" {
252 t.Fatal(diff)
253 }
254 })
255}
256
257// A result spanning multiple lines should have LineMatches that only cover
258// single lines.
259func TestQueryNewlines(t *testing.T) {
260 text := "line1\nline2\nbla"
261 b := testIndexBuilder(t, nil,
262 Document{Name: "filename", Content: []byte(text)})
263
264 t.Run("LineMatches", func(t *testing.T) {
265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
266 matches := sres.Files
267 if len(matches) != 1 {
268 t.Fatalf("got %d file matches, want exactly one", len(matches))
269 }
270 m := matches[0]
271 if len(m.LineMatches) != 2 {
272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
273 }
274 })
275
276 t.Run("ChunkMatches", func(t *testing.T) {
277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
278 matches := sres.Files
279 if len(matches) != 1 {
280 t.Fatalf("got %d file matches, want exactly one", len(matches))
281 }
282 m := matches[0]
283 if len(m.ChunkMatches) != 1 {
284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
285 }
286 })
287}
288
// chunkOpts is the SearchOptions value the tests in this file pass to request
// results as ChunkMatches (ChunkMatches: true).
var chunkOpts = SearchOptions{ChunkMatches: true}
290
291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
292 searcher := searcherForTest(t, b)
293 var opts SearchOptions
294 if len(o) > 0 {
295 opts = o[0]
296 }
297 res, err := searcher.Search(context.Background(), q, &opts)
298 if err != nil {
299 t.Fatalf("Search(%s): %v", q, err)
300 }
301 clearScores(res)
302 return res
303}
304
305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
306 var buf bytes.Buffer
307 if err := b.Write(&buf); err != nil {
308 t.Fatal(err)
309 }
310 f := &memSeeker{buf.Bytes()}
311
312 searcher, err := NewSearcher(f)
313 if err != nil {
314 t.Fatalf("NewSearcher: %v", err)
315 }
316
317 return searcher
318}
319
320func TestCaseFold(t *testing.T) {
321 b := testIndexBuilder(t, nil,
322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
323 // -----------------------------------012345678901234
324 )
325 t.Run("LineMatches", func(t *testing.T) {
326 sres := searchForTest(t, b, &query.Substring{
327 Pattern: "bananas",
328 CaseSensitive: true,
329 })
330 matches := sres.Files
331 if len(matches) != 0 {
332 t.Errorf("foldcase: got %#v, want 0 matches", matches)
333 }
334
335 sres = searchForTest(t, b,
336 &query.Substring{
337 Pattern: "BaNaNAS",
338 CaseSensitive: true,
339 })
340 matches = sres.Files
341 if len(matches) != 1 {
342 t.Errorf("no foldcase: got %v, want 1 matches", matches)
343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
344 t.Errorf("foldcase: got %v, want offsets 7", matches)
345 }
346 })
347
348 t.Run("ChunkMatches", func(t *testing.T) {
349 sres := searchForTest(t, b, &query.Substring{
350 Pattern: "bananas",
351 CaseSensitive: true,
352 }, chunkOpts)
353 matches := sres.Files
354 if len(matches) != 0 {
355 t.Errorf("foldcase: got %#v, want 0 matches", matches)
356 }
357
358 sres = searchForTest(t, b,
359 &query.Substring{
360 Pattern: "BaNaNAS",
361 CaseSensitive: true,
362 })
363 matches = sres.Files
364 if len(matches) != 1 {
365 t.Errorf("no foldcase: got %v, want 1 matches", matches)
366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
367 t.Errorf("foldcase: got %v, want offsets 7", matches)
368 }
369 })
370}
371
372func TestSearchStats(t *testing.T) {
373 ctx := context.Background()
374 searcher := searcherForTest(t, testIndexBuilder(t, nil,
375 Document{Name: "f1", Content: []byte("x banana y")},
376 Document{Name: "f2", Content: []byte("x apple y")},
377 Document{Name: "f3", Content: []byte("x banana apple y")},
378 // -----------------------------------0123456789012345
379 ))
380
381 andQuery := query.NewAnd(
382 &query.Substring{
383 Pattern: "banana",
384 },
385 &query.Substring{
386 Pattern: "apple",
387 },
388 )
389
390 t.Run("LineMatches", func(t *testing.T) {
391 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
392 if err != nil {
393 t.Fatal(err)
394 }
395 matches := sres.Files
396 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
397 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
398 }
399
400 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
401 t.Fatalf("got %#v, want offsets 2,9", matches)
402 }
403 })
404 t.Run("ChunkMatches", func(t *testing.T) {
405 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
406 if err != nil {
407 t.Fatal(err)
408 }
409 matches := sres.Files
410 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
411 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
412 }
413
414 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
415 t.Fatalf("got %#v, want offsets 2,9", matches)
416 }
417 })
418 t.Run("Stats", func(t *testing.T) {
419 cases := []struct {
420 Name string
421 Q query.Q
422 Want Stats
423 }{{
424 Name: "and-query",
425 Q: andQuery,
426 Want: Stats{
427 FilesLoaded: 1,
428 ContentBytesLoaded: 18,
429 IndexBytesLoaded: 8,
430 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
431 NgramLookups: 104,
432 MatchCount: 2,
433 FileCount: 1,
434 FilesConsidered: 2,
435 ShardsScanned: 1,
436 },
437 }, {
438 Name: "one-trigram",
439 Q: &query.Substring{
440 Pattern: "a y",
441 Content: true,
442 CaseSensitive: true,
443 },
444 Want: Stats{
445 ContentBytesLoaded: 12,
446 IndexBytesLoaded: 1,
447 FileCount: 1,
448 FilesConsidered: 1,
449 FilesLoaded: 1,
450 ShardsScanned: 1,
451 MatchCount: 1,
452 NgramMatches: 1,
453 NgramLookups: 2, // once to lookup frequency then again to access posting list.
454 },
455 }, {
456 Name: "one-trigram-case-insensitive",
457 Q: &query.Substring{
458 Pattern: "a y",
459 Content: true,
460 },
461 Want: Stats{
462 ContentBytesLoaded: 12,
463 IndexBytesLoaded: 1,
464 FileCount: 1,
465 FilesConsidered: 1,
466 FilesLoaded: 1,
467 ShardsScanned: 1,
468 MatchCount: 1,
469 NgramMatches: 1,
470 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
471 },
472 }, {
473 Name: "one-trigram-pruned",
474 Q: &query.Substring{
475 Pattern: "foo",
476 Content: true,
477 CaseSensitive: true,
478 },
479 Want: Stats{
480 ShardsSkippedFilter: 1,
481 NgramLookups: 1, // only had to lookup once
482 },
483 }, {
484 Name: "one-trigram-branch-pruned",
485 Q: query.NewAnd(
486 &query.Substring{
487 Pattern: "foo",
488 Content: true,
489 CaseSensitive: true,
490 },
491 &query.Substring{
492 Pattern: "a y",
493 Content: true,
494 CaseSensitive: true,
495 },
496 ),
497 Want: Stats{
498 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
499 ShardsSkippedFilter: 1,
500 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
501 },
502 }}
503
504 for _, tc := range cases {
505 t.Run(tc.Name, func(t *testing.T) {
506 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
507 if err != nil {
508 t.Fatal(err)
509 }
510 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
511 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
512 }
513 })
514 }
515
516 })
517}
518
519func TestAndNegateSearch(t *testing.T) {
520 b := testIndexBuilder(t, nil,
521 Document{Name: "f1", Content: []byte("x banana y")},
522 // -----------------------------------0123456789
523 Document{Name: "f4", Content: []byte("x banana apple y")})
524
525 t.Run("LineMatches", func(t *testing.T) {
526 sres := searchForTest(t, b, query.NewAnd(
527 &query.Substring{
528 Pattern: "banana",
529 },
530 &query.Not{Child: &query.Substring{
531 Pattern: "apple",
532 }}))
533
534 matches := sres.Files
535
536 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
537 t.Fatalf("got %v, want 1 match", matches)
538 }
539 if matches[0].FileName != "f1" {
540 t.Fatalf("got match %#v, want FileName: f1", matches[0])
541 }
542 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
543 t.Fatalf("got %v, want offset 2", matches)
544 }
545 })
546
547 t.Run("ChunkMatches", func(t *testing.T) {
548 sres := searchForTest(t, b,
549 query.NewAnd(
550 &query.Substring{
551 Pattern: "banana",
552 },
553 &query.Not{Child: &query.Substring{
554 Pattern: "apple",
555 }},
556 ),
557 chunkOpts,
558 )
559
560 matches := sres.Files
561
562 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
563 t.Fatalf("got %v, want 1 match", matches)
564 }
565 if matches[0].FileName != "f1" {
566 t.Fatalf("got match %#v, want FileName: f1", matches[0])
567 }
568 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
569 t.Fatalf("got %v, want offset 2", matches)
570 }
571 })
572}
573
574func TestNegativeMatchesOnlyShortcut(t *testing.T) {
575 b := testIndexBuilder(t, nil,
576 Document{Name: "f1", Content: []byte("x banana y")},
577 Document{Name: "f2", Content: []byte("x appelmoes y")},
578 Document{Name: "f3", Content: []byte("x appelmoes y")},
579 Document{Name: "f3", Content: []byte("x appelmoes y")})
580
581 t.Run("LineMatches", func(t *testing.T) {
582 sres := searchForTest(t, b, query.NewAnd(
583 &query.Substring{
584 Pattern: "banana",
585 },
586 &query.Not{Child: &query.Substring{
587 Pattern: "appel",
588 }}))
589
590 if sres.Stats.FilesConsidered != 1 {
591 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
592 }
593 })
594
595 t.Run("ChunkMatches", func(t *testing.T) {
596 sres := searchForTest(t, b, query.NewAnd(
597 &query.Substring{
598 Pattern: "banana",
599 },
600 &query.Not{Child: &query.Substring{
601 Pattern: "appel",
602 }}), chunkOpts)
603
604 if sres.Stats.FilesConsidered != 1 {
605 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
606 }
607 })
608}
609
610func TestFileSearch(t *testing.T) {
611 b := testIndexBuilder(t, nil,
612 Document{Name: "banzana", Content: []byte("x orange y")},
613 // -------------0123456
614 Document{Name: "banana", Content: []byte("x apple y")},
615 // -------------012345
616 )
617
618 t.Run("LineMatches", func(t *testing.T) {
619 sres := searchForTest(t, b, &query.Substring{
620 Pattern: "anan",
621 FileName: true,
622 })
623
624 matches := sres.Files
625 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
626 t.Fatalf("got %v, want 1 match", matches)
627 }
628
629 got := matches[0].LineMatches[0]
630 want := LineMatch{
631 Line: []byte("banana"),
632 LineFragments: []LineFragmentMatch{{
633 Offset: 1,
634 LineOffset: 1,
635 MatchLength: 4,
636 }},
637 FileName: true,
638 }
639
640 if !reflect.DeepEqual(got, want) {
641 t.Errorf("got %#v, want %#v", got, want)
642 }
643 })
644
645 t.Run("ChunkMatches", func(t *testing.T) {
646 sres := searchForTest(t, b, &query.Substring{
647 Pattern: "anan",
648 FileName: true,
649 }, chunkOpts)
650
651 matches := sres.Files
652 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
653 t.Fatalf("got %v, want 1 match", matches)
654 }
655
656 got := matches[0].ChunkMatches[0]
657 want := ChunkMatch{
658 Content: []byte("banana"),
659 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
660 Ranges: []Range{{
661 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
662 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
663 }},
664 FileName: true,
665 }
666
667 if diff := cmp.Diff(want, got); diff != "" {
668 t.Fatal(diff)
669 }
670 })
671
672 t.Run("FileNameSet", func(t *testing.T) {
673 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
674
675 matches := sres.Files
676 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
677 t.Fatalf("got %v, want 1 match", matches)
678 }
679
680 got := matches[0].ChunkMatches[0]
681 want := ChunkMatch{
682 Content: []byte("banana"),
683 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
684 Ranges: []Range{{
685 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
686 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
687 }},
688 FileName: true,
689 }
690
691 if diff := cmp.Diff(want, got); diff != "" {
692 t.Fatal(diff)
693 }
694 })
695}
696
697func TestFileCase(t *testing.T) {
698 b := testIndexBuilder(t, nil,
699 Document{Name: "BANANA", Content: []byte("x orange y")})
700
701 t.Run("LineMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Substring{
703 Pattern: "banana",
704 FileName: true,
705 })
706
707 matches := sres.Files
708 if len(matches) != 1 || matches[0].FileName != "BANANA" {
709 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
710 }
711 })
712
713 t.Run("ChunkMatches", func(t *testing.T) {
714 sres := searchForTest(t, b, &query.Substring{
715 Pattern: "banana",
716 FileName: true,
717 }, chunkOpts)
718
719 matches := sres.Files
720 if len(matches) != 1 || matches[0].FileName != "BANANA" {
721 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
722 }
723 })
724}
725
726func TestFileRegexpSearchBruteForce(t *testing.T) {
727 b := testIndexBuilder(t, nil,
728 Document{Name: "banzana", Content: []byte("x orange y")},
729 Document{Name: "banana", Content: []byte("x apple y")},
730 )
731 t.Run("LineMatches", func(t *testing.T) {
732 sres := searchForTest(t, b, &query.Regexp{
733 Regexp: mustParseRE("[qn][zx]"),
734 FileName: true,
735 })
736
737 matches := sres.Files
738 if len(matches) != 1 || matches[0].FileName != "banzana" {
739 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
740 }
741 })
742 t.Run("LineMatches", func(t *testing.T) {
743 sres := searchForTest(t, b, &query.Regexp{
744 Regexp: mustParseRE("[qn][zx]"),
745 FileName: true,
746 }, chunkOpts)
747
748 matches := sres.Files
749 if len(matches) != 1 || matches[0].FileName != "banzana" {
750 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
751 }
752 })
753}
754
755func TestFileRegexpSearchShortString(t *testing.T) {
756 b := testIndexBuilder(t, nil,
757 Document{Name: "banana.py", Content: []byte("x orange y")})
758
759 t.Run("LineMatches", func(t *testing.T) {
760 sres := searchForTest(t, b, &query.Regexp{
761 Regexp: mustParseRE("ana.py"),
762 FileName: true,
763 })
764
765 matches := sres.Files
766 if len(matches) != 1 || matches[0].FileName != "banana.py" {
767 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
768 }
769 })
770
771 t.Run("ChunkMatches", func(t *testing.T) {
772 sres := searchForTest(t, b, &query.Regexp{
773 Regexp: mustParseRE("ana.py"),
774 FileName: true,
775 }, chunkOpts)
776
777 matches := sres.Files
778 if len(matches) != 1 || matches[0].FileName != "banana.py" {
779 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
780 }
781 })
782}
783
784func TestFileSubstringSearchBruteForce(t *testing.T) {
785 b := testIndexBuilder(t, nil,
786 Document{Name: "BANZANA", Content: []byte("x orange y")},
787 Document{Name: "banana", Content: []byte("x apple y")})
788
789 q := &query.Substring{
790 Pattern: "z",
791 FileName: true,
792 }
793
794 t.Run("LineMatches", func(t *testing.T) {
795 res := searchForTest(t, b, q)
796 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
797 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
798 }
799 })
800
801 t.Run("ChunkMatches", func(t *testing.T) {
802 res := searchForTest(t, b, q, chunkOpts)
803 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
804 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
805 }
806 })
807}
808
809func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
810 b := testIndexBuilder(t, nil,
811 Document{Name: "BANZANA", Content: []byte("x orange y")},
812 Document{Name: "bananaq", Content: []byte("x apple y")})
813
814 q := &query.Substring{
815 Pattern: "q",
816 FileName: true,
817 }
818 t.Run("LineMatches", func(t *testing.T) {
819 res := searchForTest(t, b, q)
820 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
821 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
822 }
823 })
824
825 t.Run("LineMatches", func(t *testing.T) {
826 res := searchForTest(t, b, q, chunkOpts)
827 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
828 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
829 }
830 })
831}
832
833func TestSearchMatchAll(t *testing.T) {
834 b := testIndexBuilder(t, nil,
835 Document{Name: "banzana", Content: []byte("x orange y")},
836 Document{Name: "banana", Content: []byte("x apple y")})
837
838 t.Run("LineMatches", func(t *testing.T) {
839 sres := searchForTest(t, b, &query.Const{Value: true})
840 matches := sres.Files
841 if len(matches) != 2 {
842 t.Fatalf("got %v, want 2 matches", matches)
843 }
844 })
845
846 t.Run("ChunkMatches", func(t *testing.T) {
847 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
848 matches := sres.Files
849 if len(matches) != 2 {
850 t.Fatalf("got %v, want 2 matches", matches)
851 }
852 })
853}
854
855func TestSearchNewline(t *testing.T) {
856 b := testIndexBuilder(t, nil,
857 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
858
859 t.Run("LineMatches", func(t *testing.T) {
860 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
861
862 // Just check that we don't crash.
863
864 matches := sres.Files
865 if len(matches) != 1 {
866 t.Fatalf("got %v, want 1 matches", matches)
867 }
868 })
869
870 t.Run("ChunkMatches", func(t *testing.T) {
871 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
872
873 // Just check that we don't crash.
874
875 matches := sres.Files
876 if len(matches) != 1 {
877 t.Fatalf("got %v, want 1 matches", matches)
878 }
879 })
880}
881
882func TestSearchMatchAllRegexp(t *testing.T) {
883 b := testIndexBuilder(t, nil,
884 Document{Name: "banzana", Content: []byte("abcd")},
885 Document{Name: "banana", Content: []byte("pqrs")})
886
887 t.Run("LineMatches", func(t *testing.T) {
888 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
889
890 matches := sres.Files
891 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
892 t.Fatalf("got %v, want 2 matches", matches)
893 }
894 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
895 t.Fatalf("want 4 chars in every file, got %#v", matches)
896 }
897
898 })
899
900 t.Run("ChunkMatches", func(t *testing.T) {
901 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
902
903 matches := sres.Files
904 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
905 t.Fatalf("got %v, want 2 matches", matches)
906 }
907 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
908 t.Fatalf("want 4 chars in every file, got %#v", matches)
909 }
910
911 })
912}
913
914func TestFileRestriction(t *testing.T) {
915 b := testIndexBuilder(t, nil,
916 Document{Name: "banana1", Content: []byte("x orange y")},
917 Document{Name: "banana2", Content: []byte("x apple y")},
918 Document{Name: "orange", Content: []byte("x apple z")})
919
920 t.Run("LineMatches", func(t *testing.T) {
921 sres := searchForTest(t, b, query.NewAnd(
922 &query.Substring{
923 Pattern: "banana",
924 FileName: true,
925 },
926 &query.Substring{
927 Pattern: "apple",
928 }))
929
930 matches := sres.Files
931 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
932 t.Fatalf("got %v, want 1 match", matches)
933 }
934
935 match := matches[0].LineMatches[0]
936 got := string(match.Line)
937 want := "x apple y"
938 if got != want {
939 t.Errorf("got match %#v, want line %q", match, want)
940 }
941 })
942
943 t.Run("ChunkMatches", func(t *testing.T) {
944 sres := searchForTest(t, b, query.NewAnd(
945 &query.Substring{
946 Pattern: "banana",
947 FileName: true,
948 },
949 &query.Substring{
950 Pattern: "apple",
951 }), chunkOpts)
952
953 matches := sres.Files
954 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
955 t.Fatalf("got %v, want 1 match", matches)
956 }
957
958 match := matches[0].ChunkMatches[0]
959 got := string(match.Content)
960 want := "x apple y"
961 if got != want {
962 t.Errorf("got match %#v, want line %q", match, want)
963 }
964 })
965}
966
967func TestFileNameBoundary(t *testing.T) {
968 b := testIndexBuilder(t, nil,
969 Document{Name: "banana2", Content: []byte("x apple y")},
970 Document{Name: "helpers.go", Content: []byte("x apple y")},
971 Document{Name: "foo", Content: []byte("x apple y")})
972
973 t.Run("LineMatches", func(t *testing.T) {
974 sres := searchForTest(t, b, &query.Substring{
975 Pattern: "helpers.go",
976 FileName: true,
977 })
978
979 matches := sres.Files
980 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
981 t.Fatalf("got %v, want 1 match", matches)
982 }
983 })
984
985 t.Run("ChunkMatches", func(t *testing.T) {
986 sres := searchForTest(t, b, &query.Substring{
987 Pattern: "helpers.go",
988 FileName: true,
989 }, chunkOpts)
990
991 matches := sres.Files
992 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
993 t.Fatalf("got %v, want 1 match", matches)
994 }
995 })
996}
997
998func TestDocumentOrder(t *testing.T) {
999 var docs []Document
1000 for i := 0; i < 3; i++ {
1001 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1002 }
1003
1004 b := testIndexBuilder(t, nil, docs...)
1005
1006 t.Run("LineMatches", func(t *testing.T) {
1007 sres := searchForTest(t, b, query.NewAnd(
1008 &query.Substring{
1009 Pattern: "needle",
1010 }))
1011
1012 want := []string{"f0", "f1", "f2"}
1013 var got []string
1014 for _, f := range sres.Files {
1015 got = append(got, f.FileName)
1016 }
1017 if !reflect.DeepEqual(got, want) {
1018 t.Fatalf("got %v, want %v", got, want)
1019 }
1020 })
1021
1022 t.Run("ChunkMatches", func(t *testing.T) {
1023 sres := searchForTest(t, b,
1024 query.NewAnd(&query.Substring{
1025 Pattern: "needle",
1026 }),
1027 chunkOpts,
1028 )
1029
1030 want := []string{"f0", "f1", "f2"}
1031 var got []string
1032 for _, f := range sres.Files {
1033 got = append(got, f.FileName)
1034 }
1035 if !reflect.DeepEqual(got, want) {
1036 t.Fatalf("got %v, want %v", got, want)
1037 }
1038 })
1039}
1040
1041func TestBranchMask(t *testing.T) {
1042 b := testIndexBuilder(t, &Repository{
1043 Branches: []RepositoryBranch{
1044 {"master", "v-master"},
1045 {"stable", "v-stable"},
1046 {"bonzai", "v-bonzai"},
1047 },
1048 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1049 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1050 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1051 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1052 )
1053
1054 t.Run("LineMatches", func(t *testing.T) {
1055 sres := searchForTest(t, b, query.NewAnd(
1056 &query.Substring{
1057 Pattern: "needle",
1058 },
1059 &query.Branch{
1060 Pattern: "table",
1061 }))
1062
1063 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1064 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1065 }
1066
1067 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1068 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1069 }
1070 })
1071
1072 t.Run("ChunkMatches", func(t *testing.T) {
1073 sres := searchForTest(t, b, query.NewAnd(
1074 &query.Substring{
1075 Pattern: "needle",
1076 },
1077 &query.Branch{
1078 Pattern: "table",
1079 }),
1080 chunkOpts,
1081 )
1082
1083 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1084 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1085 }
1086
1087 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1088 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1089 }
1090 })
1091}
1092
1093func TestBranchLimit(t *testing.T) {
1094 for limit := 64; limit <= 65; limit++ {
1095 r := &Repository{}
1096 for i := 0; i < limit; i++ {
1097 s := fmt.Sprintf("b%d", i)
1098 r.Branches = append(r.Branches, RepositoryBranch{
1099 s, "v-" + s,
1100 })
1101 }
1102 _, err := NewIndexBuilder(r)
1103 if limit == 64 && err != nil {
1104 t.Fatalf("NewIndexBuilder: %v", err)
1105 } else if limit == 65 && err == nil {
1106 t.Fatalf("NewIndexBuilder succeeded")
1107 }
1108 }
1109}
1110
1111func TestBranchReport(t *testing.T) {
1112 branches := []string{"stable", "master"}
1113 b := testIndexBuilder(t, &Repository{
1114 Branches: []RepositoryBranch{
1115 {"stable", "vs"},
1116 {"master", "vm"},
1117 },
1118 },
1119 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1120
1121 t.Run("LineMatches", func(t *testing.T) {
1122 sres := searchForTest(t, b, &query.Substring{
1123 Pattern: "needle",
1124 })
1125 if len(sres.Files) != 1 {
1126 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1127 }
1128
1129 f := sres.Files[0]
1130 if !reflect.DeepEqual(f.Branches, branches) {
1131 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1132 }
1133 })
1134
1135 t.Run("ChunkMatches", func(t *testing.T) {
1136 sres := searchForTest(t, b, &query.Substring{
1137 Pattern: "needle",
1138 }, chunkOpts)
1139 if len(sres.Files) != 1 {
1140 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1141 }
1142
1143 f := sres.Files[0]
1144 if !reflect.DeepEqual(f.Branches, branches) {
1145 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1146 }
1147 })
1148
1149}
1150
1151func TestBranchVersions(t *testing.T) {
1152 b := testIndexBuilder(t, &Repository{
1153 Branches: []RepositoryBranch{
1154 {"stable", "v-stable"},
1155 {"master", "v-master"},
1156 },
1157 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1158
1159 t.Run("LineMatches", func(t *testing.T) {
1160 sres := searchForTest(t, b, &query.Substring{
1161 Pattern: "needle",
1162 })
1163 if len(sres.Files) != 1 {
1164 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1165 }
1166
1167 f := sres.Files[0]
1168 if f.Version != "v-master" {
1169 t.Fatalf("got file %#v, want version 'v-master'", f)
1170 }
1171 })
1172
1173 t.Run("ChunkMatches", func(t *testing.T) {
1174 sres := searchForTest(t, b, &query.Substring{
1175 Pattern: "needle",
1176 }, chunkOpts)
1177 if len(sres.Files) != 1 {
1178 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1179 }
1180
1181 f := sres.Files[0]
1182 if f.Version != "v-master" {
1183 t.Fatalf("got file %#v, want version 'v-master'", f)
1184 }
1185 })
1186}
1187
// mustParseRE parses s with Perl syntax flags and returns the parsed
// expression. It panics with the parse error when s is not a valid pattern,
// so it is only suitable for hard-coded test patterns.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1196
1197func TestRegexp(t *testing.T) {
1198 content := []byte("needle the bla")
1199 // ----------------01234567890123
1200
1201 b := testIndexBuilder(t, nil,
1202 Document{
1203 Name: "f1",
1204 Content: content,
1205 })
1206
1207 t.Run("LineMatches", func(t *testing.T) {
1208 sres := searchForTest(t, b,
1209 &query.Regexp{
1210 Regexp: mustParseRE("dle.*bla"),
1211 })
1212
1213 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1214 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1215 }
1216
1217 got := sres.Files[0].LineMatches[0]
1218 want := LineMatch{
1219 LineFragments: []LineFragmentMatch{{
1220 LineOffset: 3,
1221 Offset: 3,
1222 MatchLength: 11,
1223 }},
1224 Line: content,
1225 FileName: false,
1226 LineNumber: 1,
1227 LineStart: 0,
1228 LineEnd: 14,
1229 }
1230
1231 if !reflect.DeepEqual(got, want) {
1232 t.Errorf("got %#v, want %#v", got, want)
1233 }
1234 })
1235
1236 t.Run("ChunkMatches", func(t *testing.T) {
1237 sres := searchForTest(t, b,
1238 &query.Regexp{
1239 Regexp: mustParseRE("dle.*bla"),
1240 }, chunkOpts)
1241
1242 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1243 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1244 }
1245
1246 got := sres.Files[0].ChunkMatches[0]
1247 want := ChunkMatch{
1248 Content: content,
1249 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1250 Ranges: []Range{{
1251 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1252 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1253 }},
1254 }
1255
1256 if diff := cmp.Diff(want, got); diff != "" {
1257 t.Fatal(diff)
1258 }
1259 })
1260}
1261
1262func TestRegexpFile(t *testing.T) {
1263 content := []byte("needle the bla")
1264
1265 name := "let's play: find the mussel"
1266 b := testIndexBuilder(t, nil,
1267 Document{Name: name, Content: content},
1268 Document{Name: "play.txt", Content: content})
1269
1270 t.Run("LineMatches", func(t *testing.T) {
1271 sres := searchForTest(t, b,
1272 &query.Regexp{
1273 Regexp: mustParseRE("play.*mussel"),
1274 FileName: true,
1275 })
1276
1277 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1278 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1279 }
1280
1281 if sres.Files[0].FileName != name {
1282 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1283 }
1284 })
1285
1286 t.Run("ChunkMatches", func(t *testing.T) {
1287 sres := searchForTest(t, b,
1288 &query.Regexp{
1289 Regexp: mustParseRE("play.*mussel"),
1290 FileName: true,
1291 }, chunkOpts)
1292
1293 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1295 }
1296
1297 if sres.Files[0].FileName != name {
1298 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1299 }
1300 })
1301}
1302
1303func TestRegexpOrder(t *testing.T) {
1304 content := []byte("bla the needle")
1305 // ----------------01234567890123
1306
1307 b := testIndexBuilder(t, nil,
1308 Document{Name: "f1", Content: content})
1309
1310 t.Run("LineMatches", func(t *testing.T) {
1311 sres := searchForTest(t, b,
1312 &query.Regexp{
1313 Regexp: mustParseRE("dle.*bla"),
1314 })
1315
1316 if len(sres.Files) != 0 {
1317 t.Fatalf("got %v, want 0 matches", sres.Files)
1318 }
1319 })
1320
1321 t.Run("ChunkMatches", func(t *testing.T) {
1322 sres := searchForTest(t, b,
1323 &query.Regexp{
1324 Regexp: mustParseRE("dle.*bla"),
1325 })
1326
1327 if len(sres.Files) != 0 {
1328 t.Fatalf("got %v, want 0 matches", sres.Files)
1329 }
1330 })
1331}
1332
1333func TestRepoName(t *testing.T) {
1334 content := []byte("bla the needle")
1335 // ----------------01234567890123
1336
1337 b := testIndexBuilder(t, &Repository{Name: "bla"},
1338 Document{Name: "f1", Content: content})
1339
1340 t.Run("LineMatches", func(t *testing.T) {
1341 sres := searchForTest(t, b,
1342 query.NewAnd(
1343 &query.Substring{Pattern: "needle"},
1344 &query.Repo{Regexp: regexp.MustCompile("foo")},
1345 ))
1346
1347 if len(sres.Files) != 0 {
1348 t.Fatalf("got %v, want 0 matches", sres.Files)
1349 }
1350
1351 if sres.Stats.FilesConsidered > 0 {
1352 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1353 }
1354
1355 sres = searchForTest(t, b,
1356 query.NewAnd(
1357 &query.Substring{Pattern: "needle"},
1358 &query.Repo{Regexp: regexp.MustCompile("bla")},
1359 ))
1360 if len(sres.Files) != 1 {
1361 t.Fatalf("got %v, want 1 match", sres.Files)
1362 }
1363 })
1364
1365 t.Run("ChunkMatches", func(t *testing.T) {
1366 sres := searchForTest(t, b,
1367 query.NewAnd(
1368 &query.Substring{Pattern: "needle"},
1369 &query.Repo{Regexp: regexp.MustCompile("foo")},
1370 ),
1371 chunkOpts,
1372 )
1373
1374 if len(sres.Files) != 0 {
1375 t.Fatalf("got %v, want 0 matches", sres.Files)
1376 }
1377
1378 if sres.Stats.FilesConsidered > 0 {
1379 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1380 }
1381
1382 sres = searchForTest(t, b,
1383 query.NewAnd(
1384 &query.Substring{Pattern: "needle"},
1385 &query.Repo{Regexp: regexp.MustCompile("bla")},
1386 ))
1387 if len(sres.Files) != 1 {
1388 t.Fatalf("got %v, want 1 match", sres.Files)
1389 }
1390 })
1391}
1392
1393func TestMergeMatches(t *testing.T) {
1394 content := []byte("blablabla")
1395 b := testIndexBuilder(t, nil,
1396 Document{Name: "f1", Content: content})
1397
1398 t.Run("LineMatches", func(t *testing.T) {
1399 sres := searchForTest(t, b,
1400 &query.Substring{Pattern: "bla"})
1401 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1402 t.Fatalf("got %v, want 1 match", sres.Files)
1403 }
1404 })
1405
1406 t.Run("ChunkMatches", func(t *testing.T) {
1407 sres := searchForTest(t, b,
1408 &query.Substring{Pattern: "bla"},
1409 chunkOpts,
1410 )
1411 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1412 t.Fatalf("got %v, want 1 match", sres.Files)
1413 }
1414 })
1415}
1416
1417func TestRepoURL(t *testing.T) {
1418 content := []byte("blablabla")
1419 b := testIndexBuilder(t, &Repository{
1420 Name: "name",
1421 URL: "URL",
1422 CommitURLTemplate: "commit",
1423 FileURLTemplate: "file-url",
1424 LineFragmentTemplate: "fragment",
1425 }, Document{Name: "f1", Content: content})
1426
1427 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1428
1429 if sres.RepoURLs["name"] != "file-url" {
1430 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1431 }
1432 if sres.LineFragments["name"] != "fragment" {
1433 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1434 }
1435}
1436
1437func TestRegexpCaseSensitive(t *testing.T) {
1438 content := []byte("bla\nfunc unmarshalGitiles\n")
1439 b := testIndexBuilder(t, nil, Document{
1440 Name: "f1",
1441 Content: content,
1442 })
1443
1444 t.Run("LineMatches", func(t *testing.T) {
1445 res := searchForTest(t, b,
1446 &query.Regexp{
1447 Regexp: mustParseRE("func.*Gitiles"),
1448 CaseSensitive: true,
1449 })
1450
1451 if len(res.Files) != 1 {
1452 t.Fatalf("got %v, want one match", res.Files)
1453 }
1454 })
1455
1456 t.Run("ChunkMatches", func(t *testing.T) {
1457 res := searchForTest(t, b,
1458 &query.Regexp{
1459 Regexp: mustParseRE("func.*Gitiles"),
1460 CaseSensitive: true,
1461 },
1462 chunkOpts,
1463 )
1464
1465 if len(res.Files) != 1 {
1466 t.Fatalf("got %v, want one match", res.Files)
1467 }
1468 })
1469}
1470
1471func TestRegexpCaseFolding(t *testing.T) {
1472 content := []byte("bla\nfunc unmarshalGitiles\n")
1473
1474 b := testIndexBuilder(t, nil,
1475 Document{Name: "f1", Content: content})
1476 res := searchForTest(t, b,
1477 &query.Regexp{
1478 Regexp: mustParseRE("func.*GITILES"),
1479 CaseSensitive: false,
1480 })
1481
1482 if len(res.Files) != 1 {
1483 t.Fatalf("got %v, want one match", res.Files)
1484 }
1485}
1486
1487func TestCaseRegexp(t *testing.T) {
1488 content := []byte("BLABLABLA")
1489 b := testIndexBuilder(t, nil,
1490 Document{Name: "f1", Content: content})
1491
1492 t.Run("LineMatches", func(t *testing.T) {
1493 res := searchForTest(t, b,
1494 &query.Regexp{
1495 Regexp: mustParseRE("[xb][xl][xa]"),
1496 CaseSensitive: true,
1497 })
1498
1499 if len(res.Files) > 0 {
1500 t.Fatalf("got %v, want no matches", res.Files)
1501 }
1502 })
1503
1504 t.Run("ChunkMatches", func(t *testing.T) {
1505 res := searchForTest(t, b,
1506 &query.Regexp{
1507 Regexp: mustParseRE("[xb][xl][xa]"),
1508 CaseSensitive: true,
1509 },
1510 chunkOpts,
1511 )
1512
1513 if len(res.Files) > 0 {
1514 t.Fatalf("got %v, want no matches", res.Files)
1515 }
1516 })
1517}
1518
1519func TestNegativeRegexp(t *testing.T) {
1520 content := []byte("BLABLABLA needle bla")
1521 b := testIndexBuilder(t, nil,
1522 Document{Name: "f1", Content: content})
1523
1524 t.Run("LineMatches", func(t *testing.T) {
1525 res := searchForTest(t, b,
1526 query.NewAnd(
1527 &query.Substring{
1528 Pattern: "needle",
1529 },
1530 &query.Not{
1531 Child: &query.Regexp{
1532 Regexp: mustParseRE(".cs"),
1533 },
1534 }))
1535
1536 if len(res.Files) != 1 {
1537 t.Fatalf("got %v, want 1 match", res.Files)
1538 }
1539 })
1540
1541 t.Run("ChunkMatches", func(t *testing.T) {
1542 res := searchForTest(t, b,
1543 query.NewAnd(
1544 &query.Substring{
1545 Pattern: "needle",
1546 },
1547 &query.Not{
1548 Child: &query.Regexp{
1549 Regexp: mustParseRE(".cs"),
1550 },
1551 },
1552 ),
1553 chunkOpts)
1554
1555 if len(res.Files) != 1 {
1556 t.Fatalf("got %v, want 1 match", res.Files)
1557 }
1558 })
1559}
1560
1561func TestSymbolRank(t *testing.T) {
1562 t.Skip()
1563
1564 content := []byte("func bla() blubxxxxx")
1565 // ----------------01234567890123456789
1566 b := testIndexBuilder(t, nil,
1567 Document{
1568 Name: "f1",
1569 Content: content,
1570 }, Document{
1571 Name: "f2",
1572 Content: content,
1573 Symbols: []DocumentSection{{5, 8}},
1574 }, Document{
1575 Name: "f3",
1576 Content: content,
1577 })
1578
1579 t.Run("LineMatches", func(t *testing.T) {
1580 res := searchForTest(t, b,
1581 &query.Substring{
1582 CaseSensitive: false,
1583 Pattern: "bla",
1584 })
1585
1586 if len(res.Files) != 3 {
1587 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1588 }
1589 if res.Files[0].FileName != "f2" {
1590 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1591 }
1592 })
1593
1594 t.Run("ChunkMatches", func(t *testing.T) {
1595 res := searchForTest(t, b,
1596 &query.Substring{
1597 CaseSensitive: false,
1598 Pattern: "bla",
1599 }, chunkOpts)
1600
1601 if len(res.Files) != 3 {
1602 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1603 }
1604 if res.Files[0].FileName != "f2" {
1605 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1606 }
1607 })
1608}
1609
1610func TestSymbolRankRegexpUTF8(t *testing.T) {
1611 t.Skip()
1612
1613 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1614 content := []byte(prefix +
1615 "func bla() blub")
1616 // ------012345678901234
1617 b := testIndexBuilder(t, nil,
1618 Document{
1619 Name: "f1",
1620 Content: content,
1621 }, Document{
1622 Name: "f2",
1623 Content: content,
1624 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1625 }, Document{
1626 Name: "f3",
1627 Content: content,
1628 })
1629
1630 t.Run("LineMatches", func(t *testing.T) {
1631 res := searchForTest(t, b,
1632 &query.Regexp{
1633 Regexp: mustParseRE("b.a"),
1634 })
1635
1636 if len(res.Files) != 3 {
1637 t.Fatalf("got %#v, want 3 files", res.Files)
1638 }
1639 if res.Files[0].FileName != "f2" {
1640 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1641 }
1642 })
1643
1644 t.Run("ChunjkMatches", func(t *testing.T) {
1645 res := searchForTest(t, b,
1646 &query.Regexp{
1647 Regexp: mustParseRE("b.a"),
1648 }, chunkOpts)
1649
1650 if len(res.Files) != 3 {
1651 t.Fatalf("got %#v, want 3 files", res.Files)
1652 }
1653 if res.Files[0].FileName != "f2" {
1654 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1655 }
1656 })
1657}
1658
1659func TestPartialSymbolRank(t *testing.T) {
1660 t.Skip()
1661
1662 content := []byte("func bla() blub")
1663 // ----------------012345678901234
1664
1665 b := testIndexBuilder(t, nil,
1666 Document{
1667 Name: "f1",
1668 Content: content,
1669 Symbols: []DocumentSection{{4, 9}},
1670 }, Document{
1671 Name: "f2",
1672 Content: content,
1673 Symbols: []DocumentSection{{4, 8}},
1674 }, Document{
1675 Name: "f3",
1676 Content: content,
1677 Symbols: []DocumentSection{{4, 9}},
1678 })
1679
1680 t.Run("LineMatches", func(t *testing.T) {
1681 res := searchForTest(t, b,
1682 &query.Substring{
1683 Pattern: "bla",
1684 })
1685
1686 if len(res.Files) != 3 {
1687 t.Fatalf("got %#v, want 3 files", res.Files)
1688 }
1689 if res.Files[0].FileName != "f2" {
1690 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1691 }
1692 })
1693
1694 t.Run("ChunkMatches", func(t *testing.T) {
1695 res := searchForTest(t, b,
1696 &query.Substring{
1697 Pattern: "bla",
1698 }, chunkOpts)
1699
1700 if len(res.Files) != 3 {
1701 t.Fatalf("got %#v, want 3 files", res.Files)
1702 }
1703 if res.Files[0].FileName != "f2" {
1704 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1705 }
1706 })
1707}
1708
1709func TestNegativeRepo(t *testing.T) {
1710 content := []byte("bla the needle")
1711 // ----------------01234567890123
1712 b := testIndexBuilder(t, &Repository{
1713 Name: "bla",
1714 }, Document{Name: "f1", Content: content})
1715
1716 t.Run("LineMatches", func(t *testing.T) {
1717 sres := searchForTest(t, b,
1718 query.NewAnd(
1719 &query.Substring{Pattern: "needle"},
1720 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1721 ))
1722
1723 if len(sres.Files) != 0 {
1724 t.Fatalf("got %v, want 0 matches", sres.Files)
1725 }
1726 })
1727
1728 t.Run("ChunkMatches", func(t *testing.T) {
1729 sres := searchForTest(t, b,
1730 query.NewAnd(
1731 &query.Substring{Pattern: "needle"},
1732 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1733 ), chunkOpts)
1734
1735 if len(sres.Files) != 0 {
1736 t.Fatalf("got %v, want 0 matches", sres.Files)
1737 }
1738 })
1739}
1740
1741func TestListRepos(t *testing.T) {
1742 content := []byte("bla the needle\n")
1743 // ----------------012345678901234-
1744
1745 t.Run("default and minimal fallback", func(t *testing.T) {
1746 repo := &Repository{
1747 Name: "reponame",
1748 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1749 }
1750 b := testIndexBuilder(t, repo,
1751 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1752 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1753 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1754 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1755
1756 searcher := searcherForTest(t, b)
1757
1758 for _, opts := range []*ListOptions{
1759 nil,
1760 {},
1761 {Field: RepoListFieldRepos},
1762 {Field: RepoListFieldReposMap},
1763 } {
1764 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1765 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1766
1767 res, err := searcher.List(context.Background(), q, opts)
1768 if err != nil {
1769 t.Fatalf("List(%v): %v", q, err)
1770 }
1771
1772 want := &RepoList{
1773 Repos: []*RepoListEntry{{
1774 Repository: *repo,
1775 Stats: RepoStats{
1776 Documents: 4,
1777 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1778 Shards: 1,
1779
1780 NewLinesCount: 4,
1781 DefaultBranchNewLinesCount: 2,
1782 OtherBranchesNewLinesCount: 3,
1783 },
1784 }},
1785 Stats: RepoStats{
1786 Repos: 1,
1787 Documents: 4,
1788 ContentBytes: 68,
1789 Shards: 1,
1790
1791 NewLinesCount: 4,
1792 DefaultBranchNewLinesCount: 2,
1793 OtherBranchesNewLinesCount: 3,
1794 },
1795 }
1796 ignored := []cmp.Option{
1797 cmpopts.EquateEmpty(),
1798 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1799 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1800 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1801 cmpopts.IgnoreFields(Repository{}, "priority"),
1802 }
1803 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1804 t.Fatalf("mismatch (-want +got):\n%s", diff)
1805 }
1806
1807 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1808 res, err = searcher.List(context.Background(), q, nil)
1809 if err != nil {
1810 t.Fatalf("List(%v): %v", q, err)
1811 }
1812 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1813 t.Fatalf("got %v, want 0 matches", res)
1814 }
1815 })
1816 }
1817 })
1818
1819 t.Run("minimal", func(t *testing.T) {
1820 repo := &Repository{
1821 ID: 1234,
1822 Name: "reponame",
1823 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1824 RawConfig: map[string]string{"repoid": "1234"},
1825 }
1826 b := testIndexBuilder(t, repo,
1827 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1828 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1829 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1830 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1831
1832 searcher := searcherForTest(t, b)
1833
1834 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1835 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1836 if err != nil {
1837 t.Fatalf("List(%v): %v", q, err)
1838 }
1839
1840 want := &RepoList{
1841 ReposMap: ReposMap{
1842 repo.ID: {
1843 HasSymbols: repo.HasSymbols,
1844 Branches: repo.Branches,
1845 },
1846 },
1847 Stats: RepoStats{
1848 Repos: 1,
1849 Shards: 1,
1850 Documents: 4,
1851 IndexBytes: 412,
1852 ContentBytes: 68,
1853 NewLinesCount: 4,
1854 DefaultBranchNewLinesCount: 2,
1855 OtherBranchesNewLinesCount: 3,
1856 },
1857 }
1858
1859 ignored := []cmp.Option{
1860 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1861 }
1862 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1863 t.Fatalf("mismatch (-want +got):\n%s", diff)
1864 }
1865
1866 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1867 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1868 if err != nil {
1869 t.Fatalf("List(%v): %v", q, err)
1870 }
1871 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1872 t.Fatalf("got %v, want 0 matches", res)
1873 }
1874 })
1875}
1876
1877func TestListReposByContent(t *testing.T) {
1878 content := []byte("bla the needle")
1879
1880 b := testIndexBuilder(t, &Repository{
1881 Name: "reponame",
1882 },
1883 Document{Name: "f1", Content: content},
1884 Document{Name: "f2", Content: content})
1885
1886 searcher := searcherForTest(t, b)
1887 q := &query.Substring{Pattern: "needle"}
1888 res, err := searcher.List(context.Background(), q, nil)
1889 if err != nil {
1890 t.Fatalf("List(%v): %v", q, err)
1891 }
1892 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1893 t.Fatalf("got %v, want 1 matches", res)
1894 }
1895 if got := res.Repos[0].Stats.Shards; got != 1 {
1896 t.Fatalf("got %d, want 1 shard", got)
1897 }
1898 q = &query.Substring{Pattern: "foo"}
1899 res, err = searcher.List(context.Background(), q, nil)
1900 if err != nil {
1901 t.Fatalf("List(%v): %v", q, err)
1902 }
1903 if len(res.Repos) != 0 {
1904 t.Fatalf("got %v, want 0 matches", res)
1905 }
1906}
1907
1908func TestMetadata(t *testing.T) {
1909 content := []byte("bla the needle")
1910
1911 b := testIndexBuilder(t, &Repository{
1912 Name: "reponame",
1913 }, Document{Name: "f1", Content: content},
1914 Document{Name: "f2", Content: content})
1915
1916 var buf bytes.Buffer
1917 if err := b.Write(&buf); err != nil {
1918 t.Fatal(err)
1919 }
1920 f := &memSeeker{buf.Bytes()}
1921
1922 rd, _, err := ReadMetadata(f)
1923 if err != nil {
1924 t.Fatalf("ReadMetadata: %v", err)
1925 }
1926
1927 if got, want := rd[0].Name, "reponame"; got != want {
1928 t.Fatalf("got %q want %q", got, want)
1929 }
1930}
1931
1932func TestOr(t *testing.T) {
1933 b := testIndexBuilder(t, nil,
1934 Document{Name: "f1", Content: []byte("needle")},
1935 Document{Name: "f2", Content: []byte("banana")})
1936 t.Run("LineMatches", func(t *testing.T) {
1937 sres := searchForTest(t, b, query.NewOr(
1938 &query.Substring{Pattern: "needle"},
1939 &query.Substring{Pattern: "banana"}))
1940
1941 if len(sres.Files) != 2 {
1942 t.Fatalf("got %v, want 2 files", sres.Files)
1943 }
1944 })
1945
1946 t.Run("ChunkMatches", func(t *testing.T) {
1947 sres := searchForTest(t, b, query.NewOr(
1948 &query.Substring{Pattern: "needle"},
1949 &query.Substring{Pattern: "banana"}))
1950
1951 if len(sres.Files) != 2 {
1952 t.Fatalf("got %v, want 2 files", sres.Files)
1953 }
1954 })
1955}
1956
1957func TestFrequency(t *testing.T) {
1958 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
1959
1960 b := testIndexBuilder(t, nil,
1961 Document{
1962 Name: "f1",
1963 Content: content,
1964 })
1965
1966 t.Run("LineMatches", func(t *testing.T) {
1967 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
1968 if len(sres.Files) != 0 {
1969 t.Errorf("got %v, wanted 0 matches", sres.Files)
1970 }
1971 })
1972
1973 t.Run("ChunkMatches", func(t *testing.T) {
1974 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
1975 if len(sres.Files) != 0 {
1976 t.Errorf("got %v, wanted 0 matches", sres.Files)
1977 }
1978 })
1979}
1980
1981func TestMatchNewline(t *testing.T) {
1982 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
1983 if err != nil {
1984 t.Fatalf("syntax.Parse: %v", err)
1985 }
1986
1987 content := []byte("pqr\nalex")
1988
1989 b := testIndexBuilder(t, nil,
1990 Document{
1991 Name: "f1",
1992 Content: content,
1993 })
1994
1995 t.Run("LineMatches", func(t *testing.T) {
1996 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
1997 if len(sres.Files) != 1 {
1998 t.Errorf("got %v, wanted 1 matches", sres.Files)
1999 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2000 t.Errorf("got match line %q, want %q", l, content)
2001 }
2002 })
2003
2004 t.Run("ChunkMatches", func(t *testing.T) {
2005 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2006 if len(sres.Files) != 1 {
2007 t.Errorf("got %v, wanted 1 matches", sres.Files)
2008 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2009 t.Errorf("got match line %q, want %q", c, content)
2010 }
2011 })
2012}
2013
2014func TestSubRepo(t *testing.T) {
2015 subRepos := map[string]*Repository{
2016 "sub": {
2017 Name: "sub-name",
2018 LineFragmentTemplate: "sub-line",
2019 },
2020 }
2021
2022 content := []byte("pqr\nalex")
2023
2024 b := testIndexBuilder(t, &Repository{
2025 SubRepoMap: subRepos,
2026 }, Document{
2027 Name: "sub/f1",
2028 Content: content,
2029 SubRepositoryPath: "sub",
2030 })
2031
2032 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2033 if len(sres.Files) != 1 {
2034 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2035 }
2036
2037 f := sres.Files[0]
2038 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2039 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2040 }
2041
2042 if sres.LineFragments["sub-name"] != "sub-line" {
2043 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2044 }
2045}
2046
2047func TestSearchEither(t *testing.T) {
2048 b := testIndexBuilder(t, nil,
2049 Document{Name: "f1", Content: []byte("bla needle bla")},
2050 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2051
2052 t.Run("LineMatches", func(t *testing.T) {
2053 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2054 if len(sres.Files) != 2 {
2055 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2056 }
2057
2058 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2059 if len(sres.Files) != 1 {
2060 t.Fatalf("got %v, wanted 1 match", sres.Files)
2061 }
2062
2063 if got, want := sres.Files[0].FileName, "f1"; got != want {
2064 t.Errorf("got %q, want %q", got, want)
2065 }
2066 })
2067
2068 t.Run("ChunkMatches", func(t *testing.T) {
2069 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2070 if len(sres.Files) != 2 {
2071 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2072 }
2073
2074 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2075 if len(sres.Files) != 1 {
2076 t.Fatalf("got %v, wanted 1 match", sres.Files)
2077 }
2078
2079 if got, want := sres.Files[0].FileName, "f1"; got != want {
2080 t.Errorf("got %q, want %q", got, want)
2081 }
2082 })
2083}
2084
2085func TestUnicodeExactMatch(t *testing.T) {
2086 needle := "néédlÉ"
2087 content := []byte("blá blá " + needle + " blâ")
2088
2089 b := testIndexBuilder(t, nil,
2090 Document{Name: "f1", Content: content})
2091
2092 t.Run("LineMatches", func(t *testing.T) {
2093 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2094 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2095 }
2096 })
2097
2098 t.Run("ChunkMatches", func(t *testing.T) {
2099 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2100 if len(res.Files) != 1 {
2101 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2102 }
2103 })
2104}
2105
2106func TestUnicodeCoverContent(t *testing.T) {
2107 needle := "néédlÉ"
2108 content := []byte("blá blá " + needle + " blâ")
2109
2110 b := testIndexBuilder(t, nil,
2111 Document{Name: "f1", Content: content})
2112
2113 t.Run("LineMatches", func(t *testing.T) {
2114 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2115 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2116 }
2117
2118 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2119 if len(res.Files) != 1 {
2120 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2121 }
2122
2123 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2124 t.Errorf("got %d want %d", got, want)
2125 }
2126 })
2127
2128 t.Run("ChunkMatches", func(t *testing.T) {
2129 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2130 if len(res.Files) != 0 {
2131 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2132 }
2133
2134 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2135 if len(res.Files) != 1 {
2136 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2137 }
2138
2139 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2140 want := uint32(strings.Index(string(content), needle))
2141 if got != want {
2142 t.Errorf("got %d want %d", got, want)
2143 }
2144 })
2145}
2146
2147func TestUnicodeNonCoverContent(t *testing.T) {
2148 needle := "nééáádlÉ"
2149 content := []byte("blá blá " + needle + " blâ")
2150
2151 b := testIndexBuilder(t, nil,
2152 Document{Name: "f1", Content: content})
2153
2154 t.Run("LineMatches", func(t *testing.T) {
2155 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2156 if len(res.Files) != 1 {
2157 t.Fatalf("got %v, wanted 1 match", res.Files)
2158 }
2159
2160 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2161 t.Errorf("got %d want %d", got, want)
2162 }
2163 })
2164
2165 t.Run("ChunkMatches", func(t *testing.T) {
2166 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2167 if len(res.Files) != 1 {
2168 t.Fatalf("got %v, wanted 1 match", res.Files)
2169 }
2170
2171 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2172 want := uint32(strings.Index(string(content), needle))
2173 if got != want {
2174 t.Errorf("got %d want %d", got, want)
2175 }
2176 })
2177}
2178
// kelvinCodePoint is U+212A (KELVIN SIGN, decimal 8490). The tests in this
// file pair it with the ASCII letter 'k' as a multi-byte counterpart.
const kelvinCodePoint = 0x212A
2180
2181func TestUnicodeVariableLength(t *testing.T) {
2182 lower := 'k'
2183 upper := rune(kelvinCodePoint)
2184
2185 needle := "nee" + string([]rune{lower}) + "eed"
2186 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2187 " ee" + string([]rune{lower}) + "ee" +
2188 " ee" + string([]rune{upper}) + "ee")
2189
2190 t.Run("LineMatches", func(t *testing.T) {
2191 b := testIndexBuilder(t, nil,
2192 Document{Name: "f1", Content: []byte(corpus)})
2193
2194 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2195 if len(res.Files) != 1 {
2196 t.Fatalf("got %v, wanted 1 match", res.Files)
2197 }
2198 })
2199
2200 t.Run("ChunkMatches", func(t *testing.T) {
2201 b := testIndexBuilder(t, nil,
2202 Document{Name: "f1", Content: []byte(corpus)})
2203
2204 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2205 if len(res.Files) != 1 {
2206 t.Fatalf("got %v, wanted 1 match", res.Files)
2207 }
2208 })
2209}
2210
2211func TestUnicodeFileStartOffsets(t *testing.T) {
2212 unicode := "世界"
2213 wat := "waaaaaat"
2214 b := testIndexBuilder(t, nil,
2215 Document{
2216 Name: "f1",
2217 Content: []byte(unicode),
2218 },
2219 Document{
2220 Name: "f2",
2221 Content: []byte(wat),
2222 },
2223 )
2224 q := &query.Substring{Pattern: wat, Content: true}
2225 res := searchForTest(t, b, q)
2226 if len(res.Files) != 1 {
2227 t.Fatalf("got %v, wanted 1 match", res.Files)
2228 }
2229}
2230
2231func TestLongFileUTF8(t *testing.T) {
2232 needle := "neeedle"
2233
2234 // 6 bytes.
2235 unicode := "世界"
2236 content := []byte(strings.Repeat(unicode, 100) + needle)
2237 b := testIndexBuilder(t, nil,
2238 Document{
2239 Name: "f1",
2240 Content: []byte(strings.Repeat("a", 50)),
2241 },
2242 Document{
2243 Name: "f2",
2244 Content: content,
2245 })
2246
2247 t.Run("LineMatches", func(t *testing.T) {
2248 q := &query.Substring{Pattern: needle, Content: true}
2249 res := searchForTest(t, b, q)
2250 if len(res.Files) != 1 {
2251 t.Errorf("got %v, want 1 result", res)
2252 }
2253 })
2254
2255 t.Run("ChunkMatches", func(t *testing.T) {
2256 q := &query.Substring{Pattern: needle, Content: true}
2257 res := searchForTest(t, b, q, chunkOpts)
2258 if len(res.Files) != 1 {
2259 t.Errorf("got %v, want 1 result", res)
2260 }
2261 })
2262}
2263
2264func TestEstimateDocCount(t *testing.T) {
2265 content := []byte("bla needle bla")
2266 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2267 Document{Name: "f1", Content: content},
2268 Document{Name: "f2", Content: content},
2269 )
2270
2271 t.Run("LineMatches", func(t *testing.T) {
2272 if sres := searchForTest(t, b,
2273 query.NewAnd(
2274 &query.Substring{Pattern: "needle"},
2275 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2276 ), SearchOptions{
2277 EstimateDocCount: true,
2278 }); sres.Stats.ShardFilesConsidered != 2 {
2279 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2280 }
2281 if sres := searchForTest(t, b,
2282 query.NewAnd(
2283 &query.Substring{Pattern: "needle"},
2284 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2285 ), SearchOptions{
2286 EstimateDocCount: true,
2287 }); sres.Stats.ShardFilesConsidered != 0 {
2288 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2289 }
2290 })
2291
2292 t.Run("ChunkMatches", func(t *testing.T) {
2293 if sres := searchForTest(t, b,
2294 query.NewAnd(
2295 &query.Substring{Pattern: "needle"},
2296 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2297 ), SearchOptions{
2298 EstimateDocCount: true,
2299 ChunkMatches: true,
2300 }); sres.Stats.ShardFilesConsidered != 2 {
2301 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2302 }
2303 if sres := searchForTest(t, b,
2304 query.NewAnd(
2305 &query.Substring{Pattern: "needle"},
2306 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2307 ), SearchOptions{
2308 EstimateDocCount: true,
2309 ChunkMatches: true,
2310 }); sres.Stats.ShardFilesConsidered != 0 {
2311 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2312 }
2313 })
2314}
2315
2316func TestUTF8CorrectCorpus(t *testing.T) {
2317 needle := "neeedle"
2318
2319 // 6 bytes.
2320 unicode := "世界"
2321 b := testIndexBuilder(t, nil,
2322 Document{
2323 Name: "f1",
2324 Content: []byte(strings.Repeat(unicode, 100)),
2325 },
2326 Document{
2327 Name: "xxxxxneeedle",
2328 Content: []byte("hello"),
2329 })
2330
2331 t.Run("LineMatches", func(t *testing.T) {
2332 q := &query.Substring{Pattern: needle, FileName: true}
2333 res := searchForTest(t, b, q)
2334 if len(res.Files) != 1 {
2335 t.Errorf("got %v, want 1 result", res)
2336 }
2337 })
2338
2339 t.Run("ChunkMatches", func(t *testing.T) {
2340 q := &query.Substring{Pattern: needle, FileName: true}
2341 res := searchForTest(t, b, q, chunkOpts)
2342 if len(res.Files) != 1 {
2343 t.Errorf("got %v, want 1 result", res)
2344 }
2345 })
2346}
2347
2348func TestBuilderStats(t *testing.T) {
2349 b := testIndexBuilder(t, nil,
2350 Document{
2351 Name: "f1",
2352 Content: []byte(strings.Repeat("abcd", 1024)),
2353 })
2354 var buf bytes.Buffer
2355 if err := b.Write(&buf); err != nil {
2356 t.Fatal(err)
2357 }
2358
2359 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2360 t.Errorf("got %d, want %d", got, want)
2361 }
2362}
2363
2364func TestIOStats(t *testing.T) {
2365 b := testIndexBuilder(t, nil,
2366 Document{
2367 Name: "f1",
2368 Content: []byte(strings.Repeat("abcd", 1024)),
2369 })
2370
2371 t.Run("LineMatches", func(t *testing.T) {
2372 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2373 res := searchForTest(t, b, q)
2374
2375 // 4096 (content) + 2 (overhead: newlines or doc sections)
2376 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2377 t.Errorf("got content I/O %d, want %d", got, want)
2378 }
2379
2380 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2381 // delta encoded.
2382 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2383 t.Errorf("got index I/O %d, want %d", got, want)
2384 }
2385 })
2386
2387 t.Run("ChunkMatches", func(t *testing.T) {
2388 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2389 res := searchForTest(t, b, q, chunkOpts)
2390
2391 // 4096 (content) + 2 (overhead: newlines or doc sections)
2392 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2393 t.Errorf("got content I/O %d, want %d", got, want)
2394 }
2395
2396 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2397 // delta encoded.
2398 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2399 t.Errorf("got index I/O %d, want %d", got, want)
2400 }
2401 })
2402}
2403
2404func TestStartLineAnchor(t *testing.T) {
2405 b := testIndexBuilder(t, nil,
2406 Document{
2407 Name: "f1",
2408 Content: []byte(
2409 `hello
2410start of middle of line
2411`),
2412 })
2413
2414 t.Run("LineMatches", func(t *testing.T) {
2415 q, err := query.Parse("^start")
2416 if err != nil {
2417 t.Errorf("parse: %v", err)
2418 }
2419
2420 res := searchForTest(t, b, q)
2421 if len(res.Files) != 1 {
2422 t.Errorf("got %v, want 1 file", res.Files)
2423 }
2424
2425 q, err = query.Parse("^middle")
2426 if err != nil {
2427 t.Errorf("parse: %v", err)
2428 }
2429 res = searchForTest(t, b, q)
2430 if len(res.Files) != 0 {
2431 t.Errorf("got %v, want 0 files", res.Files)
2432 }
2433 })
2434
2435 t.Run("ChunkMatches", func(t *testing.T) {
2436 q, err := query.Parse("^start")
2437 if err != nil {
2438 t.Errorf("parse: %v", err)
2439 }
2440
2441 res := searchForTest(t, b, q, chunkOpts)
2442 if len(res.Files) != 1 {
2443 t.Errorf("got %v, want 1 file", res.Files)
2444 }
2445
2446 q, err = query.Parse("^middle")
2447 if err != nil {
2448 t.Errorf("parse: %v", err)
2449 }
2450 res = searchForTest(t, b, q, chunkOpts)
2451 if len(res.Files) != 0 {
2452 t.Errorf("got %v, want 0 files", res.Files)
2453 }
2454 })
2455}
2456
2457func TestAndOrUnicode(t *testing.T) {
2458 q, err := query.Parse("orange.*apple")
2459 if err != nil {
2460 t.Errorf("parse: %v", err)
2461 }
2462 finalQ := query.NewAnd(q,
2463 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2464 query.NewOr(&query.Branch{Pattern: "master"}))))
2465
2466 b := testIndexBuilder(t, &Repository{
2467 Name: "name",
2468 Branches: []RepositoryBranch{{"master", "master-version"}},
2469 }, Document{
2470 Name: "f2",
2471 Content: []byte("orange\u2318apple"),
2472 // --------------0123456 78901
2473 Branches: []string{"master"},
2474 })
2475
2476 t.Run("LineMatches", func(t *testing.T) {
2477 res := searchForTest(t, b, finalQ)
2478 if len(res.Files) != 1 {
2479 t.Errorf("got %v, want 1 result", res.Files)
2480 }
2481 })
2482
2483 t.Run("ChunkMatches", func(t *testing.T) {
2484 res := searchForTest(t, b, finalQ, chunkOpts)
2485 if len(res.Files) != 1 {
2486 t.Errorf("got %v, want 1 result", res.Files)
2487 }
2488 })
2489}
2490
2491func TestAndShort(t *testing.T) {
2492 content := []byte("bla needle at orange bla")
2493 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2494 Document{Name: "f1", Content: content},
2495 Document{Name: "f2", Content: []byte("xx at xx")},
2496 Document{Name: "f3", Content: []byte("yy orange xx")},
2497 )
2498
2499 q := query.NewAnd(&query.Substring{Pattern: "at"},
2500 &query.Substring{Pattern: "orange"})
2501
2502 t.Run("LineMatches", func(t *testing.T) {
2503 res := searchForTest(t, b, q)
2504 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2505 t.Errorf("got %v, want 1 result", res.Files)
2506 }
2507 })
2508
2509 t.Run("ChunkMatches", func(t *testing.T) {
2510 res := searchForTest(t, b, q, chunkOpts)
2511 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2512 t.Errorf("got %v, want 1 result", res.Files)
2513 }
2514 })
2515}
2516
2517func TestNoCollectRegexpSubstring(t *testing.T) {
2518 content := []byte("bla final bla\nfoo final, foo")
2519 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2520 Document{Name: "f1", Content: content},
2521 )
2522
2523 q := &query.Regexp{
2524 Regexp: mustParseRE("final[,.]"),
2525 }
2526
2527 t.Run("LineMatches", func(t *testing.T) {
2528 res := searchForTest(t, b, q)
2529 if len(res.Files) != 1 {
2530 t.Fatalf("got %v, want 1 result", res.Files)
2531 }
2532 if f := res.Files[0]; len(f.LineMatches) != 1 {
2533 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2534 }
2535 })
2536
2537 t.Run("ChunkMatches", func(t *testing.T) {
2538 res := searchForTest(t, b, q, chunkOpts)
2539 if len(res.Files) != 1 {
2540 t.Fatalf("got %v, want 1 result", res.Files)
2541 }
2542 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2543 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2544 }
2545 })
2546}
2547
2548func printLineMatches(ms []LineMatch) string {
2549 var ss []string
2550 for _, m := range ms {
2551 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2552 }
2553
2554 return strings.Join(ss, ", ")
2555}
2556
2557func TestLang(t *testing.T) {
2558 content := []byte("bla needle bla")
2559 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2560 Document{Name: "f1", Content: content},
2561 Document{Name: "f2", Language: "java", Content: content},
2562 Document{Name: "f3", Language: "cpp", Content: content},
2563 )
2564
2565 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2566 &query.Language{Language: "cpp"})
2567
2568 t.Run("LineMatches", func(t *testing.T) {
2569 res := searchForTest(t, b, q)
2570 if len(res.Files) != 1 {
2571 t.Fatalf("got %v, want 1 result in f3", res.Files)
2572 }
2573 f := res.Files[0]
2574 if f.FileName != "f3" || f.Language != "cpp" {
2575 t.Fatalf("got %v, want 1 match with language cpp", f)
2576 }
2577 })
2578
2579 t.Run("ChunkMatches", func(t *testing.T) {
2580 res := searchForTest(t, b, q, chunkOpts)
2581 if len(res.Files) != 1 {
2582 t.Fatalf("got %v, want 1 result in f3", res.Files)
2583 }
2584 f := res.Files[0]
2585 if f.FileName != "f3" || f.Language != "cpp" {
2586 t.Fatalf("got %v, want 1 match with language cpp", f)
2587 }
2588 })
2589}
2590
2591func TestLangShortcut(t *testing.T) {
2592 content := []byte("bla needle bla")
2593 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2594 Document{Name: "f2", Language: "java", Content: content},
2595 Document{Name: "f3", Language: "cpp", Content: content},
2596 )
2597
2598 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2599 &query.Language{Language: "fortran"})
2600
2601 t.Run("LineMatches", func(t *testing.T) {
2602 res := searchForTest(t, b, q)
2603 if len(res.Files) != 0 {
2604 t.Fatalf("got %v, want 0 results", res.Files)
2605 }
2606 if res.Stats.IndexBytesLoaded > 0 {
2607 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2608 }
2609 })
2610
2611 t.Run("ChunkMatches", func(t *testing.T) {
2612 res := searchForTest(t, b, q, chunkOpts)
2613 if len(res.Files) != 0 {
2614 t.Fatalf("got %v, want 0 results", res.Files)
2615 }
2616 if res.Stats.IndexBytesLoaded > 0 {
2617 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2618 }
2619 })
2620}
2621
2622func TestNoTextMatchAtoms(t *testing.T) {
2623 content := []byte("bla needle bla")
2624 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2625 Document{Name: "f1", Content: content},
2626 Document{Name: "f2", Language: "java", Content: content},
2627 Document{Name: "f3", Language: "cpp", Content: content},
2628 )
2629 q := query.NewAnd(&query.Language{Language: "java"})
2630 t.Run("LineMatches", func(t *testing.T) {
2631 res := searchForTest(t, b, q)
2632 if len(res.Files) != 1 {
2633 t.Fatalf("got %v, want 1 result in f3", res.Files)
2634 }
2635 })
2636
2637 t.Run("ChunkMatches", func(t *testing.T) {
2638 res := searchForTest(t, b, q, chunkOpts)
2639 if len(res.Files) != 1 {
2640 t.Fatalf("got %v, want 1 result in f3", res.Files)
2641 }
2642 })
2643}
2644
2645func TestNoPositiveAtoms(t *testing.T) {
2646 content := []byte("bla needle bla")
2647 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2648 Document{Name: "f1", Content: content},
2649 Document{Name: "f2", Content: content},
2650 )
2651
2652 q := query.NewAnd(
2653 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2654 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2655 t.Run("LineMatches", func(t *testing.T) {
2656 res := searchForTest(t, b, q)
2657 if len(res.Files) != 2 {
2658 t.Fatalf("got %v, want 2 results in f3", res.Files)
2659 }
2660 })
2661 t.Run("ChunkMatches", func(t *testing.T) {
2662 res := searchForTest(t, b, q, chunkOpts)
2663 if len(res.Files) != 2 {
2664 t.Fatalf("got %v, want 2 results in f3", res.Files)
2665 }
2666 })
2667}
2668
2669func TestSymbolBoundaryStart(t *testing.T) {
2670 content := []byte("start\nbla bla\nend")
2671 // ----------------012345-67890123-456
2672
2673 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2674 Document{
2675 Name: "f1",
2676 Content: content,
2677 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2678 },
2679 )
2680 q := &query.Symbol{
2681 Expr: &query.Substring{Pattern: "start"},
2682 }
2683 t.Run("LineMatches", func(t *testing.T) {
2684 res := searchForTest(t, b, q)
2685 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2686 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2687 }
2688 m := res.Files[0].LineMatches[0].LineFragments[0]
2689 if m.Offset != 0 {
2690 t.Fatalf("got offset %d want 0", m.Offset)
2691 }
2692 })
2693
2694 t.Run("ChunkMatches", func(t *testing.T) {
2695 res := searchForTest(t, b, q, chunkOpts)
2696 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2697 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2698 }
2699 m := res.Files[0].ChunkMatches[0].Ranges[0]
2700 if m.Start.ByteOffset != 0 {
2701 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2702 }
2703 })
2704}
2705
2706func TestSymbolBoundaryEnd(t *testing.T) {
2707 content := []byte("start\nbla bla\nend")
2708 // ----------------012345-67890123-456
2709
2710 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2711 Document{
2712 Name: "f1",
2713 Content: content,
2714 Symbols: []DocumentSection{{14, 17}},
2715 },
2716 )
2717 q := &query.Symbol{
2718 Expr: &query.Substring{Pattern: "end"},
2719 }
2720 t.Run("LineMatches", func(t *testing.T) {
2721 res := searchForTest(t, b, q)
2722 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2723 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2724 }
2725 m := res.Files[0].LineMatches[0].LineFragments[0]
2726 if m.Offset != 14 {
2727 t.Fatalf("got offset %d want 0", m.Offset)
2728 }
2729 })
2730
2731 t.Run("ChunkMatches", func(t *testing.T) {
2732 res := searchForTest(t, b, q, chunkOpts)
2733 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2734 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2735 }
2736 m := res.Files[0].ChunkMatches[0].Ranges[0]
2737 if m.Start.ByteOffset != 14 {
2738 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2739 }
2740 })
2741}
2742
2743func TestSymbolSubstring(t *testing.T) {
2744 content := []byte("bla\nsymblabla\nbla")
2745 // ----------------0123-4567890123-456
2746
2747 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2748 Document{
2749 Name: "f1",
2750 Content: content,
2751 Symbols: []DocumentSection{{4, 12}},
2752 },
2753 )
2754 q := &query.Symbol{
2755 Expr: &query.Substring{Pattern: "bla"},
2756 }
2757 t.Run("LineMatches", func(t *testing.T) {
2758 res := searchForTest(t, b, q)
2759 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2760 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2761 }
2762 m := res.Files[0].LineMatches[0].LineFragments[0]
2763 if m.Offset != 7 || m.MatchLength != 3 {
2764 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2765 }
2766 })
2767
2768 t.Run("ChunkMatches", func(t *testing.T) {
2769 res := searchForTest(t, b, q, chunkOpts)
2770 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2771 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2772 }
2773 m := res.Files[0].ChunkMatches[0].Ranges[0]
2774 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2775 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2776 }
2777 })
2778}
2779
2780func TestSymbolSubstringExact(t *testing.T) {
2781 content := []byte("bla\nsym\nbla\nsym\nasymb")
2782 // ----------------0123-4567-890123456-78901
2783
2784 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2785 Document{
2786 Name: "f1",
2787 Content: content,
2788 Symbols: []DocumentSection{{4, 7}},
2789 },
2790 )
2791 q := &query.Symbol{
2792 Expr: &query.Substring{Pattern: "sym"},
2793 }
2794 t.Run("LineMatches", func(t *testing.T) {
2795 res := searchForTest(t, b, q)
2796 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2797 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2798 }
2799 m := res.Files[0].LineMatches[0].LineFragments[0]
2800 if m.Offset != 4 {
2801 t.Fatalf("got offset %d, want 7", m.Offset)
2802 }
2803 })
2804
2805 t.Run("ChunkMatches", func(t *testing.T) {
2806 res := searchForTest(t, b, q, chunkOpts)
2807 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2808 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2809 }
2810 m := res.Files[0].ChunkMatches[0].Ranges[0]
2811 if m.Start.ByteOffset != 4 {
2812 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2813 }
2814 })
2815}
2816
2817func TestSymbolRegexpExact(t *testing.T) {
2818 content := []byte("blah\nbla\nbl")
2819 // ----------------01234-5678-90
2820
2821 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2822 Document{
2823 Name: "f1",
2824 Content: content,
2825 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2826 },
2827 )
2828 q := &query.Symbol{
2829 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2830 }
2831 t.Run("LineMatches", func(t *testing.T) {
2832 res := searchForTest(t, b, q)
2833 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2834 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2835 }
2836 m := res.Files[0].LineMatches[0].LineFragments[0]
2837 if m.Offset != 5 {
2838 t.Fatalf("got offset %d, want 5", m.Offset)
2839 }
2840 })
2841
2842 t.Run("ChunkMatches", func(t *testing.T) {
2843 res := searchForTest(t, b, q, chunkOpts)
2844 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2845 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2846 }
2847 m := res.Files[0].ChunkMatches[0].Ranges[0]
2848 if m.Start.ByteOffset != 5 {
2849 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2850 }
2851 })
2852}
2853
2854func TestSymbolRegexpPartial(t *testing.T) {
2855 content := []byte("abcdef")
2856 // ----------------012345
2857
2858 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2859 Document{
2860 Name: "f1",
2861 Content: content,
2862 Symbols: []DocumentSection{{0, 6}},
2863 },
2864 )
2865 q := &query.Symbol{
2866 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2867 }
2868 t.Run("LineMatches", func(t *testing.T) {
2869 res := searchForTest(t, b, q)
2870 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2871 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2872 }
2873 m := res.Files[0].LineMatches[0].LineFragments[0]
2874 if m.Offset != 1 {
2875 t.Fatalf("got offset %d, want 1", m.Offset)
2876 }
2877 if m.MatchLength != 3 {
2878 t.Fatalf("got match length %d, want 3", m.MatchLength)
2879 }
2880 })
2881
2882 t.Run("ChunkMatches", func(t *testing.T) {
2883 res := searchForTest(t, b, q, chunkOpts)
2884 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2885 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2886 }
2887 m := res.Files[0].ChunkMatches[0].Ranges[0]
2888 if m.Start.ByteOffset != 1 {
2889 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2890 }
2891 if m.End.ByteOffset != 4 {
2892 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2893 }
2894 })
2895}
2896
2897func TestSymbolRegexpAll(t *testing.T) {
2898 docs := []Document{
2899 {
2900 Name: "f1",
2901 Content: []byte("Hello Zoekt"),
2902 // --------------01234567890
2903 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2904 },
2905 {
2906 Name: "f2",
2907 Content: []byte("Second Zoekt Third"),
2908 // --------------012345678901234567
2909 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2910 },
2911 }
2912
2913 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2914 q := &query.Symbol{
2915 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2916 }
2917 t.Run("LineMatches", func(t *testing.T) {
2918 res := searchForTest(t, b, q)
2919 if len(res.Files) != len(docs) {
2920 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2921 }
2922 for i, want := range docs {
2923 got := res.Files[i].LineMatches[0].LineFragments
2924 if len(got) != len(want.Symbols) {
2925 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2926 }
2927
2928 for j, sec := range want.Symbols {
2929 if sec.Start != got[j].Offset {
2930 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
2931 }
2932 }
2933 }
2934 })
2935
2936 t.Run("ChunkMatches", func(t *testing.T) {
2937 res := searchForTest(t, b, q, chunkOpts)
2938 if len(res.Files) != len(docs) {
2939 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2940 }
2941 for i, want := range docs {
2942 got := res.Files[i].ChunkMatches[0].Ranges
2943 if len(got) != len(want.Symbols) {
2944 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2945 }
2946
2947 for j, sec := range want.Symbols {
2948 if sec.Start != uint32(got[j].Start.ByteOffset) {
2949 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
2950 }
2951 }
2952 }
2953 })
2954}
2955
2956func TestHitIterTerminate(t *testing.T) {
2957 // contrived input: trigram frequencies forces selecting abc +
2958 // def for the distance iteration. There is no match, so this
2959 // will advance the compressedPostingIterator to beyond the
2960 // end.
2961 content := []byte("abc bcdbcd cdecde abcabc def efg")
2962 b := testIndexBuilder(t, nil,
2963 Document{
2964 Name: "f1",
2965 Content: content,
2966 },
2967 )
2968
2969 t.Run("LineMatches", func(t *testing.T) {
2970 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
2971 })
2972
2973 t.Run("ChunkMatches", func(t *testing.T) {
2974 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
2975 })
2976}
2977
2978func TestDistanceHitIterBailLast(t *testing.T) {
2979 content := []byte("AST AST AST UASH")
2980 b := testIndexBuilder(t, nil,
2981 Document{
2982 Name: "f1",
2983 Content: content,
2984 },
2985 )
2986 t.Run("LineMatches", func(t *testing.T) {
2987 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
2988 if len(res.Files) != 0 {
2989 t.Fatalf("got %v, want no results", res.Files)
2990 }
2991 })
2992
2993 t.Run("LineMatches", func(t *testing.T) {
2994 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
2995 if len(res.Files) != 0 {
2996 t.Fatalf("got %v, want no results", res.Files)
2997 }
2998 })
2999}
3000
3001func TestDocumentSectionRuneBoundary(t *testing.T) {
3002 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3003 b, err := NewIndexBuilder(nil)
3004 if err != nil {
3005 t.Fatalf("NewIndexBuilder: %v", err)
3006 }
3007
3008 for i, sec := range []DocumentSection{
3009 {2, 6},
3010 {3, 7},
3011 } {
3012 if err := b.Add(Document{
3013 Name: "f1",
3014 Content: []byte(content),
3015 Symbols: []DocumentSection{sec},
3016 }); err == nil {
3017 t.Errorf("%d: Add succeeded", i)
3018 }
3019 }
3020}
3021
3022func TestUnicodeQuery(t *testing.T) {
3023 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3024 b := testIndexBuilder(t, nil,
3025 Document{
3026 Name: "f1",
3027 Content: []byte(content),
3028 },
3029 )
3030
3031 q := &query.Substring{Pattern: content}
3032
3033 t.Run("LineMatches", func(t *testing.T) {
3034 res := searchForTest(t, b, q)
3035 if len(res.Files) != 1 {
3036 t.Fatalf("want 1 match, got %v", res.Files)
3037 }
3038
3039 f := res.Files[0]
3040 if len(f.LineMatches) != 1 {
3041 t.Fatalf("want 1 line, got %v", f.LineMatches)
3042 }
3043 l := f.LineMatches[0]
3044
3045 if len(l.LineFragments) != 1 {
3046 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3047 }
3048 fr := l.LineFragments[0]
3049 if fr.MatchLength != len(content) {
3050 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3051 }
3052 })
3053
3054 t.Run("ChunkMatches", func(t *testing.T) {
3055 res := searchForTest(t, b, q, chunkOpts)
3056 if len(res.Files) != 1 {
3057 t.Fatalf("want 1 match, got %v", res.Files)
3058 }
3059
3060 f := res.Files[0]
3061 if len(f.ChunkMatches) != 1 {
3062 t.Fatalf("want 1 line, got %v", f.LineMatches)
3063 }
3064 cm := f.ChunkMatches[0]
3065
3066 if len(cm.Ranges) != 1 {
3067 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3068 }
3069 rr := cm.Ranges[0]
3070 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3071 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3072 }
3073 })
3074}
3075
3076func TestSkipInvalidContent(t *testing.T) {
3077 for _, content := range []string{
3078 // Binary
3079 "abc def \x00 abc",
3080 } {
3081
3082 b, err := NewIndexBuilder(nil)
3083 if err != nil {
3084 t.Fatalf("NewIndexBuilder: %v", err)
3085 }
3086
3087 if err := b.Add(Document{
3088 Name: "f1",
3089 Content: []byte(content),
3090 }); err != nil {
3091 t.Fatal(err)
3092 }
3093
3094 t.Run("LineMatches", func(t *testing.T) {
3095 q := &query.Substring{Pattern: "abc def"}
3096 res := searchForTest(t, b, q)
3097 if len(res.Files) != 0 {
3098 t.Fatalf("got %v, want no results", res.Files)
3099 }
3100
3101 q = &query.Substring{Pattern: "NOT-INDEXED"}
3102 res = searchForTest(t, b, q)
3103 if len(res.Files) != 1 {
3104 t.Fatalf("got %v, want 1 result", res.Files)
3105 }
3106 })
3107
3108 t.Run("ChunkMatches", func(t *testing.T) {
3109 q := &query.Substring{Pattern: "abc def"}
3110 res := searchForTest(t, b, q, chunkOpts)
3111 if len(res.Files) != 0 {
3112 t.Fatalf("got %v, want no results", res.Files)
3113 }
3114
3115 q = &query.Substring{Pattern: "NOT-INDEXED"}
3116 res = searchForTest(t, b, q, chunkOpts)
3117 if len(res.Files) != 1 {
3118 t.Fatalf("got %v, want 1 result", res.Files)
3119 }
3120 })
3121 }
3122}
3123
3124func TestCheckText(t *testing.T) {
3125 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3126 if err := CheckText([]byte(text), 20000); err != nil {
3127 t.Errorf("CheckText(%q): %v", text, err)
3128 }
3129 }
3130 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3131 if err := CheckText([]byte(text), 15); err == nil {
3132 t.Errorf("CheckText(%q) succeeded", text)
3133 }
3134 }
3135}
3136
3137func TestLineAnd(t *testing.T) {
3138 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3139 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3140 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3141 Document{Name: "f3", Content: []byte("banana grape")},
3142 )
3143 pattern := "(apple)(?-s:.)*?(banana)"
3144 r, _ := syntax.Parse(pattern, syntax.Perl)
3145
3146 q := query.Regexp{
3147 Regexp: r,
3148 Content: true,
3149 }
3150 t.Run("LineMatches", func(t *testing.T) {
3151 res := searchForTest(t, b, &q)
3152 wantRegexpCount := 1
3153 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3154 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3155 }
3156 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3157 t.Errorf("got %v, want 1 result", res.Files)
3158 }
3159 })
3160
3161 t.Run("ChunkMatches", func(t *testing.T) {
3162 res := searchForTest(t, b, &q, chunkOpts)
3163 wantRegexpCount := 1
3164 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3165 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3166 }
3167 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3168 t.Errorf("got %v, want 1 result", res.Files)
3169 }
3170 })
3171}
3172
3173func TestLineAndFileName(t *testing.T) {
3174 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3175 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3176 Document{Name: "f2", Content: []byte("apple banana\norange")},
3177 Document{Name: "apple banana", Content: []byte("banana grape")},
3178 )
3179 pattern := "(apple)(?-s:.)*?(banana)"
3180 r, _ := syntax.Parse(pattern, syntax.Perl)
3181
3182 q := query.Regexp{
3183 Regexp: r,
3184 FileName: true,
3185 }
3186 t.Run("LineMatches", func(t *testing.T) {
3187 res := searchForTest(t, b, &q)
3188 wantRegexpCount := 1
3189 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3190 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3191 }
3192 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3193 t.Errorf("got %v, want 1 result", res.Files)
3194 }
3195 })
3196
3197 t.Run("ChunkMatches", func(t *testing.T) {
3198 res := searchForTest(t, b, &q, chunkOpts)
3199 wantRegexpCount := 1
3200 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3201 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3202 }
3203 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3204 t.Errorf("got %v, want 1 result", res.Files)
3205 }
3206 })
3207}
3208
3209func TestMultiLineRegex(t *testing.T) {
3210 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3211 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3212 Document{Name: "f2", Content: []byte("apple orange")},
3213 Document{Name: "f3", Content: []byte("grape apple")},
3214 )
3215 pattern := "(apple).*?[[:space:]].*?(grape)"
3216 r, _ := syntax.Parse(pattern, syntax.Perl)
3217
3218 q := query.Regexp{
3219 Regexp: r,
3220 }
3221 t.Run("LineMatches", func(t *testing.T) {
3222 res := searchForTest(t, b, &q)
3223 wantRegexpCount := 2
3224 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3225 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3226 }
3227 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3228 t.Errorf("got %v, want 1 result", res.Files)
3229 }
3230 if l := len(res.Files[0].LineMatches); l != 2 {
3231 t.Errorf("got %v, want 2 line matches", l)
3232 }
3233 })
3234
3235 t.Run("ChunkMatches", func(t *testing.T) {
3236 res := searchForTest(t, b, &q, chunkOpts)
3237 wantRegexpCount := 2
3238 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3239 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3240 }
3241 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3242 t.Errorf("got %v, want 1 result", res.Files)
3243 }
3244 if l := len(res.Files[0].ChunkMatches); l != 1 {
3245 t.Errorf("got %v, want 1 chunk matches", l)
3246 }
3247 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3248 t.Errorf("got %v, want 1 chunk ranges", l)
3249 }
3250 })
3251}
3252
3253func TestSearchTypeFileName(t *testing.T) {
3254 b := testIndexBuilder(t, &Repository{
3255 Name: "reponame",
3256 },
3257 Document{Name: "f1", Content: []byte("bla the needle")},
3258 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3259 // -----------------------------------012345678901234567890-123456
3260 )
3261
3262 t.Run("LineMatches", func(t *testing.T) {
3263 wantSingleMatch := func(res *SearchResult, want string) {
3264 t.Helper()
3265 fmatches := res.Files
3266 if len(fmatches) != 1 {
3267 t.Errorf("got %v, want 1 matches", len(fmatches))
3268 return
3269 }
3270 if len(fmatches[0].LineMatches) != 1 {
3271 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3272 return
3273 }
3274 var got string
3275 if fmatches[0].LineMatches[0].FileName {
3276 got = fmatches[0].FileName
3277 } else {
3278 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3279 }
3280
3281 if got != want {
3282 t.Errorf("got %s, want %s", got, want)
3283 }
3284 }
3285
3286 // Only return the later match in the second file
3287 res := searchForTest(t, b, query.NewAnd(
3288 &query.Type{
3289 Type: query.TypeFileName,
3290 Child: &query.Substring{Pattern: "needle"},
3291 },
3292 &query.Substring{Pattern: "file"}))
3293 wantSingleMatch(res, "f2:8")
3294
3295 // Only return a filename result
3296 res = searchForTest(t, b,
3297 &query.Type{
3298 Type: query.TypeFileName,
3299 Child: &query.Substring{Pattern: "file"},
3300 })
3301 wantSingleMatch(res, "f2")
3302 })
3303
3304 t.Run("ChunkMatches", func(t *testing.T) {
3305 wantSingleMatch := func(res *SearchResult, want string) {
3306 t.Helper()
3307 fmatches := res.Files
3308 if len(fmatches) != 1 {
3309 t.Errorf("got %v, want 1 matches", len(fmatches))
3310 return
3311 }
3312 if len(fmatches[0].ChunkMatches) != 1 {
3313 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3314 return
3315 }
3316 var got string
3317 if fmatches[0].ChunkMatches[0].FileName {
3318 got = fmatches[0].FileName
3319 } else {
3320 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3321 }
3322
3323 if got != want {
3324 t.Errorf("got %s, want %s", got, want)
3325 }
3326 }
3327
3328 // Only return the later match in the second file
3329 res := searchForTest(t, b, query.NewAnd(
3330 &query.Type{
3331 Type: query.TypeFileName,
3332 Child: &query.Substring{Pattern: "needle"},
3333 },
3334 &query.Substring{Pattern: "file"}),
3335 chunkOpts,
3336 )
3337 wantSingleMatch(res, "f2:8")
3338
3339 // Only return a filename result
3340 res = searchForTest(t, b,
3341 &query.Type{
3342 Type: query.TypeFileName,
3343 Child: &query.Substring{Pattern: "file"},
3344 },
3345 chunkOpts,
3346 )
3347 wantSingleMatch(res, "f2")
3348 })
3349}
3350
3351func TestSearchTypeLanguage(t *testing.T) {
3352 b := testIndexBuilder(t, &Repository{
3353 Name: "reponame",
3354 },
3355 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3356 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3357 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3358 )
3359
3360 t.Log(b.languageMap)
3361
3362 t.Run("LineMatches", func(t *testing.T) {
3363 wantSingleMatch := func(res *SearchResult, want string) {
3364 t.Helper()
3365 fmatches := res.Files
3366 if len(fmatches) != 1 {
3367 t.Errorf("got %v, want 1 matches", len(fmatches))
3368 return
3369 }
3370 if len(fmatches[0].LineMatches) != 1 {
3371 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3372 return
3373 }
3374 var got string
3375 if fmatches[0].LineMatches[0].FileName {
3376 got = fmatches[0].FileName
3377 } else {
3378 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3379 }
3380
3381 if got != want {
3382 t.Errorf("got %s, want %s", got, want)
3383 }
3384 }
3385
3386 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3387 wantSingleMatch(res, "apex.cls")
3388
3389 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3390 wantSingleMatch(res, "tex.cls")
3391
3392 res = searchForTest(t, b, &query.Language{Language: "C"})
3393 wantSingleMatch(res, "hello.h")
3394
3395 // test fallback language search by pretending it's an older index version
3396 res = searchForTest(t, b, &query.Language{Language: "C++"})
3397 if len(res.Files) != 0 {
3398 t.Errorf("got %d results for C++, want 0", len(res.Files))
3399 }
3400
3401 b.featureVersion = 11 // force fallback
3402 res = searchForTest(t, b, &query.Language{Language: "C++"})
3403 wantSingleMatch(res, "hello.h")
3404 })
3405
3406 t.Run("ChunkMatches", func(t *testing.T) {
3407 wantSingleMatch := func(res *SearchResult, want string) {
3408 t.Helper()
3409 fmatches := res.Files
3410 if len(fmatches) != 1 {
3411 t.Errorf("got %v, want 1 matches", len(fmatches))
3412 return
3413 }
3414 if len(fmatches[0].ChunkMatches) != 1 {
3415 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3416 return
3417 }
3418 var got string
3419 if fmatches[0].ChunkMatches[0].FileName {
3420 got = fmatches[0].FileName
3421 } else {
3422 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3423 }
3424
3425 if got != want {
3426 t.Errorf("got %s, want %s", got, want)
3427 }
3428 }
3429
3430 b.featureVersion = FeatureVersion // reset feature version
3431 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3432 wantSingleMatch(res, "apex.cls")
3433
3434 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3435 wantSingleMatch(res, "tex.cls")
3436
3437 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3438 wantSingleMatch(res, "hello.h")
3439
3440 // test fallback language search by pretending it's an older index version
3441 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3442 if len(res.Files) != 0 {
3443 t.Errorf("got %d results for C++, want 0", len(res.Files))
3444 }
3445
3446 b.featureVersion = 11 // force fallback
3447 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3448 wantSingleMatch(res, "hello.h")
3449 })
3450}
3451
3452func TestStats(t *testing.T) {
3453 ignored := []cmp.Option{
3454 cmpopts.EquateEmpty(),
3455 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3456 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3457 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3458 }
3459
3460 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3461 searcher := searcherForTest(t, b)
3462 indexdata := searcher.(*indexData)
3463 return indexdata.repoListEntry
3464 }
3465
3466 t.Run("one empty repo", func(t *testing.T) {
3467 b := testIndexBuilder(t, nil)
3468 got := repoListEntries(b)
3469 want := []RepoListEntry{
3470 {
3471 Stats: RepoStats{
3472 Repos: 0,
3473 Shards: 1,
3474 Documents: 0,
3475 IndexBytes: 20,
3476 ContentBytes: 0,
3477 NewLinesCount: 0,
3478 DefaultBranchNewLinesCount: 0,
3479 OtherBranchesNewLinesCount: 0,
3480 },
3481 },
3482 }
3483
3484 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3485 t.Fatalf("mismatch (-want +got):\n%s", diff)
3486 }
3487
3488 })
3489
3490 t.Run("one simple shard", func(t *testing.T) {
3491 b := testIndexBuilder(t, nil,
3492 Document{Name: "doc 0", Content: []byte("content 0")},
3493 Document{Name: "doc 1", Content: []byte("content 1")},
3494 )
3495 got := repoListEntries(b)
3496 want := []RepoListEntry{
3497 {
3498 Stats: RepoStats{
3499 Repos: 0,
3500 Shards: 1,
3501 Documents: 2,
3502 IndexBytes: 224,
3503 ContentBytes: 28,
3504 NewLinesCount: 0,
3505 DefaultBranchNewLinesCount: 0,
3506 OtherBranchesNewLinesCount: 0,
3507 },
3508 },
3509 }
3510
3511 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3512 t.Fatalf("mismatch (-want +got):\n%s", diff)
3513 }
3514
3515 })
3516
3517 t.Run("one compound shard", func(t *testing.T) {
3518 b := testIndexBuilderCompound(t,
3519 []*Repository{
3520 {Name: "repo 0"},
3521 {Name: "repo 1"},
3522 },
3523 [][]Document{
3524 {
3525 {Name: "doc 0", Content: []byte("content 0")},
3526 {Name: "doc 1", Content: []byte("content 1")},
3527 },
3528 {
3529 {Name: "doc 2", Content: []byte("content 2")},
3530 {Name: "doc 3", Content: []byte("content 3")},
3531 },
3532 },
3533 )
3534 got := repoListEntries(b)
3535 want := []RepoListEntry{
3536 {
3537 Stats: RepoStats{
3538 Repos: 0,
3539 Shards: 1,
3540 Documents: 2,
3541 IndexBytes: 180,
3542 ContentBytes: 28,
3543 NewLinesCount: 0,
3544 DefaultBranchNewLinesCount: 0,
3545 OtherBranchesNewLinesCount: 0,
3546 },
3547 },
3548 {
3549 Stats: RepoStats{
3550 Repos: 0,
3551 Shards: 1,
3552 Documents: 2,
3553 IndexBytes: 180,
3554 ContentBytes: 28,
3555 NewLinesCount: 0,
3556 DefaultBranchNewLinesCount: 0,
3557 OtherBranchesNewLinesCount: 0,
3558 },
3559 },
3560 }
3561
3562 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3563 t.Fatalf("mismatch (-want +got):\n%s", diff)
3564 }
3565 })
3566
3567 t.Run("compound shard with empty repos", func(t *testing.T) {
3568 b := testIndexBuilderCompound(t,
3569 []*Repository{
3570 {Name: "repo 0"},
3571 {Name: "repo 1"},
3572 {Name: "repo 2"},
3573 {Name: "repo 3"},
3574 {Name: "repo 4"},
3575 },
3576 [][]Document{
3577 {{Name: "doc 0", Content: []byte("content 0")}},
3578 nil,
3579 {{Name: "doc 1", Content: []byte("content 1")}},
3580 nil,
3581 nil,
3582 },
3583 )
3584 got := repoListEntries(b)
3585
3586 entryEmpty := RepoListEntry{Stats: RepoStats{
3587 Shards: 1,
3588 Documents: 0,
3589 ContentBytes: 0,
3590 }}
3591 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3592 Shards: 1,
3593 Documents: 1,
3594 ContentBytes: 14,
3595 }}
3596
3597 want := []RepoListEntry{
3598 entryNonEmpty,
3599 entryEmpty,
3600 entryNonEmpty,
3601 entryEmpty,
3602 entryEmpty,
3603 }
3604
3605 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3606 t.Fatalf("mismatch (-want +got):\n%s", diff)
3607 }
3608
3609 })
3610}
3611
3612// This tests the frequent pattern "\bLITERAL\b".
3613func TestWordSearch(t *testing.T) {
3614 content := []byte("needle the bla")
3615 // ----------------01234567890123
3616
3617 b := testIndexBuilder(t, nil,
3618 Document{
3619 Name: "f1",
3620 Content: content,
3621 })
3622
3623 t.Run("LineMatches", func(t *testing.T) {
3624 sres := searchForTest(t, b,
3625 &query.Regexp{
3626 Regexp: mustParseRE("\\bthe\\b"),
3627 CaseSensitive: true,
3628 Content: true,
3629 })
3630
3631 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3632 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3633 }
3634
3635 if sres.Stats.RegexpsConsidered != 0 {
3636 t.Fatal("expected regexp to be skipped")
3637 }
3638
3639 got := sres.Files[0].LineMatches[0]
3640 want := LineMatch{
3641 LineFragments: []LineFragmentMatch{{
3642 LineOffset: 7,
3643 Offset: 7,
3644 MatchLength: 3,
3645 }},
3646 Line: content,
3647 FileName: false,
3648 LineNumber: 1,
3649 LineStart: 0,
3650 LineEnd: 14,
3651 }
3652
3653 if !reflect.DeepEqual(got, want) {
3654 t.Errorf("got %#v, want %#v", got, want)
3655 }
3656 })
3657
3658 t.Run("ChunkMatches", func(t *testing.T) {
3659 sres := searchForTest(t, b,
3660 &query.Regexp{
3661 Regexp: mustParseRE("\\bthe\\b"),
3662 CaseSensitive: true,
3663 }, chunkOpts)
3664
3665 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3666 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3667 }
3668
3669 if sres.Stats.RegexpsConsidered != 0 {
3670 t.Fatal("expected regexp to be skipped")
3671 }
3672
3673 got := sres.Files[0].ChunkMatches[0]
3674 want := ChunkMatch{
3675 Content: content,
3676 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3677 Ranges: []Range{{
3678 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3679 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3680 }},
3681 }
3682
3683 if diff := cmp.Diff(want, got); diff != "" {
3684 t.Fatal(diff)
3685 }
3686 })
3687}