fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "os"
22 "reflect"
23 "regexp/syntax"
24 "strings"
25 "testing"
26
27 "github.com/google/go-cmp/cmp"
28 "github.com/google/go-cmp/cmp/cmpopts"
29 "github.com/grafana/regexp"
30 "github.com/kylelemons/godebug/pretty"
31
32 "github.com/sourcegraph/zoekt/query"
33)
34
35func clearScores(r *SearchResult) {
36 for i := range r.Files {
37 r.Files[i].Score = 0.0
38 for j := range r.Files[i].LineMatches {
39 r.Files[i].LineMatches[j].Score = 0.0
40 }
41 for j := range r.Files[i].ChunkMatches {
42 r.Files[i].ChunkMatches[j].Score = 0.0
43 }
44 r.Files[i].Checksum = nil
45 r.Files[i].Debug = ""
46 }
47}
48
49func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
50 t.Helper()
51
52 b, err := NewIndexBuilder(repo)
53 if err != nil {
54 t.Fatalf("NewIndexBuilder: %v", err)
55 }
56
57 for i, d := range docs {
58 if err := b.Add(d); err != nil {
59 t.Fatalf("Add %d: %v", i, err)
60 }
61 }
62
63 return b
64}
65
66func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
67 t.Helper()
68
69 b := newIndexBuilder()
70 b.indexFormatVersion = NextIndexFormatVersion
71
72 if len(repos) != len(docs) {
73 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
74 }
75
76 for i, repo := range repos {
77 if err := b.setRepository(repo); err != nil {
78 t.Fatal(err)
79 }
80 for j, d := range docs[i] {
81 if err := b.Add(d); err != nil {
82 t.Fatalf("Add %d %d: %v", i, j, err)
83 }
84 }
85 }
86
87 return b
88}
89
90func TestBoundary(t *testing.T) {
91 b := testIndexBuilder(t, nil,
92 Document{Name: "f1", Content: []byte("x the")},
93 Document{Name: "f1", Content: []byte("reader")})
94 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
95 if len(res.Files) > 0 {
96 t.Fatalf("got %v, want no matches", res.Files)
97 }
98}
99
100func TestDocSectionInvalid(t *testing.T) {
101 b, err := NewIndexBuilder(nil)
102 if err != nil {
103 t.Fatalf("NewIndexBuilder: %v", err)
104 }
105 doc := Document{
106 Name: "f1",
107 Content: []byte("01234567890123"),
108 Symbols: []DocumentSection{{5, 8}, {7, 9}},
109 }
110
111 if err := b.Add(doc); err == nil {
112 t.Errorf("overlapping doc sections should fail")
113 }
114
115 doc = Document{
116 Name: "f1",
117 Content: []byte("01234567890123"),
118 Symbols: []DocumentSection{{0, 20}},
119 }
120
121 if err := b.Add(doc); err == nil {
122 t.Errorf("doc sections beyond EOF should fail")
123 }
124}
125
126func TestBasic(t *testing.T) {
127 b := testIndexBuilder(t, nil,
128 Document{
129 Name: "f2",
130 Content: []byte("to carry water in the no later bla"),
131 // --------------0123456789012345678901234567890123
132 })
133
134 t.Run("LineMatch", func(t *testing.T) {
135 res := searchForTest(t, b, &query.Substring{
136 Pattern: "water",
137 CaseSensitive: true,
138 })
139 fmatches := res.Files
140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
141 t.Fatalf("got %v, want 1 matches", fmatches)
142 }
143
144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
145 want := "f2:9"
146 if got != want {
147 t.Errorf("1: got %s, want %s", got, want)
148 }
149 })
150
151 t.Run("ChunkMatch", func(t *testing.T) {
152 res := searchForTest(t, b, &query.Substring{
153 Pattern: "water",
154 CaseSensitive: true,
155 }, chunkOpts)
156 fmatches := res.Files
157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
158 t.Fatalf("got %v, want 1 matches", fmatches)
159 }
160
161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
162 want := "f2:9"
163 if got != want {
164 t.Errorf("1: got %s, want %s", got, want)
165 }
166 })
167}
168
169func TestEmptyIndex(t *testing.T) {
170 b := testIndexBuilder(t, nil)
171 searcher := searcherForTest(t, b)
172
173 var opts SearchOptions
174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
175 t.Fatalf("Search: %v", err)
176 }
177
178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
179 t.Fatalf("List: %v", err)
180 }
181
182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
183 t.Fatalf("Search: %v", err)
184 }
185}
186
// memSeeker is an in-memory index file: it serves reads directly out of a
// byte slice.
type memSeeker struct {
	data []byte
}

// Name returns the fixed placeholder name "memseeker".
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at byte offset off. The returned
// slice aliases the underlying data.
//
// It returns an error when the requested range extends past the end of the
// data. The end offset is computed in 64 bits so that off+sz cannot wrap
// around, and it is checked against len rather than cap so that a read can
// never expose spare capacity of the backing array.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d, %d) out of bounds for size %d", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
203
204func TestNewlines(t *testing.T) {
205 b := testIndexBuilder(t, nil,
206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
207 // ---------------------------------------------012345-678901-234
208
209 t.Run("LineMatches", func(t *testing.T) {
210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
211
212 matches := sres.Files
213 want := []FileMatch{{
214 FileName: "filename",
215 LineMatches: []LineMatch{{
216 LineFragments: []LineFragmentMatch{{
217 Offset: 8,
218 LineOffset: 2,
219 MatchLength: 3,
220 }},
221 Line: []byte("line2"),
222 LineStart: 6,
223 LineEnd: 11,
224 LineNumber: 2,
225 }},
226 }}
227
228 if !reflect.DeepEqual(matches, want) {
229 t.Errorf("got %v, want %v", matches, want)
230 }
231 })
232
233 t.Run("ChunkMatches", func(t *testing.T) {
234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
235
236 matches := sres.Files
237 want := []FileMatch{{
238 FileName: "filename",
239 ChunkMatches: []ChunkMatch{{
240 Content: []byte("line2"),
241 ContentStart: Location{
242 ByteOffset: 6,
243 LineNumber: 2,
244 Column: 1,
245 },
246 Ranges: []Range{{
247 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
248 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
249 }},
250 }},
251 }}
252
253 if diff := cmp.Diff(want, matches); diff != "" {
254 t.Fatal(diff)
255 }
256 })
257}
258
259// A result spanning multiple lines should have LineMatches that only cover
260// single lines.
261func TestQueryNewlines(t *testing.T) {
262 text := "line1\nline2\nbla"
263 b := testIndexBuilder(t, nil,
264 Document{Name: "filename", Content: []byte(text)})
265
266 t.Run("LineMatches", func(t *testing.T) {
267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
268 matches := sres.Files
269 if len(matches) != 1 {
270 t.Fatalf("got %d file matches, want exactly one", len(matches))
271 }
272 m := matches[0]
273 if len(m.LineMatches) != 2 {
274 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
275 }
276 })
277
278 t.Run("ChunkMatches", func(t *testing.T) {
279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
280 matches := sres.Files
281 if len(matches) != 1 {
282 t.Fatalf("got %d file matches, want exactly one", len(matches))
283 }
284 m := matches[0]
285 if len(m.ChunkMatches) != 1 {
286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
287 }
288 })
289}
290
291var chunkOpts = SearchOptions{ChunkMatches: true}
292
293func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
294 searcher := searcherForTest(t, b)
295 var opts SearchOptions
296 if len(o) > 0 {
297 opts = o[0]
298 }
299 res, err := searcher.Search(context.Background(), q, &opts)
300 if err != nil {
301 t.Fatalf("Search(%s): %v", q, err)
302 }
303 clearScores(res)
304 return res
305}
306
307func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
308 var buf bytes.Buffer
309 if err := b.Write(&buf); err != nil {
310 t.Fatal(err)
311 }
312 f := &memSeeker{buf.Bytes()}
313
314 searcher, err := NewSearcher(f)
315 if err != nil {
316 t.Fatalf("NewSearcher: %v", err)
317 }
318
319 return searcher
320}
321
322func TestCaseFold(t *testing.T) {
323 b := testIndexBuilder(t, nil,
324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
325 // -----------------------------------012345678901234
326 )
327 t.Run("LineMatches", func(t *testing.T) {
328 sres := searchForTest(t, b, &query.Substring{
329 Pattern: "bananas",
330 CaseSensitive: true,
331 })
332 matches := sres.Files
333 if len(matches) != 0 {
334 t.Errorf("foldcase: got %#v, want 0 matches", matches)
335 }
336
337 sres = searchForTest(t, b,
338 &query.Substring{
339 Pattern: "BaNaNAS",
340 CaseSensitive: true,
341 })
342 matches = sres.Files
343 if len(matches) != 1 {
344 t.Errorf("no foldcase: got %v, want 1 matches", matches)
345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
346 t.Errorf("foldcase: got %v, want offsets 7", matches)
347 }
348 })
349
350 t.Run("ChunkMatches", func(t *testing.T) {
351 sres := searchForTest(t, b, &query.Substring{
352 Pattern: "bananas",
353 CaseSensitive: true,
354 }, chunkOpts)
355 matches := sres.Files
356 if len(matches) != 0 {
357 t.Errorf("foldcase: got %#v, want 0 matches", matches)
358 }
359
360 sres = searchForTest(t, b,
361 &query.Substring{
362 Pattern: "BaNaNAS",
363 CaseSensitive: true,
364 })
365 matches = sres.Files
366 if len(matches) != 1 {
367 t.Errorf("no foldcase: got %v, want 1 matches", matches)
368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
369 t.Errorf("foldcase: got %v, want offsets 7", matches)
370 }
371 })
372}
373
374func TestAndSearch(t *testing.T) {
375 b := testIndexBuilder(t, nil,
376 Document{Name: "f1", Content: []byte("x banana y")},
377 Document{Name: "f2", Content: []byte("x apple y")},
378 Document{Name: "f3", Content: []byte("x banana apple y")},
379 // ---------------------------------------0123456789012345
380 )
381
382 t.Run("LineMatches", func(t *testing.T) {
383 sres := searchForTest(t, b, query.NewAnd(
384 &query.Substring{
385 Pattern: "banana",
386 },
387 &query.Substring{
388 Pattern: "apple",
389 },
390 ))
391 matches := sres.Files
392 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
393 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
394 }
395
396 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
397 t.Fatalf("got %#v, want offsets 2,9", matches)
398 }
399
400 wantStats := Stats{
401 FilesLoaded: 1,
402 ContentBytesLoaded: 18,
403 IndexBytesLoaded: 8,
404 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
405 MatchCount: 1,
406 FileCount: 1,
407 FilesConsidered: 2,
408 ShardsScanned: 1,
409 }
410 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
411 t.Errorf("got stats diff %s", diff)
412 }
413 })
414
415 t.Run("ChunkMatches", func(t *testing.T) {
416 sres := searchForTest(t, b, query.NewAnd(
417 &query.Substring{
418 Pattern: "banana",
419 },
420 &query.Substring{
421 Pattern: "apple",
422 },
423 ), chunkOpts)
424 matches := sres.Files
425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
427 }
428
429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
430 t.Fatalf("got %#v, want offsets 2,9", matches)
431 }
432
433 wantStats := Stats{
434 FilesLoaded: 1,
435 ContentBytesLoaded: 18,
436 IndexBytesLoaded: 8,
437 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
438 MatchCount: 2,
439 FileCount: 1,
440 FilesConsidered: 2,
441 ShardsScanned: 1,
442 }
443 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
444 t.Errorf("got stats diff %s", diff)
445 }
446 })
447}
448
449func TestAndNegateSearch(t *testing.T) {
450 b := testIndexBuilder(t, nil,
451 Document{Name: "f1", Content: []byte("x banana y")},
452 // -----------------------------------0123456789
453 Document{Name: "f4", Content: []byte("x banana apple y")})
454
455 t.Run("LineMatches", func(t *testing.T) {
456 sres := searchForTest(t, b, query.NewAnd(
457 &query.Substring{
458 Pattern: "banana",
459 },
460 &query.Not{Child: &query.Substring{
461 Pattern: "apple",
462 }}))
463
464 matches := sres.Files
465
466 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
467 t.Fatalf("got %v, want 1 match", matches)
468 }
469 if matches[0].FileName != "f1" {
470 t.Fatalf("got match %#v, want FileName: f1", matches[0])
471 }
472 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
473 t.Fatalf("got %v, want offset 2", matches)
474 }
475 })
476
477 t.Run("ChunkMatches", func(t *testing.T) {
478 sres := searchForTest(t, b,
479 query.NewAnd(
480 &query.Substring{
481 Pattern: "banana",
482 },
483 &query.Not{Child: &query.Substring{
484 Pattern: "apple",
485 }},
486 ),
487 chunkOpts,
488 )
489
490 matches := sres.Files
491
492 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
493 t.Fatalf("got %v, want 1 match", matches)
494 }
495 if matches[0].FileName != "f1" {
496 t.Fatalf("got match %#v, want FileName: f1", matches[0])
497 }
498 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
499 t.Fatalf("got %v, want offset 2", matches)
500 }
501 })
502}
503
504func TestNegativeMatchesOnlyShortcut(t *testing.T) {
505 b := testIndexBuilder(t, nil,
506 Document{Name: "f1", Content: []byte("x banana y")},
507 Document{Name: "f2", Content: []byte("x appelmoes y")},
508 Document{Name: "f3", Content: []byte("x appelmoes y")},
509 Document{Name: "f3", Content: []byte("x appelmoes y")})
510
511 t.Run("LineMatches", func(t *testing.T) {
512 sres := searchForTest(t, b, query.NewAnd(
513 &query.Substring{
514 Pattern: "banana",
515 },
516 &query.Not{Child: &query.Substring{
517 Pattern: "appel",
518 }}))
519
520 if sres.Stats.FilesConsidered != 1 {
521 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
522 }
523 })
524
525 t.Run("ChunkMatches", func(t *testing.T) {
526 sres := searchForTest(t, b, query.NewAnd(
527 &query.Substring{
528 Pattern: "banana",
529 },
530 &query.Not{Child: &query.Substring{
531 Pattern: "appel",
532 }}), chunkOpts)
533
534 if sres.Stats.FilesConsidered != 1 {
535 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
536 }
537 })
538}
539
540func TestFileSearch(t *testing.T) {
541 b := testIndexBuilder(t, nil,
542 Document{Name: "banzana", Content: []byte("x orange y")},
543 // -------------0123456
544 Document{Name: "banana", Content: []byte("x apple y")},
545 // -------------012345
546 )
547
548 t.Run("LineMatches", func(t *testing.T) {
549 sres := searchForTest(t, b, &query.Substring{
550 Pattern: "anan",
551 FileName: true,
552 })
553
554 matches := sres.Files
555 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
556 t.Fatalf("got %v, want 1 match", matches)
557 }
558
559 got := matches[0].LineMatches[0]
560 want := LineMatch{
561 Line: []byte("banana"),
562 LineFragments: []LineFragmentMatch{{
563 Offset: 1,
564 LineOffset: 1,
565 MatchLength: 4,
566 }},
567 FileName: true,
568 }
569
570 if !reflect.DeepEqual(got, want) {
571 t.Errorf("got %#v, want %#v", got, want)
572 }
573 })
574
575 t.Run("ChunkMatches", func(t *testing.T) {
576 sres := searchForTest(t, b, &query.Substring{
577 Pattern: "anan",
578 FileName: true,
579 }, chunkOpts)
580
581 matches := sres.Files
582 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
583 t.Fatalf("got %v, want 1 match", matches)
584 }
585
586 got := matches[0].ChunkMatches[0]
587 want := ChunkMatch{
588 Content: []byte("banana"),
589 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
590 Ranges: []Range{{
591 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
592 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
593 }},
594 FileName: true,
595 }
596
597 if diff := cmp.Diff(want, got); diff != "" {
598 t.Fatal(diff)
599 }
600 })
601
602 t.Run("FileNameSet", func(t *testing.T) {
603 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
604
605 matches := sres.Files
606 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
607 t.Fatalf("got %v, want 1 match", matches)
608 }
609
610 got := matches[0].ChunkMatches[0]
611 want := ChunkMatch{
612 Content: []byte("banana"),
613 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
614 Ranges: []Range{{
615 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
616 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
617 }},
618 FileName: true,
619 }
620
621 if diff := cmp.Diff(want, got); diff != "" {
622 t.Fatal(diff)
623 }
624 })
625}
626
627func TestFileCase(t *testing.T) {
628 b := testIndexBuilder(t, nil,
629 Document{Name: "BANANA", Content: []byte("x orange y")})
630
631 t.Run("LineMatches", func(t *testing.T) {
632 sres := searchForTest(t, b, &query.Substring{
633 Pattern: "banana",
634 FileName: true,
635 })
636
637 matches := sres.Files
638 if len(matches) != 1 || matches[0].FileName != "BANANA" {
639 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
640 }
641 })
642
643 t.Run("ChunkMatches", func(t *testing.T) {
644 sres := searchForTest(t, b, &query.Substring{
645 Pattern: "banana",
646 FileName: true,
647 }, chunkOpts)
648
649 matches := sres.Files
650 if len(matches) != 1 || matches[0].FileName != "BANANA" {
651 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
652 }
653 })
654}
655
656func TestFileRegexpSearchBruteForce(t *testing.T) {
657 b := testIndexBuilder(t, nil,
658 Document{Name: "banzana", Content: []byte("x orange y")},
659 Document{Name: "banana", Content: []byte("x apple y")},
660 )
661 t.Run("LineMatches", func(t *testing.T) {
662 sres := searchForTest(t, b, &query.Regexp{
663 Regexp: mustParseRE("[qn][zx]"),
664 FileName: true,
665 })
666
667 matches := sres.Files
668 if len(matches) != 1 || matches[0].FileName != "banzana" {
669 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
670 }
671 })
672 t.Run("LineMatches", func(t *testing.T) {
673 sres := searchForTest(t, b, &query.Regexp{
674 Regexp: mustParseRE("[qn][zx]"),
675 FileName: true,
676 }, chunkOpts)
677
678 matches := sres.Files
679 if len(matches) != 1 || matches[0].FileName != "banzana" {
680 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
681 }
682 })
683}
684
685func TestFileRegexpSearchShortString(t *testing.T) {
686 b := testIndexBuilder(t, nil,
687 Document{Name: "banana.py", Content: []byte("x orange y")})
688
689 t.Run("LineMatches", func(t *testing.T) {
690 sres := searchForTest(t, b, &query.Regexp{
691 Regexp: mustParseRE("ana.py"),
692 FileName: true,
693 })
694
695 matches := sres.Files
696 if len(matches) != 1 || matches[0].FileName != "banana.py" {
697 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
698 }
699 })
700
701 t.Run("ChunkMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Regexp{
703 Regexp: mustParseRE("ana.py"),
704 FileName: true,
705 }, chunkOpts)
706
707 matches := sres.Files
708 if len(matches) != 1 || matches[0].FileName != "banana.py" {
709 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
710 }
711 })
712}
713
714func TestFileSubstringSearchBruteForce(t *testing.T) {
715 b := testIndexBuilder(t, nil,
716 Document{Name: "BANZANA", Content: []byte("x orange y")},
717 Document{Name: "banana", Content: []byte("x apple y")})
718
719 q := &query.Substring{
720 Pattern: "z",
721 FileName: true,
722 }
723
724 t.Run("LineMatches", func(t *testing.T) {
725 res := searchForTest(t, b, q)
726 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
727 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
728 }
729 })
730
731 t.Run("ChunkMatches", func(t *testing.T) {
732 res := searchForTest(t, b, q, chunkOpts)
733 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
734 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
735 }
736 })
737}
738
739func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
740 b := testIndexBuilder(t, nil,
741 Document{Name: "BANZANA", Content: []byte("x orange y")},
742 Document{Name: "bananaq", Content: []byte("x apple y")})
743
744 q := &query.Substring{
745 Pattern: "q",
746 FileName: true,
747 }
748 t.Run("LineMatches", func(t *testing.T) {
749 res := searchForTest(t, b, q)
750 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
751 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
752 }
753 })
754
755 t.Run("LineMatches", func(t *testing.T) {
756 res := searchForTest(t, b, q, chunkOpts)
757 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
758 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
759 }
760 })
761}
762
763func TestSearchMatchAll(t *testing.T) {
764 b := testIndexBuilder(t, nil,
765 Document{Name: "banzana", Content: []byte("x orange y")},
766 Document{Name: "banana", Content: []byte("x apple y")})
767
768 t.Run("LineMatches", func(t *testing.T) {
769 sres := searchForTest(t, b, &query.Const{Value: true})
770 matches := sres.Files
771 if len(matches) != 2 {
772 t.Fatalf("got %v, want 2 matches", matches)
773 }
774 })
775
776 t.Run("ChunkMatches", func(t *testing.T) {
777 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
778 matches := sres.Files
779 if len(matches) != 2 {
780 t.Fatalf("got %v, want 2 matches", matches)
781 }
782 })
783}
784
785func TestSearchNewline(t *testing.T) {
786 b := testIndexBuilder(t, nil,
787 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
788
789 t.Run("LineMatches", func(t *testing.T) {
790 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
791
792 // Just check that we don't crash.
793
794 matches := sres.Files
795 if len(matches) != 1 {
796 t.Fatalf("got %v, want 1 matches", matches)
797 }
798 })
799
800 t.Run("ChunkMatches", func(t *testing.T) {
801 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
802
803 // Just check that we don't crash.
804
805 matches := sres.Files
806 if len(matches) != 1 {
807 t.Fatalf("got %v, want 1 matches", matches)
808 }
809 })
810}
811
812func TestSearchMatchAllRegexp(t *testing.T) {
813 b := testIndexBuilder(t, nil,
814 Document{Name: "banzana", Content: []byte("abcd")},
815 Document{Name: "banana", Content: []byte("pqrs")})
816
817 t.Run("LineMatches", func(t *testing.T) {
818 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
819
820 matches := sres.Files
821 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
822 t.Fatalf("got %v, want 2 matches", matches)
823 }
824 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
825 t.Fatalf("want 4 chars in every file, got %#v", matches)
826 }
827
828 })
829
830 t.Run("ChunkMatches", func(t *testing.T) {
831 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
832
833 matches := sres.Files
834 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
835 t.Fatalf("got %v, want 2 matches", matches)
836 }
837 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
838 t.Fatalf("want 4 chars in every file, got %#v", matches)
839 }
840
841 })
842}
843
844func TestFileRestriction(t *testing.T) {
845 b := testIndexBuilder(t, nil,
846 Document{Name: "banana1", Content: []byte("x orange y")},
847 Document{Name: "banana2", Content: []byte("x apple y")},
848 Document{Name: "orange", Content: []byte("x apple z")})
849
850 t.Run("LineMatches", func(t *testing.T) {
851 sres := searchForTest(t, b, query.NewAnd(
852 &query.Substring{
853 Pattern: "banana",
854 FileName: true,
855 },
856 &query.Substring{
857 Pattern: "apple",
858 }))
859
860 matches := sres.Files
861 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
862 t.Fatalf("got %v, want 1 match", matches)
863 }
864
865 match := matches[0].LineMatches[0]
866 got := string(match.Line)
867 want := "x apple y"
868 if got != want {
869 t.Errorf("got match %#v, want line %q", match, want)
870 }
871 })
872
873 t.Run("ChunkMatches", func(t *testing.T) {
874 sres := searchForTest(t, b, query.NewAnd(
875 &query.Substring{
876 Pattern: "banana",
877 FileName: true,
878 },
879 &query.Substring{
880 Pattern: "apple",
881 }), chunkOpts)
882
883 matches := sres.Files
884 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
885 t.Fatalf("got %v, want 1 match", matches)
886 }
887
888 match := matches[0].ChunkMatches[0]
889 got := string(match.Content)
890 want := "x apple y"
891 if got != want {
892 t.Errorf("got match %#v, want line %q", match, want)
893 }
894 })
895}
896
897func TestFileNameBoundary(t *testing.T) {
898 b := testIndexBuilder(t, nil,
899 Document{Name: "banana2", Content: []byte("x apple y")},
900 Document{Name: "helpers.go", Content: []byte("x apple y")},
901 Document{Name: "foo", Content: []byte("x apple y")})
902
903 t.Run("LineMatches", func(t *testing.T) {
904 sres := searchForTest(t, b, &query.Substring{
905 Pattern: "helpers.go",
906 FileName: true,
907 })
908
909 matches := sres.Files
910 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
911 t.Fatalf("got %v, want 1 match", matches)
912 }
913 })
914
915 t.Run("ChunkMatches", func(t *testing.T) {
916 sres := searchForTest(t, b, &query.Substring{
917 Pattern: "helpers.go",
918 FileName: true,
919 }, chunkOpts)
920
921 matches := sres.Files
922 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
923 t.Fatalf("got %v, want 1 match", matches)
924 }
925 })
926}
927
928func TestDocumentOrder(t *testing.T) {
929 var docs []Document
930 for i := 0; i < 3; i++ {
931 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
932 }
933
934 b := testIndexBuilder(t, nil, docs...)
935
936 t.Run("LineMatches", func(t *testing.T) {
937 sres := searchForTest(t, b, query.NewAnd(
938 &query.Substring{
939 Pattern: "needle",
940 }))
941
942 want := []string{"f0", "f1", "f2"}
943 var got []string
944 for _, f := range sres.Files {
945 got = append(got, f.FileName)
946 }
947 if !reflect.DeepEqual(got, want) {
948 t.Fatalf("got %v, want %v", got, want)
949 }
950 })
951
952 t.Run("ChunkMatches", func(t *testing.T) {
953 sres := searchForTest(t, b,
954 query.NewAnd(&query.Substring{
955 Pattern: "needle",
956 }),
957 chunkOpts,
958 )
959
960 want := []string{"f0", "f1", "f2"}
961 var got []string
962 for _, f := range sres.Files {
963 got = append(got, f.FileName)
964 }
965 if !reflect.DeepEqual(got, want) {
966 t.Fatalf("got %v, want %v", got, want)
967 }
968 })
969}
970
971func TestBranchMask(t *testing.T) {
972 b := testIndexBuilder(t, &Repository{
973 Branches: []RepositoryBranch{
974 {"master", "v-master"},
975 {"stable", "v-stable"},
976 {"bonzai", "v-bonzai"},
977 },
978 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
979 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
980 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
981 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
982 )
983
984 t.Run("LineMatches", func(t *testing.T) {
985 sres := searchForTest(t, b, query.NewAnd(
986 &query.Substring{
987 Pattern: "needle",
988 },
989 &query.Branch{
990 Pattern: "table",
991 }))
992
993 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
994 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
995 }
996
997 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
998 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
999 }
1000 })
1001
1002 t.Run("ChunkMatches", func(t *testing.T) {
1003 sres := searchForTest(t, b, query.NewAnd(
1004 &query.Substring{
1005 Pattern: "needle",
1006 },
1007 &query.Branch{
1008 Pattern: "table",
1009 }),
1010 chunkOpts,
1011 )
1012
1013 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1014 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1015 }
1016
1017 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1018 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1019 }
1020 })
1021}
1022
1023func TestBranchLimit(t *testing.T) {
1024 for limit := 64; limit <= 65; limit++ {
1025 r := &Repository{}
1026 for i := 0; i < limit; i++ {
1027 s := fmt.Sprintf("b%d", i)
1028 r.Branches = append(r.Branches, RepositoryBranch{
1029 s, "v-" + s,
1030 })
1031 }
1032 _, err := NewIndexBuilder(r)
1033 if limit == 64 && err != nil {
1034 t.Fatalf("NewIndexBuilder: %v", err)
1035 } else if limit == 65 && err == nil {
1036 t.Fatalf("NewIndexBuilder succeeded")
1037 }
1038 }
1039}
1040
1041func TestBranchReport(t *testing.T) {
1042 branches := []string{"stable", "master"}
1043 b := testIndexBuilder(t, &Repository{
1044 Branches: []RepositoryBranch{
1045 {"stable", "vs"},
1046 {"master", "vm"},
1047 },
1048 },
1049 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1050
1051 t.Run("LineMatches", func(t *testing.T) {
1052 sres := searchForTest(t, b, &query.Substring{
1053 Pattern: "needle",
1054 })
1055 if len(sres.Files) != 1 {
1056 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1057 }
1058
1059 f := sres.Files[0]
1060 if !reflect.DeepEqual(f.Branches, branches) {
1061 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1062 }
1063 })
1064
1065 t.Run("ChunkMatches", func(t *testing.T) {
1066 sres := searchForTest(t, b, &query.Substring{
1067 Pattern: "needle",
1068 }, chunkOpts)
1069 if len(sres.Files) != 1 {
1070 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1071 }
1072
1073 f := sres.Files[0]
1074 if !reflect.DeepEqual(f.Branches, branches) {
1075 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1076 }
1077 })
1078
1079}
1080
1081func TestBranchVersions(t *testing.T) {
1082 b := testIndexBuilder(t, &Repository{
1083 Branches: []RepositoryBranch{
1084 {"stable", "v-stable"},
1085 {"master", "v-master"},
1086 },
1087 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1088
1089 t.Run("LineMatches", func(t *testing.T) {
1090 sres := searchForTest(t, b, &query.Substring{
1091 Pattern: "needle",
1092 })
1093 if len(sres.Files) != 1 {
1094 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1095 }
1096
1097 f := sres.Files[0]
1098 if f.Version != "v-master" {
1099 t.Fatalf("got file %#v, want version 'v-master'", f)
1100 }
1101 })
1102
1103 t.Run("ChunkMatches", func(t *testing.T) {
1104 sres := searchForTest(t, b, &query.Substring{
1105 Pattern: "needle",
1106 }, chunkOpts)
1107 if len(sres.Files) != 1 {
1108 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1109 }
1110
1111 f := sres.Files[0]
1112 if f.Version != "v-master" {
1113 t.Fatalf("got file %#v, want version 'v-master'", f)
1114 }
1115 })
1116}
1117
// mustParseRE parses s using the Perl syntax flags and returns the parsed
// regular expression. It panics with the parse error if s is not valid.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}
	panic(err)
}
1126
1127func TestRegexp(t *testing.T) {
1128 content := []byte("needle the bla")
1129 // ----------------01234567890123
1130
1131 b := testIndexBuilder(t, nil,
1132 Document{
1133 Name: "f1",
1134 Content: content,
1135 })
1136
1137 t.Run("LineMatches", func(t *testing.T) {
1138 sres := searchForTest(t, b,
1139 &query.Regexp{
1140 Regexp: mustParseRE("dle.*bla"),
1141 })
1142
1143 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1144 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1145 }
1146
1147 got := sres.Files[0].LineMatches[0]
1148 want := LineMatch{
1149 LineFragments: []LineFragmentMatch{{
1150 LineOffset: 3,
1151 Offset: 3,
1152 MatchLength: 11,
1153 }},
1154 Line: content,
1155 FileName: false,
1156 LineNumber: 1,
1157 LineStart: 0,
1158 LineEnd: 14,
1159 }
1160
1161 if !reflect.DeepEqual(got, want) {
1162 t.Errorf("got %#v, want %#v", got, want)
1163 }
1164 })
1165
1166 t.Run("ChunkMatches", func(t *testing.T) {
1167 sres := searchForTest(t, b,
1168 &query.Regexp{
1169 Regexp: mustParseRE("dle.*bla"),
1170 }, chunkOpts)
1171
1172 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1173 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1174 }
1175
1176 got := sres.Files[0].ChunkMatches[0]
1177 want := ChunkMatch{
1178 Content: content,
1179 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1180 Ranges: []Range{{
1181 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1182 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1183 }},
1184 }
1185
1186 if diff := cmp.Diff(want, got); diff != "" {
1187 t.Fatal(diff)
1188 }
1189 })
1190}
1191
1192func TestRegexpFile(t *testing.T) {
1193 content := []byte("needle the bla")
1194
1195 name := "let's play: find the mussel"
1196 b := testIndexBuilder(t, nil,
1197 Document{Name: name, Content: content},
1198 Document{Name: "play.txt", Content: content})
1199
1200 t.Run("LineMatches", func(t *testing.T) {
1201 sres := searchForTest(t, b,
1202 &query.Regexp{
1203 Regexp: mustParseRE("play.*mussel"),
1204 FileName: true,
1205 })
1206
1207 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1208 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1209 }
1210
1211 if sres.Files[0].FileName != name {
1212 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1213 }
1214 })
1215
1216 t.Run("ChunkMatches", func(t *testing.T) {
1217 sres := searchForTest(t, b,
1218 &query.Regexp{
1219 Regexp: mustParseRE("play.*mussel"),
1220 FileName: true,
1221 }, chunkOpts)
1222
1223 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1224 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1225 }
1226
1227 if sres.Files[0].FileName != name {
1228 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1229 }
1230 })
1231}
1232
1233func TestRegexpOrder(t *testing.T) {
1234 content := []byte("bla the needle")
1235 // ----------------01234567890123
1236
1237 b := testIndexBuilder(t, nil,
1238 Document{Name: "f1", Content: content})
1239
1240 t.Run("LineMatches", func(t *testing.T) {
1241 sres := searchForTest(t, b,
1242 &query.Regexp{
1243 Regexp: mustParseRE("dle.*bla"),
1244 })
1245
1246 if len(sres.Files) != 0 {
1247 t.Fatalf("got %v, want 0 matches", sres.Files)
1248 }
1249 })
1250
1251 t.Run("ChunkMatches", func(t *testing.T) {
1252 sres := searchForTest(t, b,
1253 &query.Regexp{
1254 Regexp: mustParseRE("dle.*bla"),
1255 })
1256
1257 if len(sres.Files) != 0 {
1258 t.Fatalf("got %v, want 0 matches", sres.Files)
1259 }
1260 })
1261}
1262
1263func TestRepoName(t *testing.T) {
1264 content := []byte("bla the needle")
1265 // ----------------01234567890123
1266
1267 b := testIndexBuilder(t, &Repository{Name: "bla"},
1268 Document{Name: "f1", Content: content})
1269
1270 t.Run("LineMatches", func(t *testing.T) {
1271 sres := searchForTest(t, b,
1272 query.NewAnd(
1273 &query.Substring{Pattern: "needle"},
1274 &query.Repo{Regexp: regexp.MustCompile("foo")},
1275 ))
1276
1277 if len(sres.Files) != 0 {
1278 t.Fatalf("got %v, want 0 matches", sres.Files)
1279 }
1280
1281 if sres.Stats.FilesConsidered > 0 {
1282 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1283 }
1284
1285 sres = searchForTest(t, b,
1286 query.NewAnd(
1287 &query.Substring{Pattern: "needle"},
1288 &query.Repo{Regexp: regexp.MustCompile("bla")},
1289 ))
1290 if len(sres.Files) != 1 {
1291 t.Fatalf("got %v, want 1 match", sres.Files)
1292 }
1293 })
1294
1295 t.Run("ChunkMatches", func(t *testing.T) {
1296 sres := searchForTest(t, b,
1297 query.NewAnd(
1298 &query.Substring{Pattern: "needle"},
1299 &query.Repo{Regexp: regexp.MustCompile("foo")},
1300 ),
1301 chunkOpts,
1302 )
1303
1304 if len(sres.Files) != 0 {
1305 t.Fatalf("got %v, want 0 matches", sres.Files)
1306 }
1307
1308 if sres.Stats.FilesConsidered > 0 {
1309 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1310 }
1311
1312 sres = searchForTest(t, b,
1313 query.NewAnd(
1314 &query.Substring{Pattern: "needle"},
1315 &query.Repo{Regexp: regexp.MustCompile("bla")},
1316 ))
1317 if len(sres.Files) != 1 {
1318 t.Fatalf("got %v, want 1 match", sres.Files)
1319 }
1320 })
1321}
1322
1323func TestMergeMatches(t *testing.T) {
1324 content := []byte("blablabla")
1325 b := testIndexBuilder(t, nil,
1326 Document{Name: "f1", Content: content})
1327
1328 t.Run("LineMatches", func(t *testing.T) {
1329 sres := searchForTest(t, b,
1330 &query.Substring{Pattern: "bla"})
1331 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1332 t.Fatalf("got %v, want 1 match", sres.Files)
1333 }
1334 })
1335
1336 t.Run("ChunkMatches", func(t *testing.T) {
1337 sres := searchForTest(t, b,
1338 &query.Substring{Pattern: "bla"},
1339 chunkOpts,
1340 )
1341 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1342 t.Fatalf("got %v, want 1 match", sres.Files)
1343 }
1344 })
1345}
1346
1347func TestRepoURL(t *testing.T) {
1348 content := []byte("blablabla")
1349 b := testIndexBuilder(t, &Repository{
1350 Name: "name",
1351 URL: "URL",
1352 CommitURLTemplate: "commit",
1353 FileURLTemplate: "file-url",
1354 LineFragmentTemplate: "fragment",
1355 }, Document{Name: "f1", Content: content})
1356
1357 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1358
1359 if sres.RepoURLs["name"] != "file-url" {
1360 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1361 }
1362 if sres.LineFragments["name"] != "fragment" {
1363 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1364 }
1365}
1366
1367func TestRegexpCaseSensitive(t *testing.T) {
1368 content := []byte("bla\nfunc unmarshalGitiles\n")
1369 b := testIndexBuilder(t, nil, Document{
1370 Name: "f1",
1371 Content: content,
1372 })
1373
1374 t.Run("LineMatches", func(t *testing.T) {
1375 res := searchForTest(t, b,
1376 &query.Regexp{
1377 Regexp: mustParseRE("func.*Gitiles"),
1378 CaseSensitive: true,
1379 })
1380
1381 if len(res.Files) != 1 {
1382 t.Fatalf("got %v, want one match", res.Files)
1383 }
1384 })
1385
1386 t.Run("ChunkMatches", func(t *testing.T) {
1387 res := searchForTest(t, b,
1388 &query.Regexp{
1389 Regexp: mustParseRE("func.*Gitiles"),
1390 CaseSensitive: true,
1391 },
1392 chunkOpts,
1393 )
1394
1395 if len(res.Files) != 1 {
1396 t.Fatalf("got %v, want one match", res.Files)
1397 }
1398 })
1399}
1400
1401func TestRegexpCaseFolding(t *testing.T) {
1402 content := []byte("bla\nfunc unmarshalGitiles\n")
1403
1404 b := testIndexBuilder(t, nil,
1405 Document{Name: "f1", Content: content})
1406 res := searchForTest(t, b,
1407 &query.Regexp{
1408 Regexp: mustParseRE("func.*GITILES"),
1409 CaseSensitive: false,
1410 })
1411
1412 if len(res.Files) != 1 {
1413 t.Fatalf("got %v, want one match", res.Files)
1414 }
1415}
1416
1417func TestCaseRegexp(t *testing.T) {
1418 content := []byte("BLABLABLA")
1419 b := testIndexBuilder(t, nil,
1420 Document{Name: "f1", Content: content})
1421
1422 t.Run("LineMatches", func(t *testing.T) {
1423 res := searchForTest(t, b,
1424 &query.Regexp{
1425 Regexp: mustParseRE("[xb][xl][xa]"),
1426 CaseSensitive: true,
1427 })
1428
1429 if len(res.Files) > 0 {
1430 t.Fatalf("got %v, want no matches", res.Files)
1431 }
1432 })
1433
1434 t.Run("ChunkMatches", func(t *testing.T) {
1435 res := searchForTest(t, b,
1436 &query.Regexp{
1437 Regexp: mustParseRE("[xb][xl][xa]"),
1438 CaseSensitive: true,
1439 },
1440 chunkOpts,
1441 )
1442
1443 if len(res.Files) > 0 {
1444 t.Fatalf("got %v, want no matches", res.Files)
1445 }
1446 })
1447}
1448
1449func TestNegativeRegexp(t *testing.T) {
1450 content := []byte("BLABLABLA needle bla")
1451 b := testIndexBuilder(t, nil,
1452 Document{Name: "f1", Content: content})
1453
1454 t.Run("LineMatches", func(t *testing.T) {
1455 res := searchForTest(t, b,
1456 query.NewAnd(
1457 &query.Substring{
1458 Pattern: "needle",
1459 },
1460 &query.Not{
1461 Child: &query.Regexp{
1462 Regexp: mustParseRE(".cs"),
1463 },
1464 }))
1465
1466 if len(res.Files) != 1 {
1467 t.Fatalf("got %v, want 1 match", res.Files)
1468 }
1469 })
1470
1471 t.Run("ChunkMatches", func(t *testing.T) {
1472 res := searchForTest(t, b,
1473 query.NewAnd(
1474 &query.Substring{
1475 Pattern: "needle",
1476 },
1477 &query.Not{
1478 Child: &query.Regexp{
1479 Regexp: mustParseRE(".cs"),
1480 },
1481 },
1482 ),
1483 chunkOpts)
1484
1485 if len(res.Files) != 1 {
1486 t.Fatalf("got %v, want 1 match", res.Files)
1487 }
1488 })
1489}
1490
1491func TestSymbolRank(t *testing.T) {
1492 t.Skip()
1493
1494 content := []byte("func bla() blubxxxxx")
1495 // ----------------01234567890123456789
1496 b := testIndexBuilder(t, nil,
1497 Document{
1498 Name: "f1",
1499 Content: content,
1500 }, Document{
1501 Name: "f2",
1502 Content: content,
1503 Symbols: []DocumentSection{{5, 8}},
1504 }, Document{
1505 Name: "f3",
1506 Content: content,
1507 })
1508
1509 t.Run("LineMatches", func(t *testing.T) {
1510 res := searchForTest(t, b,
1511 &query.Substring{
1512 CaseSensitive: false,
1513 Pattern: "bla",
1514 })
1515
1516 if len(res.Files) != 3 {
1517 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1518 }
1519 if res.Files[0].FileName != "f2" {
1520 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1521 }
1522 })
1523
1524 t.Run("ChunkMatches", func(t *testing.T) {
1525 res := searchForTest(t, b,
1526 &query.Substring{
1527 CaseSensitive: false,
1528 Pattern: "bla",
1529 }, chunkOpts)
1530
1531 if len(res.Files) != 3 {
1532 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1533 }
1534 if res.Files[0].FileName != "f2" {
1535 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1536 }
1537 })
1538}
1539
1540func TestSymbolRankRegexpUTF8(t *testing.T) {
1541 t.Skip()
1542
1543 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1544 content := []byte(prefix +
1545 "func bla() blub")
1546 // ------012345678901234
1547 b := testIndexBuilder(t, nil,
1548 Document{
1549 Name: "f1",
1550 Content: content,
1551 }, Document{
1552 Name: "f2",
1553 Content: content,
1554 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1555 }, Document{
1556 Name: "f3",
1557 Content: content,
1558 })
1559
1560 t.Run("LineMatches", func(t *testing.T) {
1561 res := searchForTest(t, b,
1562 &query.Regexp{
1563 Regexp: mustParseRE("b.a"),
1564 })
1565
1566 if len(res.Files) != 3 {
1567 t.Fatalf("got %#v, want 3 files", res.Files)
1568 }
1569 if res.Files[0].FileName != "f2" {
1570 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1571 }
1572 })
1573
1574 t.Run("ChunjkMatches", func(t *testing.T) {
1575 res := searchForTest(t, b,
1576 &query.Regexp{
1577 Regexp: mustParseRE("b.a"),
1578 }, chunkOpts)
1579
1580 if len(res.Files) != 3 {
1581 t.Fatalf("got %#v, want 3 files", res.Files)
1582 }
1583 if res.Files[0].FileName != "f2" {
1584 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1585 }
1586 })
1587}
1588
1589func TestPartialSymbolRank(t *testing.T) {
1590 t.Skip()
1591
1592 content := []byte("func bla() blub")
1593 // ----------------012345678901234
1594
1595 b := testIndexBuilder(t, nil,
1596 Document{
1597 Name: "f1",
1598 Content: content,
1599 Symbols: []DocumentSection{{4, 9}},
1600 }, Document{
1601 Name: "f2",
1602 Content: content,
1603 Symbols: []DocumentSection{{4, 8}},
1604 }, Document{
1605 Name: "f3",
1606 Content: content,
1607 Symbols: []DocumentSection{{4, 9}},
1608 })
1609
1610 t.Run("LineMatches", func(t *testing.T) {
1611 res := searchForTest(t, b,
1612 &query.Substring{
1613 Pattern: "bla",
1614 })
1615
1616 if len(res.Files) != 3 {
1617 t.Fatalf("got %#v, want 3 files", res.Files)
1618 }
1619 if res.Files[0].FileName != "f2" {
1620 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1621 }
1622 })
1623
1624 t.Run("ChunkMatches", func(t *testing.T) {
1625 res := searchForTest(t, b,
1626 &query.Substring{
1627 Pattern: "bla",
1628 }, chunkOpts)
1629
1630 if len(res.Files) != 3 {
1631 t.Fatalf("got %#v, want 3 files", res.Files)
1632 }
1633 if res.Files[0].FileName != "f2" {
1634 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1635 }
1636 })
1637}
1638
1639func TestNegativeRepo(t *testing.T) {
1640 content := []byte("bla the needle")
1641 // ----------------01234567890123
1642 b := testIndexBuilder(t, &Repository{
1643 Name: "bla",
1644 }, Document{Name: "f1", Content: content})
1645
1646 t.Run("LineMatches", func(t *testing.T) {
1647 sres := searchForTest(t, b,
1648 query.NewAnd(
1649 &query.Substring{Pattern: "needle"},
1650 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1651 ))
1652
1653 if len(sres.Files) != 0 {
1654 t.Fatalf("got %v, want 0 matches", sres.Files)
1655 }
1656 })
1657
1658 t.Run("ChunkMatches", func(t *testing.T) {
1659 sres := searchForTest(t, b,
1660 query.NewAnd(
1661 &query.Substring{Pattern: "needle"},
1662 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1663 ), chunkOpts)
1664
1665 if len(sres.Files) != 0 {
1666 t.Fatalf("got %v, want 0 matches", sres.Files)
1667 }
1668 })
1669}
1670
1671func TestListRepos(t *testing.T) {
1672 content := []byte("bla the needle\n")
1673 // ----------------012345678901234-
1674
1675 t.Run("default and minimal fallback", func(t *testing.T) {
1676 repo := &Repository{
1677 Name: "reponame",
1678 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1679 }
1680 b := testIndexBuilder(t, repo,
1681 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1682 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1683 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1684 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1685
1686 searcher := searcherForTest(t, b)
1687
1688 for _, opts := range []*ListOptions{
1689 nil,
1690 {Minimal: false},
1691 {Minimal: true},
1692 } {
1693 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1694 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1695
1696 res, err := searcher.List(context.Background(), q, opts)
1697 if err != nil {
1698 t.Fatalf("List(%v): %v", q, err)
1699 }
1700
1701 want := &RepoList{
1702 Repos: []*RepoListEntry{{
1703 Repository: *repo,
1704 Stats: RepoStats{
1705 Documents: 4,
1706 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1707 Shards: 1,
1708
1709 NewLinesCount: 4,
1710 DefaultBranchNewLinesCount: 2,
1711 OtherBranchesNewLinesCount: 3,
1712 },
1713 }},
1714 Stats: RepoStats{
1715 Documents: 4,
1716 ContentBytes: 68,
1717 Shards: 1,
1718
1719 NewLinesCount: 4,
1720 DefaultBranchNewLinesCount: 2,
1721 OtherBranchesNewLinesCount: 3,
1722 },
1723 }
1724 ignored := []cmp.Option{
1725 cmpopts.EquateEmpty(),
1726 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1727 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1728 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1729 cmpopts.IgnoreFields(Repository{}, "priority"),
1730 }
1731 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1732 t.Fatalf("mismatch (-want +got):\n%s", diff)
1733 }
1734
1735 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1736 res, err = searcher.List(context.Background(), q, nil)
1737 if err != nil {
1738 t.Fatalf("List(%v): %v", q, err)
1739 }
1740 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1741 t.Fatalf("got %v, want 0 matches", res)
1742 }
1743 })
1744 }
1745 })
1746
1747 t.Run("minimal", func(t *testing.T) {
1748 repo := &Repository{
1749 ID: 1234,
1750 Name: "reponame",
1751 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1752 RawConfig: map[string]string{"repoid": "1234"},
1753 }
1754 b := testIndexBuilder(t, repo,
1755 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1756 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1757 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1758 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1759
1760 searcher := searcherForTest(t, b)
1761
1762 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1763 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1764 if err != nil {
1765 t.Fatalf("List(%v): %v", q, err)
1766 }
1767
1768 want := &RepoList{
1769 Minimal: map[uint32]*MinimalRepoListEntry{
1770 repo.ID: {
1771 HasSymbols: repo.HasSymbols,
1772 Branches: repo.Branches,
1773 },
1774 },
1775 Stats: RepoStats{
1776 Shards: 1,
1777 Documents: 4,
1778 IndexBytes: 412,
1779 ContentBytes: 68,
1780 NewLinesCount: 4,
1781 DefaultBranchNewLinesCount: 2,
1782 OtherBranchesNewLinesCount: 3,
1783 },
1784 }
1785
1786 if os.Getenv("ZOEKT_ENABLE_NGRAM_BS") != "" {
1787 want.Stats.IndexBytes = 228
1788 }
1789
1790 if diff := cmp.Diff(want, res); diff != "" {
1791 t.Fatalf("mismatch (-want +got):\n%s", diff)
1792 }
1793
1794 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1795 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1796 if err != nil {
1797 t.Fatalf("List(%v): %v", q, err)
1798 }
1799 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1800 t.Fatalf("got %v, want 0 matches", res)
1801 }
1802 })
1803}
1804
1805func TestListReposByContent(t *testing.T) {
1806 content := []byte("bla the needle")
1807
1808 b := testIndexBuilder(t, &Repository{
1809 Name: "reponame",
1810 },
1811 Document{Name: "f1", Content: content},
1812 Document{Name: "f2", Content: content})
1813
1814 searcher := searcherForTest(t, b)
1815 q := &query.Substring{Pattern: "needle"}
1816 res, err := searcher.List(context.Background(), q, nil)
1817 if err != nil {
1818 t.Fatalf("List(%v): %v", q, err)
1819 }
1820 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1821 t.Fatalf("got %v, want 1 matches", res)
1822 }
1823 if got := res.Repos[0].Stats.Shards; got != 1 {
1824 t.Fatalf("got %d, want 1 shard", got)
1825 }
1826 q = &query.Substring{Pattern: "foo"}
1827 res, err = searcher.List(context.Background(), q, nil)
1828 if err != nil {
1829 t.Fatalf("List(%v): %v", q, err)
1830 }
1831 if len(res.Repos) != 0 {
1832 t.Fatalf("got %v, want 0 matches", res)
1833 }
1834}
1835
1836func TestMetadata(t *testing.T) {
1837 content := []byte("bla the needle")
1838
1839 b := testIndexBuilder(t, &Repository{
1840 Name: "reponame",
1841 }, Document{Name: "f1", Content: content},
1842 Document{Name: "f2", Content: content})
1843
1844 var buf bytes.Buffer
1845 if err := b.Write(&buf); err != nil {
1846 t.Fatal(err)
1847 }
1848 f := &memSeeker{buf.Bytes()}
1849
1850 rd, _, err := ReadMetadata(f)
1851 if err != nil {
1852 t.Fatalf("ReadMetadata: %v", err)
1853 }
1854
1855 if got, want := rd[0].Name, "reponame"; got != want {
1856 t.Fatalf("got %q want %q", got, want)
1857 }
1858}
1859
1860func TestOr(t *testing.T) {
1861 b := testIndexBuilder(t, nil,
1862 Document{Name: "f1", Content: []byte("needle")},
1863 Document{Name: "f2", Content: []byte("banana")})
1864 t.Run("LineMatches", func(t *testing.T) {
1865 sres := searchForTest(t, b, query.NewOr(
1866 &query.Substring{Pattern: "needle"},
1867 &query.Substring{Pattern: "banana"}))
1868
1869 if len(sres.Files) != 2 {
1870 t.Fatalf("got %v, want 2 files", sres.Files)
1871 }
1872 })
1873
1874 t.Run("ChunkMatches", func(t *testing.T) {
1875 sres := searchForTest(t, b, query.NewOr(
1876 &query.Substring{Pattern: "needle"},
1877 &query.Substring{Pattern: "banana"}))
1878
1879 if len(sres.Files) != 2 {
1880 t.Fatalf("got %v, want 2 files", sres.Files)
1881 }
1882 })
1883}
1884
1885func TestFrequency(t *testing.T) {
1886 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
1887
1888 b := testIndexBuilder(t, nil,
1889 Document{
1890 Name: "f1",
1891 Content: content,
1892 })
1893
1894 t.Run("LineMatches", func(t *testing.T) {
1895 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
1896 if len(sres.Files) != 0 {
1897 t.Errorf("got %v, wanted 0 matches", sres.Files)
1898 }
1899 })
1900
1901 t.Run("ChunkMatches", func(t *testing.T) {
1902 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
1903 if len(sres.Files) != 0 {
1904 t.Errorf("got %v, wanted 0 matches", sres.Files)
1905 }
1906 })
1907}
1908
1909func TestMatchNewline(t *testing.T) {
1910 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
1911 if err != nil {
1912 t.Fatalf("syntax.Parse: %v", err)
1913 }
1914
1915 content := []byte("pqr\nalex")
1916
1917 b := testIndexBuilder(t, nil,
1918 Document{
1919 Name: "f1",
1920 Content: content,
1921 })
1922
1923 t.Run("LineMatches", func(t *testing.T) {
1924 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
1925 if len(sres.Files) != 1 {
1926 t.Errorf("got %v, wanted 1 matches", sres.Files)
1927 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
1928 t.Errorf("got match line %q, want %q", l, content)
1929 }
1930 })
1931
1932 t.Run("ChunkMatches", func(t *testing.T) {
1933 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
1934 if len(sres.Files) != 1 {
1935 t.Errorf("got %v, wanted 1 matches", sres.Files)
1936 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
1937 t.Errorf("got match line %q, want %q", c, content)
1938 }
1939 })
1940}
1941
1942func TestSubRepo(t *testing.T) {
1943 subRepos := map[string]*Repository{
1944 "sub": {
1945 Name: "sub-name",
1946 LineFragmentTemplate: "sub-line",
1947 },
1948 }
1949
1950 content := []byte("pqr\nalex")
1951
1952 b := testIndexBuilder(t, &Repository{
1953 SubRepoMap: subRepos,
1954 }, Document{
1955 Name: "sub/f1",
1956 Content: content,
1957 SubRepositoryPath: "sub",
1958 })
1959
1960 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
1961 if len(sres.Files) != 1 {
1962 t.Fatalf("got %v, wanted 1 matches", sres.Files)
1963 }
1964
1965 f := sres.Files[0]
1966 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
1967 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
1968 }
1969
1970 if sres.LineFragments["sub-name"] != "sub-line" {
1971 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
1972 }
1973}
1974
1975func TestSearchEither(t *testing.T) {
1976 b := testIndexBuilder(t, nil,
1977 Document{Name: "f1", Content: []byte("bla needle bla")},
1978 Document{Name: "needle-file-branch", Content: []byte("bla content")})
1979
1980 t.Run("LineMatches", func(t *testing.T) {
1981 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
1982 if len(sres.Files) != 2 {
1983 t.Fatalf("got %v, wanted 2 matches", sres.Files)
1984 }
1985
1986 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
1987 if len(sres.Files) != 1 {
1988 t.Fatalf("got %v, wanted 1 match", sres.Files)
1989 }
1990
1991 if got, want := sres.Files[0].FileName, "f1"; got != want {
1992 t.Errorf("got %q, want %q", got, want)
1993 }
1994 })
1995
1996 t.Run("ChunkMatches", func(t *testing.T) {
1997 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
1998 if len(sres.Files) != 2 {
1999 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2000 }
2001
2002 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2003 if len(sres.Files) != 1 {
2004 t.Fatalf("got %v, wanted 1 match", sres.Files)
2005 }
2006
2007 if got, want := sres.Files[0].FileName, "f1"; got != want {
2008 t.Errorf("got %q, want %q", got, want)
2009 }
2010 })
2011}
2012
2013func TestUnicodeExactMatch(t *testing.T) {
2014 needle := "néédlÉ"
2015 content := []byte("blá blá " + needle + " blâ")
2016
2017 b := testIndexBuilder(t, nil,
2018 Document{Name: "f1", Content: content})
2019
2020 t.Run("LineMatches", func(t *testing.T) {
2021 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2022 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2023 }
2024 })
2025
2026 t.Run("ChunkMatches", func(t *testing.T) {
2027 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2028 if len(res.Files) != 1 {
2029 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2030 }
2031 })
2032}
2033
2034func TestUnicodeCoverContent(t *testing.T) {
2035 needle := "néédlÉ"
2036 content := []byte("blá blá " + needle + " blâ")
2037
2038 b := testIndexBuilder(t, nil,
2039 Document{Name: "f1", Content: content})
2040
2041 t.Run("LineMatches", func(t *testing.T) {
2042 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2043 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2044 }
2045
2046 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2047 if len(res.Files) != 1 {
2048 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2049 }
2050
2051 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2052 t.Errorf("got %d want %d", got, want)
2053 }
2054 })
2055
2056 t.Run("ChunkMatches", func(t *testing.T) {
2057 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2058 if len(res.Files) != 0 {
2059 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2060 }
2061
2062 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2063 if len(res.Files) != 1 {
2064 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2065 }
2066
2067 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2068 want := uint32(strings.Index(string(content), needle))
2069 if got != want {
2070 t.Errorf("got %d want %d", got, want)
2071 }
2072 })
2073}
2074
2075func TestUnicodeNonCoverContent(t *testing.T) {
2076 needle := "nééáádlÉ"
2077 content := []byte("blá blá " + needle + " blâ")
2078
2079 b := testIndexBuilder(t, nil,
2080 Document{Name: "f1", Content: content})
2081
2082 t.Run("LineMatches", func(t *testing.T) {
2083 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2084 if len(res.Files) != 1 {
2085 t.Fatalf("got %v, wanted 1 match", res.Files)
2086 }
2087
2088 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2089 t.Errorf("got %d want %d", got, want)
2090 }
2091 })
2092
2093 t.Run("ChunkMatches", func(t *testing.T) {
2094 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2095 if len(res.Files) != 1 {
2096 t.Fatalf("got %v, wanted 1 match", res.Files)
2097 }
2098
2099 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2100 want := uint32(strings.Index(string(content), needle))
2101 if got != want {
2102 t.Errorf("got %d want %d", got, want)
2103 }
2104 })
2105}
2106
// kelvinCodePoint is the code point U+212A (KELVIN SIGN), written in decimal.
// It encodes to more than one byte in UTF-8, and the tests in this file use
// it as the upper-case counterpart of the single-byte letter 'k'.
const kelvinCodePoint = 8490
2108
2109func TestUnicodeVariableLength(t *testing.T) {
2110 lower := 'k'
2111 upper := rune(kelvinCodePoint)
2112
2113 needle := "nee" + string([]rune{lower}) + "eed"
2114 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2115 " ee" + string([]rune{lower}) + "ee" +
2116 " ee" + string([]rune{upper}) + "ee")
2117
2118 t.Run("LineMatches", func(t *testing.T) {
2119 b := testIndexBuilder(t, nil,
2120 Document{Name: "f1", Content: []byte(corpus)})
2121
2122 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2123 if len(res.Files) != 1 {
2124 t.Fatalf("got %v, wanted 1 match", res.Files)
2125 }
2126 })
2127
2128 t.Run("ChunkMatches", func(t *testing.T) {
2129 b := testIndexBuilder(t, nil,
2130 Document{Name: "f1", Content: []byte(corpus)})
2131
2132 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2133 if len(res.Files) != 1 {
2134 t.Fatalf("got %v, wanted 1 match", res.Files)
2135 }
2136 })
2137}
2138
2139func TestUnicodeFileStartOffsets(t *testing.T) {
2140 unicode := "世界"
2141 wat := "waaaaaat"
2142 b := testIndexBuilder(t, nil,
2143 Document{
2144 Name: "f1",
2145 Content: []byte(unicode),
2146 },
2147 Document{
2148 Name: "f2",
2149 Content: []byte(wat),
2150 },
2151 )
2152 q := &query.Substring{Pattern: wat, Content: true}
2153 res := searchForTest(t, b, q)
2154 if len(res.Files) != 1 {
2155 t.Fatalf("got %v, wanted 1 match", res.Files)
2156 }
2157}
2158
2159func TestLongFileUTF8(t *testing.T) {
2160 needle := "neeedle"
2161
2162 // 6 bytes.
2163 unicode := "世界"
2164 content := []byte(strings.Repeat(unicode, 100) + needle)
2165 b := testIndexBuilder(t, nil,
2166 Document{
2167 Name: "f1",
2168 Content: []byte(strings.Repeat("a", 50)),
2169 },
2170 Document{
2171 Name: "f2",
2172 Content: content,
2173 })
2174
2175 t.Run("LineMatches", func(t *testing.T) {
2176 q := &query.Substring{Pattern: needle, Content: true}
2177 res := searchForTest(t, b, q)
2178 if len(res.Files) != 1 {
2179 t.Errorf("got %v, want 1 result", res)
2180 }
2181 })
2182
2183 t.Run("ChunkMatches", func(t *testing.T) {
2184 q := &query.Substring{Pattern: needle, Content: true}
2185 res := searchForTest(t, b, q, chunkOpts)
2186 if len(res.Files) != 1 {
2187 t.Errorf("got %v, want 1 result", res)
2188 }
2189 })
2190}
2191
2192func TestEstimateDocCount(t *testing.T) {
2193 content := []byte("bla needle bla")
2194 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2195 Document{Name: "f1", Content: content},
2196 Document{Name: "f2", Content: content},
2197 )
2198
2199 t.Run("LineMatches", func(t *testing.T) {
2200 if sres := searchForTest(t, b,
2201 query.NewAnd(
2202 &query.Substring{Pattern: "needle"},
2203 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2204 ), SearchOptions{
2205 EstimateDocCount: true,
2206 }); sres.Stats.ShardFilesConsidered != 2 {
2207 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2208 }
2209 if sres := searchForTest(t, b,
2210 query.NewAnd(
2211 &query.Substring{Pattern: "needle"},
2212 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2213 ), SearchOptions{
2214 EstimateDocCount: true,
2215 }); sres.Stats.ShardFilesConsidered != 0 {
2216 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2217 }
2218 })
2219
2220 t.Run("ChunkMatches", func(t *testing.T) {
2221 if sres := searchForTest(t, b,
2222 query.NewAnd(
2223 &query.Substring{Pattern: "needle"},
2224 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2225 ), SearchOptions{
2226 EstimateDocCount: true,
2227 ChunkMatches: true,
2228 }); sres.Stats.ShardFilesConsidered != 2 {
2229 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2230 }
2231 if sres := searchForTest(t, b,
2232 query.NewAnd(
2233 &query.Substring{Pattern: "needle"},
2234 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2235 ), SearchOptions{
2236 EstimateDocCount: true,
2237 ChunkMatches: true,
2238 }); sres.Stats.ShardFilesConsidered != 0 {
2239 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2240 }
2241 })
2242}
2243
2244func TestUTF8CorrectCorpus(t *testing.T) {
2245 needle := "neeedle"
2246
2247 // 6 bytes.
2248 unicode := "世界"
2249 b := testIndexBuilder(t, nil,
2250 Document{
2251 Name: "f1",
2252 Content: []byte(strings.Repeat(unicode, 100)),
2253 },
2254 Document{
2255 Name: "xxxxxneeedle",
2256 Content: []byte("hello"),
2257 })
2258
2259 t.Run("LineMatches", func(t *testing.T) {
2260 q := &query.Substring{Pattern: needle, FileName: true}
2261 res := searchForTest(t, b, q)
2262 if len(res.Files) != 1 {
2263 t.Errorf("got %v, want 1 result", res)
2264 }
2265 })
2266
2267 t.Run("ChunkMatches", func(t *testing.T) {
2268 q := &query.Substring{Pattern: needle, FileName: true}
2269 res := searchForTest(t, b, q, chunkOpts)
2270 if len(res.Files) != 1 {
2271 t.Errorf("got %v, want 1 result", res)
2272 }
2273 })
2274}
2275
2276func TestBuilderStats(t *testing.T) {
2277 b := testIndexBuilder(t, nil,
2278 Document{
2279 Name: "f1",
2280 Content: []byte(strings.Repeat("abcd", 1024)),
2281 })
2282 var buf bytes.Buffer
2283 if err := b.Write(&buf); err != nil {
2284 t.Fatal(err)
2285 }
2286
2287 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2288 t.Errorf("got %d, want %d", got, want)
2289 }
2290}
2291
2292func TestIOStats(t *testing.T) {
2293 b := testIndexBuilder(t, nil,
2294 Document{
2295 Name: "f1",
2296 Content: []byte(strings.Repeat("abcd", 1024)),
2297 })
2298
2299 t.Run("LineMatches", func(t *testing.T) {
2300 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2301 res := searchForTest(t, b, q)
2302
2303 // 4096 (content) + 2 (overhead: newlines or doc sections)
2304 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2305 t.Errorf("got content I/O %d, want %d", got, want)
2306 }
2307
2308 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2309 // delta encoded.
2310 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2311 t.Errorf("got index I/O %d, want %d", got, want)
2312 }
2313 })
2314
2315 t.Run("ChunkMatches", func(t *testing.T) {
2316 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2317 res := searchForTest(t, b, q, chunkOpts)
2318
2319 // 4096 (content) + 2 (overhead: newlines or doc sections)
2320 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2321 t.Errorf("got content I/O %d, want %d", got, want)
2322 }
2323
2324 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2325 // delta encoded.
2326 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2327 t.Errorf("got index I/O %d, want %d", got, want)
2328 }
2329 })
2330}
2331
2332func TestStartLineAnchor(t *testing.T) {
2333 b := testIndexBuilder(t, nil,
2334 Document{
2335 Name: "f1",
2336 Content: []byte(
2337 `hello
2338start of middle of line
2339`),
2340 })
2341
2342 t.Run("LineMatches", func(t *testing.T) {
2343 q, err := query.Parse("^start")
2344 if err != nil {
2345 t.Errorf("parse: %v", err)
2346 }
2347
2348 res := searchForTest(t, b, q)
2349 if len(res.Files) != 1 {
2350 t.Errorf("got %v, want 1 file", res.Files)
2351 }
2352
2353 q, err = query.Parse("^middle")
2354 if err != nil {
2355 t.Errorf("parse: %v", err)
2356 }
2357 res = searchForTest(t, b, q)
2358 if len(res.Files) != 0 {
2359 t.Errorf("got %v, want 0 files", res.Files)
2360 }
2361 })
2362
2363 t.Run("ChunkMatches", func(t *testing.T) {
2364 q, err := query.Parse("^start")
2365 if err != nil {
2366 t.Errorf("parse: %v", err)
2367 }
2368
2369 res := searchForTest(t, b, q, chunkOpts)
2370 if len(res.Files) != 1 {
2371 t.Errorf("got %v, want 1 file", res.Files)
2372 }
2373
2374 q, err = query.Parse("^middle")
2375 if err != nil {
2376 t.Errorf("parse: %v", err)
2377 }
2378 res = searchForTest(t, b, q, chunkOpts)
2379 if len(res.Files) != 0 {
2380 t.Errorf("got %v, want 0 files", res.Files)
2381 }
2382 })
2383}
2384
2385func TestAndOrUnicode(t *testing.T) {
2386 q, err := query.Parse("orange.*apple")
2387 if err != nil {
2388 t.Errorf("parse: %v", err)
2389 }
2390 finalQ := query.NewAnd(q,
2391 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2392 query.NewOr(&query.Branch{Pattern: "master"}))))
2393
2394 b := testIndexBuilder(t, &Repository{
2395 Name: "name",
2396 Branches: []RepositoryBranch{{"master", "master-version"}},
2397 }, Document{
2398 Name: "f2",
2399 Content: []byte("orange\u2318apple"),
2400 // --------------0123456 78901
2401 Branches: []string{"master"},
2402 })
2403
2404 t.Run("LineMatches", func(t *testing.T) {
2405 res := searchForTest(t, b, finalQ)
2406 if len(res.Files) != 1 {
2407 t.Errorf("got %v, want 1 result", res.Files)
2408 }
2409 })
2410
2411 t.Run("ChunkMatches", func(t *testing.T) {
2412 res := searchForTest(t, b, finalQ, chunkOpts)
2413 if len(res.Files) != 1 {
2414 t.Errorf("got %v, want 1 result", res.Files)
2415 }
2416 })
2417}
2418
2419func TestAndShort(t *testing.T) {
2420 content := []byte("bla needle at orange bla")
2421 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2422 Document{Name: "f1", Content: content},
2423 Document{Name: "f2", Content: []byte("xx at xx")},
2424 Document{Name: "f3", Content: []byte("yy orange xx")},
2425 )
2426
2427 q := query.NewAnd(&query.Substring{Pattern: "at"},
2428 &query.Substring{Pattern: "orange"})
2429
2430 t.Run("LineMatches", func(t *testing.T) {
2431 res := searchForTest(t, b, q)
2432 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2433 t.Errorf("got %v, want 1 result", res.Files)
2434 }
2435 })
2436
2437 t.Run("ChunkMatches", func(t *testing.T) {
2438 res := searchForTest(t, b, q, chunkOpts)
2439 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2440 t.Errorf("got %v, want 1 result", res.Files)
2441 }
2442 })
2443}
2444
2445func TestNoCollectRegexpSubstring(t *testing.T) {
2446 content := []byte("bla final bla\nfoo final, foo")
2447 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2448 Document{Name: "f1", Content: content},
2449 )
2450
2451 q := &query.Regexp{
2452 Regexp: mustParseRE("final[,.]"),
2453 }
2454
2455 t.Run("LineMatches", func(t *testing.T) {
2456 res := searchForTest(t, b, q)
2457 if len(res.Files) != 1 {
2458 t.Fatalf("got %v, want 1 result", res.Files)
2459 }
2460 if f := res.Files[0]; len(f.LineMatches) != 1 {
2461 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2462 }
2463 })
2464
2465 t.Run("ChunkMatches", func(t *testing.T) {
2466 res := searchForTest(t, b, q, chunkOpts)
2467 if len(res.Files) != 1 {
2468 t.Fatalf("got %v, want 1 result", res.Files)
2469 }
2470 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2471 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2472 }
2473 })
2474}
2475
2476func printLineMatches(ms []LineMatch) string {
2477 var ss []string
2478 for _, m := range ms {
2479 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2480 }
2481
2482 return strings.Join(ss, ", ")
2483}
2484
2485func TestLang(t *testing.T) {
2486 content := []byte("bla needle bla")
2487 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2488 Document{Name: "f1", Content: content},
2489 Document{Name: "f2", Language: "java", Content: content},
2490 Document{Name: "f3", Language: "cpp", Content: content},
2491 )
2492
2493 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2494 &query.Language{Language: "cpp"})
2495
2496 t.Run("LineMatches", func(t *testing.T) {
2497 res := searchForTest(t, b, q)
2498 if len(res.Files) != 1 {
2499 t.Fatalf("got %v, want 1 result in f3", res.Files)
2500 }
2501 f := res.Files[0]
2502 if f.FileName != "f3" || f.Language != "cpp" {
2503 t.Fatalf("got %v, want 1 match with language cpp", f)
2504 }
2505 })
2506
2507 t.Run("ChunkMatches", func(t *testing.T) {
2508 res := searchForTest(t, b, q, chunkOpts)
2509 if len(res.Files) != 1 {
2510 t.Fatalf("got %v, want 1 result in f3", res.Files)
2511 }
2512 f := res.Files[0]
2513 if f.FileName != "f3" || f.Language != "cpp" {
2514 t.Fatalf("got %v, want 1 match with language cpp", f)
2515 }
2516 })
2517}
2518
2519func TestLangShortcut(t *testing.T) {
2520 content := []byte("bla needle bla")
2521 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2522 Document{Name: "f2", Language: "java", Content: content},
2523 Document{Name: "f3", Language: "cpp", Content: content},
2524 )
2525
2526 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2527 &query.Language{Language: "fortran"})
2528
2529 t.Run("LineMatches", func(t *testing.T) {
2530 res := searchForTest(t, b, q)
2531 if len(res.Files) != 0 {
2532 t.Fatalf("got %v, want 0 results", res.Files)
2533 }
2534 if res.Stats.IndexBytesLoaded > 0 {
2535 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2536 }
2537 })
2538
2539 t.Run("ChunkMatches", func(t *testing.T) {
2540 res := searchForTest(t, b, q, chunkOpts)
2541 if len(res.Files) != 0 {
2542 t.Fatalf("got %v, want 0 results", res.Files)
2543 }
2544 if res.Stats.IndexBytesLoaded > 0 {
2545 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2546 }
2547 })
2548}
2549
2550func TestNoTextMatchAtoms(t *testing.T) {
2551 content := []byte("bla needle bla")
2552 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2553 Document{Name: "f1", Content: content},
2554 Document{Name: "f2", Language: "java", Content: content},
2555 Document{Name: "f3", Language: "cpp", Content: content},
2556 )
2557 q := query.NewAnd(&query.Language{Language: "java"})
2558 t.Run("LineMatches", func(t *testing.T) {
2559 res := searchForTest(t, b, q)
2560 if len(res.Files) != 1 {
2561 t.Fatalf("got %v, want 1 result in f3", res.Files)
2562 }
2563 })
2564
2565 t.Run("ChunkMatches", func(t *testing.T) {
2566 res := searchForTest(t, b, q, chunkOpts)
2567 if len(res.Files) != 1 {
2568 t.Fatalf("got %v, want 1 result in f3", res.Files)
2569 }
2570 })
2571}
2572
2573func TestNoPositiveAtoms(t *testing.T) {
2574 content := []byte("bla needle bla")
2575 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2576 Document{Name: "f1", Content: content},
2577 Document{Name: "f2", Content: content},
2578 )
2579
2580 q := query.NewAnd(
2581 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2582 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2583 t.Run("LineMatches", func(t *testing.T) {
2584 res := searchForTest(t, b, q)
2585 if len(res.Files) != 2 {
2586 t.Fatalf("got %v, want 2 results in f3", res.Files)
2587 }
2588 })
2589 t.Run("ChunkMatches", func(t *testing.T) {
2590 res := searchForTest(t, b, q, chunkOpts)
2591 if len(res.Files) != 2 {
2592 t.Fatalf("got %v, want 2 results in f3", res.Files)
2593 }
2594 })
2595}
2596
2597func TestSymbolBoundaryStart(t *testing.T) {
2598 content := []byte("start\nbla bla\nend")
2599 // ----------------012345-67890123-456
2600
2601 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2602 Document{
2603 Name: "f1",
2604 Content: content,
2605 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2606 },
2607 )
2608 q := &query.Symbol{
2609 Expr: &query.Substring{Pattern: "start"},
2610 }
2611 t.Run("LineMatches", func(t *testing.T) {
2612 res := searchForTest(t, b, q)
2613 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2614 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2615 }
2616 m := res.Files[0].LineMatches[0].LineFragments[0]
2617 if m.Offset != 0 {
2618 t.Fatalf("got offset %d want 0", m.Offset)
2619 }
2620 })
2621
2622 t.Run("ChunkMatches", func(t *testing.T) {
2623 res := searchForTest(t, b, q, chunkOpts)
2624 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2625 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2626 }
2627 m := res.Files[0].ChunkMatches[0].Ranges[0]
2628 if m.Start.ByteOffset != 0 {
2629 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2630 }
2631 })
2632}
2633
2634func TestSymbolBoundaryEnd(t *testing.T) {
2635 content := []byte("start\nbla bla\nend")
2636 // ----------------012345-67890123-456
2637
2638 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2639 Document{
2640 Name: "f1",
2641 Content: content,
2642 Symbols: []DocumentSection{{14, 17}},
2643 },
2644 )
2645 q := &query.Symbol{
2646 Expr: &query.Substring{Pattern: "end"},
2647 }
2648 t.Run("LineMatches", func(t *testing.T) {
2649 res := searchForTest(t, b, q)
2650 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2651 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2652 }
2653 m := res.Files[0].LineMatches[0].LineFragments[0]
2654 if m.Offset != 14 {
2655 t.Fatalf("got offset %d want 0", m.Offset)
2656 }
2657 })
2658
2659 t.Run("ChunkMatches", func(t *testing.T) {
2660 res := searchForTest(t, b, q, chunkOpts)
2661 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2662 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2663 }
2664 m := res.Files[0].ChunkMatches[0].Ranges[0]
2665 if m.Start.ByteOffset != 14 {
2666 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2667 }
2668 })
2669}
2670
2671func TestSymbolSubstring(t *testing.T) {
2672 content := []byte("bla\nsymblabla\nbla")
2673 // ----------------0123-4567890123-456
2674
2675 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2676 Document{
2677 Name: "f1",
2678 Content: content,
2679 Symbols: []DocumentSection{{4, 12}},
2680 },
2681 )
2682 q := &query.Symbol{
2683 Expr: &query.Substring{Pattern: "bla"},
2684 }
2685 t.Run("LineMatches", func(t *testing.T) {
2686 res := searchForTest(t, b, q)
2687 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2688 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2689 }
2690 m := res.Files[0].LineMatches[0].LineFragments[0]
2691 if m.Offset != 7 || m.MatchLength != 3 {
2692 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2693 }
2694 })
2695
2696 t.Run("ChunkMatches", func(t *testing.T) {
2697 res := searchForTest(t, b, q, chunkOpts)
2698 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2699 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2700 }
2701 m := res.Files[0].ChunkMatches[0].Ranges[0]
2702 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2703 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2704 }
2705 })
2706}
2707
2708func TestSymbolSubstringExact(t *testing.T) {
2709 content := []byte("bla\nsym\nbla\nsym\nasymb")
2710 // ----------------0123-4567-890123456-78901
2711
2712 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2713 Document{
2714 Name: "f1",
2715 Content: content,
2716 Symbols: []DocumentSection{{4, 7}},
2717 },
2718 )
2719 q := &query.Symbol{
2720 Expr: &query.Substring{Pattern: "sym"},
2721 }
2722 t.Run("LineMatches", func(t *testing.T) {
2723 res := searchForTest(t, b, q)
2724 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2725 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2726 }
2727 m := res.Files[0].LineMatches[0].LineFragments[0]
2728 if m.Offset != 4 {
2729 t.Fatalf("got offset %d, want 7", m.Offset)
2730 }
2731 })
2732
2733 t.Run("ChunkMatches", func(t *testing.T) {
2734 res := searchForTest(t, b, q, chunkOpts)
2735 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2736 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2737 }
2738 m := res.Files[0].ChunkMatches[0].Ranges[0]
2739 if m.Start.ByteOffset != 4 {
2740 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2741 }
2742 })
2743}
2744
2745func TestSymbolRegexpExact(t *testing.T) {
2746 content := []byte("blah\nbla\nbl")
2747 // ----------------01234-5678-90
2748
2749 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2750 Document{
2751 Name: "f1",
2752 Content: content,
2753 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2754 },
2755 )
2756 q := &query.Symbol{
2757 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2758 }
2759 t.Run("LineMatches", func(t *testing.T) {
2760 res := searchForTest(t, b, q)
2761 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2762 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2763 }
2764 m := res.Files[0].LineMatches[0].LineFragments[0]
2765 if m.Offset != 5 {
2766 t.Fatalf("got offset %d, want 5", m.Offset)
2767 }
2768 })
2769
2770 t.Run("ChunkMatches", func(t *testing.T) {
2771 res := searchForTest(t, b, q, chunkOpts)
2772 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2773 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2774 }
2775 m := res.Files[0].ChunkMatches[0].Ranges[0]
2776 if m.Start.ByteOffset != 5 {
2777 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2778 }
2779 })
2780}
2781
2782func TestSymbolRegexpPartial(t *testing.T) {
2783 content := []byte("abcdef")
2784 // ----------------012345
2785
2786 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2787 Document{
2788 Name: "f1",
2789 Content: content,
2790 Symbols: []DocumentSection{{0, 6}},
2791 },
2792 )
2793 q := &query.Symbol{
2794 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2795 }
2796 t.Run("LineMatches", func(t *testing.T) {
2797 res := searchForTest(t, b, q)
2798 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2799 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2800 }
2801 m := res.Files[0].LineMatches[0].LineFragments[0]
2802 if m.Offset != 1 {
2803 t.Fatalf("got offset %d, want 1", m.Offset)
2804 }
2805 if m.MatchLength != 3 {
2806 t.Fatalf("got match length %d, want 3", m.MatchLength)
2807 }
2808 })
2809
2810 t.Run("ChunkMatches", func(t *testing.T) {
2811 res := searchForTest(t, b, q, chunkOpts)
2812 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2813 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2814 }
2815 m := res.Files[0].ChunkMatches[0].Ranges[0]
2816 if m.Start.ByteOffset != 1 {
2817 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2818 }
2819 if m.End.ByteOffset != 4 {
2820 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2821 }
2822 })
2823}
2824
2825func TestSymbolRegexpAll(t *testing.T) {
2826 docs := []Document{
2827 {
2828 Name: "f1",
2829 Content: []byte("Hello Zoekt"),
2830 // --------------01234567890
2831 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2832 },
2833 {
2834 Name: "f2",
2835 Content: []byte("Second Zoekt Third"),
2836 // --------------012345678901234567
2837 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2838 },
2839 }
2840
2841 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2842 q := &query.Symbol{
2843 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2844 }
2845 t.Run("LineMatches", func(t *testing.T) {
2846 res := searchForTest(t, b, q)
2847 if len(res.Files) != len(docs) {
2848 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2849 }
2850 for i, want := range docs {
2851 got := res.Files[i].LineMatches[0].LineFragments
2852 if len(got) != len(want.Symbols) {
2853 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2854 }
2855
2856 for j, sec := range want.Symbols {
2857 if sec.Start != got[j].Offset {
2858 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
2859 }
2860 }
2861 }
2862 })
2863
2864 t.Run("ChunkMatches", func(t *testing.T) {
2865 res := searchForTest(t, b, q, chunkOpts)
2866 if len(res.Files) != len(docs) {
2867 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2868 }
2869 for i, want := range docs {
2870 got := res.Files[i].ChunkMatches[0].Ranges
2871 if len(got) != len(want.Symbols) {
2872 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2873 }
2874
2875 for j, sec := range want.Symbols {
2876 if sec.Start != uint32(got[j].Start.ByteOffset) {
2877 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
2878 }
2879 }
2880 }
2881 })
2882}
2883
2884func TestHitIterTerminate(t *testing.T) {
2885 // contrived input: trigram frequencies forces selecting abc +
2886 // def for the distance iteration. There is no match, so this
2887 // will advance the compressedPostingIterator to beyond the
2888 // end.
2889 content := []byte("abc bcdbcd cdecde abcabc def efg")
2890 b := testIndexBuilder(t, nil,
2891 Document{
2892 Name: "f1",
2893 Content: content,
2894 },
2895 )
2896
2897 t.Run("LineMatches", func(t *testing.T) {
2898 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
2899 })
2900
2901 t.Run("ChunkMatches", func(t *testing.T) {
2902 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
2903 })
2904}
2905
2906func TestDistanceHitIterBailLast(t *testing.T) {
2907 content := []byte("AST AST AST UASH")
2908 b := testIndexBuilder(t, nil,
2909 Document{
2910 Name: "f1",
2911 Content: content,
2912 },
2913 )
2914 t.Run("LineMatches", func(t *testing.T) {
2915 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
2916 if len(res.Files) != 0 {
2917 t.Fatalf("got %v, want no results", res.Files)
2918 }
2919 })
2920
2921 t.Run("LineMatches", func(t *testing.T) {
2922 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
2923 if len(res.Files) != 0 {
2924 t.Fatalf("got %v, want no results", res.Files)
2925 }
2926 })
2927}
2928
2929func TestDocumentSectionRuneBoundary(t *testing.T) {
2930 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2931 b, err := NewIndexBuilder(nil)
2932 if err != nil {
2933 t.Fatalf("NewIndexBuilder: %v", err)
2934 }
2935
2936 for i, sec := range []DocumentSection{
2937 {2, 6},
2938 {3, 7},
2939 } {
2940 if err := b.Add(Document{
2941 Name: "f1",
2942 Content: []byte(content),
2943 Symbols: []DocumentSection{sec},
2944 }); err == nil {
2945 t.Errorf("%d: Add succeeded", i)
2946 }
2947 }
2948}
2949
2950func TestUnicodeQuery(t *testing.T) {
2951 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2952 b := testIndexBuilder(t, nil,
2953 Document{
2954 Name: "f1",
2955 Content: []byte(content),
2956 },
2957 )
2958
2959 q := &query.Substring{Pattern: content}
2960
2961 t.Run("LineMatches", func(t *testing.T) {
2962 res := searchForTest(t, b, q)
2963 if len(res.Files) != 1 {
2964 t.Fatalf("want 1 match, got %v", res.Files)
2965 }
2966
2967 f := res.Files[0]
2968 if len(f.LineMatches) != 1 {
2969 t.Fatalf("want 1 line, got %v", f.LineMatches)
2970 }
2971 l := f.LineMatches[0]
2972
2973 if len(l.LineFragments) != 1 {
2974 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
2975 }
2976 fr := l.LineFragments[0]
2977 if fr.MatchLength != len(content) {
2978 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
2979 }
2980 })
2981
2982 t.Run("ChunkMatches", func(t *testing.T) {
2983 res := searchForTest(t, b, q, chunkOpts)
2984 if len(res.Files) != 1 {
2985 t.Fatalf("want 1 match, got %v", res.Files)
2986 }
2987
2988 f := res.Files[0]
2989 if len(f.ChunkMatches) != 1 {
2990 t.Fatalf("want 1 line, got %v", f.LineMatches)
2991 }
2992 cm := f.ChunkMatches[0]
2993
2994 if len(cm.Ranges) != 1 {
2995 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
2996 }
2997 rr := cm.Ranges[0]
2998 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
2999 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3000 }
3001 })
3002}
3003
3004func TestSkipInvalidContent(t *testing.T) {
3005 for _, content := range []string{
3006 // Binary
3007 "abc def \x00 abc",
3008 } {
3009
3010 b, err := NewIndexBuilder(nil)
3011 if err != nil {
3012 t.Fatalf("NewIndexBuilder: %v", err)
3013 }
3014
3015 if err := b.Add(Document{
3016 Name: "f1",
3017 Content: []byte(content),
3018 }); err != nil {
3019 t.Fatal(err)
3020 }
3021
3022 t.Run("LineMatches", func(t *testing.T) {
3023 q := &query.Substring{Pattern: "abc def"}
3024 res := searchForTest(t, b, q)
3025 if len(res.Files) != 0 {
3026 t.Fatalf("got %v, want no results", res.Files)
3027 }
3028
3029 q = &query.Substring{Pattern: "NOT-INDEXED"}
3030 res = searchForTest(t, b, q)
3031 if len(res.Files) != 1 {
3032 t.Fatalf("got %v, want 1 result", res.Files)
3033 }
3034 })
3035
3036 t.Run("ChunkMatches", func(t *testing.T) {
3037 q := &query.Substring{Pattern: "abc def"}
3038 res := searchForTest(t, b, q, chunkOpts)
3039 if len(res.Files) != 0 {
3040 t.Fatalf("got %v, want no results", res.Files)
3041 }
3042
3043 q = &query.Substring{Pattern: "NOT-INDEXED"}
3044 res = searchForTest(t, b, q, chunkOpts)
3045 if len(res.Files) != 1 {
3046 t.Fatalf("got %v, want 1 result", res.Files)
3047 }
3048 })
3049 }
3050}
3051
3052func TestCheckText(t *testing.T) {
3053 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3054 if err := CheckText([]byte(text), 20000); err != nil {
3055 t.Errorf("CheckText(%q): %v", text, err)
3056 }
3057 }
3058 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3059 if err := CheckText([]byte(text), 15); err == nil {
3060 t.Errorf("CheckText(%q) succeeded", text)
3061 }
3062 }
3063}
3064
3065func TestLineAnd(t *testing.T) {
3066 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3067 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3068 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3069 Document{Name: "f3", Content: []byte("banana grape")},
3070 )
3071 pattern := "(apple)(?-s:.)*?(banana)"
3072 r, _ := syntax.Parse(pattern, syntax.Perl)
3073
3074 q := query.Regexp{
3075 Regexp: r,
3076 Content: true,
3077 }
3078 t.Run("LineMatches", func(t *testing.T) {
3079 res := searchForTest(t, b, &q)
3080 wantRegexpCount := 1
3081 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3082 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3083 }
3084 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3085 t.Errorf("got %v, want 1 result", res.Files)
3086 }
3087 })
3088
3089 t.Run("ChunkMatches", func(t *testing.T) {
3090 res := searchForTest(t, b, &q, chunkOpts)
3091 wantRegexpCount := 1
3092 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3093 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3094 }
3095 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3096 t.Errorf("got %v, want 1 result", res.Files)
3097 }
3098 })
3099}
3100
3101func TestLineAndFileName(t *testing.T) {
3102 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3103 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3104 Document{Name: "f2", Content: []byte("apple banana\norange")},
3105 Document{Name: "apple banana", Content: []byte("banana grape")},
3106 )
3107 pattern := "(apple)(?-s:.)*?(banana)"
3108 r, _ := syntax.Parse(pattern, syntax.Perl)
3109
3110 q := query.Regexp{
3111 Regexp: r,
3112 FileName: true,
3113 }
3114 t.Run("LineMatches", func(t *testing.T) {
3115 res := searchForTest(t, b, &q)
3116 wantRegexpCount := 1
3117 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3118 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3119 }
3120 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3121 t.Errorf("got %v, want 1 result", res.Files)
3122 }
3123 })
3124
3125 t.Run("ChunkMatches", func(t *testing.T) {
3126 res := searchForTest(t, b, &q, chunkOpts)
3127 wantRegexpCount := 1
3128 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3129 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3130 }
3131 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3132 t.Errorf("got %v, want 1 result", res.Files)
3133 }
3134 })
3135}
3136
3137func TestMultiLineRegex(t *testing.T) {
3138 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3139 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3140 Document{Name: "f2", Content: []byte("apple orange")},
3141 Document{Name: "f3", Content: []byte("grape apple")},
3142 )
3143 pattern := "(apple).*?[[:space:]].*?(grape)"
3144 r, _ := syntax.Parse(pattern, syntax.Perl)
3145
3146 q := query.Regexp{
3147 Regexp: r,
3148 }
3149 t.Run("LineMatches", func(t *testing.T) {
3150 res := searchForTest(t, b, &q)
3151 wantRegexpCount := 2
3152 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3153 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3154 }
3155 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3156 t.Errorf("got %v, want 1 result", res.Files)
3157 }
3158 if l := len(res.Files[0].LineMatches); l != 2 {
3159 t.Errorf("got %v, want 2 line matches", l)
3160 }
3161 })
3162
3163 t.Run("ChunkMatches", func(t *testing.T) {
3164 res := searchForTest(t, b, &q, chunkOpts)
3165 wantRegexpCount := 2
3166 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3167 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3168 }
3169 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3170 t.Errorf("got %v, want 1 result", res.Files)
3171 }
3172 if l := len(res.Files[0].ChunkMatches); l != 1 {
3173 t.Errorf("got %v, want 1 chunk matches", l)
3174 }
3175 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3176 t.Errorf("got %v, want 1 chunk ranges", l)
3177 }
3178 })
3179}
3180
3181func TestSearchTypeFileName(t *testing.T) {
3182 b := testIndexBuilder(t, &Repository{
3183 Name: "reponame",
3184 },
3185 Document{Name: "f1", Content: []byte("bla the needle")},
3186 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3187 // -----------------------------------012345678901234567890-123456
3188 )
3189
3190 t.Run("LineMatches", func(t *testing.T) {
3191 wantSingleMatch := func(res *SearchResult, want string) {
3192 t.Helper()
3193 fmatches := res.Files
3194 if len(fmatches) != 1 {
3195 t.Errorf("got %v, want 1 matches", len(fmatches))
3196 return
3197 }
3198 if len(fmatches[0].LineMatches) != 1 {
3199 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3200 return
3201 }
3202 var got string
3203 if fmatches[0].LineMatches[0].FileName {
3204 got = fmatches[0].FileName
3205 } else {
3206 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3207 }
3208
3209 if got != want {
3210 t.Errorf("got %s, want %s", got, want)
3211 }
3212 }
3213
3214 // Only return the later match in the second file
3215 res := searchForTest(t, b, query.NewAnd(
3216 &query.Type{
3217 Type: query.TypeFileName,
3218 Child: &query.Substring{Pattern: "needle"},
3219 },
3220 &query.Substring{Pattern: "file"}))
3221 wantSingleMatch(res, "f2:8")
3222
3223 // Only return a filename result
3224 res = searchForTest(t, b,
3225 &query.Type{
3226 Type: query.TypeFileName,
3227 Child: &query.Substring{Pattern: "file"},
3228 })
3229 wantSingleMatch(res, "f2")
3230 })
3231
3232 t.Run("ChunkMatches", func(t *testing.T) {
3233 wantSingleMatch := func(res *SearchResult, want string) {
3234 t.Helper()
3235 fmatches := res.Files
3236 if len(fmatches) != 1 {
3237 t.Errorf("got %v, want 1 matches", len(fmatches))
3238 return
3239 }
3240 if len(fmatches[0].ChunkMatches) != 1 {
3241 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3242 return
3243 }
3244 var got string
3245 if fmatches[0].ChunkMatches[0].FileName {
3246 got = fmatches[0].FileName
3247 } else {
3248 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3249 }
3250
3251 if got != want {
3252 t.Errorf("got %s, want %s", got, want)
3253 }
3254 }
3255
3256 // Only return the later match in the second file
3257 res := searchForTest(t, b, query.NewAnd(
3258 &query.Type{
3259 Type: query.TypeFileName,
3260 Child: &query.Substring{Pattern: "needle"},
3261 },
3262 &query.Substring{Pattern: "file"}),
3263 chunkOpts,
3264 )
3265 wantSingleMatch(res, "f2:8")
3266
3267 // Only return a filename result
3268 res = searchForTest(t, b,
3269 &query.Type{
3270 Type: query.TypeFileName,
3271 Child: &query.Substring{Pattern: "file"},
3272 },
3273 chunkOpts,
3274 )
3275 wantSingleMatch(res, "f2")
3276 })
3277}
3278
3279func TestSearchTypeLanguage(t *testing.T) {
3280 b := testIndexBuilder(t, &Repository{
3281 Name: "reponame",
3282 },
3283 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3284 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3285 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3286 )
3287
3288 t.Log(b.languageMap)
3289
3290 t.Run("LineMatches", func(t *testing.T) {
3291 wantSingleMatch := func(res *SearchResult, want string) {
3292 t.Helper()
3293 fmatches := res.Files
3294 if len(fmatches) != 1 {
3295 t.Errorf("got %v, want 1 matches", len(fmatches))
3296 return
3297 }
3298 if len(fmatches[0].LineMatches) != 1 {
3299 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3300 return
3301 }
3302 var got string
3303 if fmatches[0].LineMatches[0].FileName {
3304 got = fmatches[0].FileName
3305 } else {
3306 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3307 }
3308
3309 if got != want {
3310 t.Errorf("got %s, want %s", got, want)
3311 }
3312 }
3313
3314 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3315 wantSingleMatch(res, "apex.cls")
3316
3317 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3318 wantSingleMatch(res, "tex.cls")
3319
3320 res = searchForTest(t, b, &query.Language{Language: "C"})
3321 wantSingleMatch(res, "hello.h")
3322
3323 // test fallback language search by pretending it's an older index version
3324 res = searchForTest(t, b, &query.Language{Language: "C++"})
3325 if len(res.Files) != 0 {
3326 t.Errorf("got %d results for C++, want 0", len(res.Files))
3327 }
3328
3329 b.featureVersion = 11 // force fallback
3330 res = searchForTest(t, b, &query.Language{Language: "C++"})
3331 wantSingleMatch(res, "hello.h")
3332 })
3333
3334 t.Run("ChunkMatches", func(t *testing.T) {
3335 wantSingleMatch := func(res *SearchResult, want string) {
3336 t.Helper()
3337 fmatches := res.Files
3338 if len(fmatches) != 1 {
3339 t.Errorf("got %v, want 1 matches", len(fmatches))
3340 return
3341 }
3342 if len(fmatches[0].ChunkMatches) != 1 {
3343 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3344 return
3345 }
3346 var got string
3347 if fmatches[0].ChunkMatches[0].FileName {
3348 got = fmatches[0].FileName
3349 } else {
3350 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3351 }
3352
3353 if got != want {
3354 t.Errorf("got %s, want %s", got, want)
3355 }
3356 }
3357
3358 b.featureVersion = FeatureVersion // reset feature version
3359 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3360 wantSingleMatch(res, "apex.cls")
3361
3362 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3363 wantSingleMatch(res, "tex.cls")
3364
3365 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3366 wantSingleMatch(res, "hello.h")
3367
3368 // test fallback language search by pretending it's an older index version
3369 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3370 if len(res.Files) != 0 {
3371 t.Errorf("got %d results for C++, want 0", len(res.Files))
3372 }
3373
3374 b.featureVersion = 11 // force fallback
3375 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3376 wantSingleMatch(res, "hello.h")
3377 })
3378}
3379
3380func TestStats(t *testing.T) {
3381 ignored := []cmp.Option{
3382 cmpopts.EquateEmpty(),
3383 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3384 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3385 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3386 }
3387
3388 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3389 searcher := searcherForTest(t, b)
3390 indexdata := searcher.(*indexData)
3391 return indexdata.repoListEntry
3392 }
3393
3394 t.Run("one empty repo", func(t *testing.T) {
3395 b := testIndexBuilder(t, nil)
3396 got := repoListEntries(b)
3397 want := []RepoListEntry{
3398 {
3399 Stats: RepoStats{
3400 Repos: 0,
3401 Shards: 1,
3402 Documents: 0,
3403 IndexBytes: 20,
3404 ContentBytes: 0,
3405 NewLinesCount: 0,
3406 DefaultBranchNewLinesCount: 0,
3407 OtherBranchesNewLinesCount: 0,
3408 },
3409 },
3410 }
3411
3412 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3413 t.Fatalf("mismatch (-want +got):\n%s", diff)
3414 }
3415
3416 })
3417
3418 t.Run("one simple shard", func(t *testing.T) {
3419 b := testIndexBuilder(t, nil,
3420 Document{Name: "doc 0", Content: []byte("content 0")},
3421 Document{Name: "doc 1", Content: []byte("content 1")},
3422 )
3423 got := repoListEntries(b)
3424 want := []RepoListEntry{
3425 {
3426 Stats: RepoStats{
3427 Repos: 0,
3428 Shards: 1,
3429 Documents: 2,
3430 IndexBytes: 224,
3431 ContentBytes: 28,
3432 NewLinesCount: 0,
3433 DefaultBranchNewLinesCount: 0,
3434 OtherBranchesNewLinesCount: 0,
3435 },
3436 },
3437 }
3438
3439 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3440 t.Fatalf("mismatch (-want +got):\n%s", diff)
3441 }
3442
3443 })
3444
3445 t.Run("one compound shard", func(t *testing.T) {
3446 b := testIndexBuilderCompound(t,
3447 []*Repository{
3448 {Name: "repo 0"},
3449 {Name: "repo 1"},
3450 },
3451 [][]Document{
3452 {
3453 {Name: "doc 0", Content: []byte("content 0")},
3454 {Name: "doc 1", Content: []byte("content 1")},
3455 },
3456 {
3457 {Name: "doc 2", Content: []byte("content 2")},
3458 {Name: "doc 3", Content: []byte("content 3")},
3459 },
3460 },
3461 )
3462 got := repoListEntries(b)
3463 want := []RepoListEntry{
3464 {
3465 Stats: RepoStats{
3466 Repos: 0,
3467 Shards: 1,
3468 Documents: 2,
3469 IndexBytes: 180,
3470 ContentBytes: 28,
3471 NewLinesCount: 0,
3472 DefaultBranchNewLinesCount: 0,
3473 OtherBranchesNewLinesCount: 0,
3474 },
3475 },
3476 {
3477 Stats: RepoStats{
3478 Repos: 0,
3479 Shards: 1,
3480 Documents: 2,
3481 IndexBytes: 180,
3482 ContentBytes: 28,
3483 NewLinesCount: 0,
3484 DefaultBranchNewLinesCount: 0,
3485 OtherBranchesNewLinesCount: 0,
3486 },
3487 },
3488 }
3489
3490 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3491 t.Fatalf("mismatch (-want +got):\n%s", diff)
3492 }
3493 })
3494
3495 t.Run("compound shard with empty repos", func(t *testing.T) {
3496 b := testIndexBuilderCompound(t,
3497 []*Repository{
3498 {Name: "repo 0"},
3499 {Name: "repo 1"},
3500 {Name: "repo 2"},
3501 {Name: "repo 3"},
3502 {Name: "repo 4"},
3503 },
3504 [][]Document{
3505 {{Name: "doc 0", Content: []byte("content 0")}},
3506 nil,
3507 {{Name: "doc 1", Content: []byte("content 1")}},
3508 nil,
3509 nil,
3510 },
3511 )
3512 got := repoListEntries(b)
3513
3514 entryEmpty := RepoListEntry{Stats: RepoStats{
3515 Shards: 1,
3516 Documents: 0,
3517 ContentBytes: 0,
3518 }}
3519 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3520 Shards: 1,
3521 Documents: 1,
3522 ContentBytes: 14,
3523 }}
3524
3525 want := []RepoListEntry{
3526 entryNonEmpty,
3527 entryEmpty,
3528 entryNonEmpty,
3529 entryEmpty,
3530 entryEmpty,
3531 }
3532
3533 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3534 t.Fatalf("mismatch (-want +got):\n%s", diff)
3535 }
3536
3537 })
3538}
3539
3540// This tests the frequent pattern "\bLITERAL\b".
3541func TestWordSearch(t *testing.T) {
3542 content := []byte("needle the bla")
3543 // ----------------01234567890123
3544
3545 b := testIndexBuilder(t, nil,
3546 Document{
3547 Name: "f1",
3548 Content: content,
3549 })
3550
3551 t.Run("LineMatches", func(t *testing.T) {
3552 sres := searchForTest(t, b,
3553 &query.Regexp{
3554 Regexp: mustParseRE("\\bthe\\b"),
3555 CaseSensitive: true,
3556 Content: true,
3557 })
3558
3559 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3560 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3561 }
3562
3563 if sres.Stats.RegexpsConsidered != 0 {
3564 t.Fatal("expected regexp to be skipped")
3565 }
3566
3567 got := sres.Files[0].LineMatches[0]
3568 want := LineMatch{
3569 LineFragments: []LineFragmentMatch{{
3570 LineOffset: 7,
3571 Offset: 7,
3572 MatchLength: 3,
3573 }},
3574 Line: content,
3575 FileName: false,
3576 LineNumber: 1,
3577 LineStart: 0,
3578 LineEnd: 14,
3579 }
3580
3581 if !reflect.DeepEqual(got, want) {
3582 t.Errorf("got %#v, want %#v", got, want)
3583 }
3584 })
3585
3586 t.Run("ChunkMatches", func(t *testing.T) {
3587 sres := searchForTest(t, b,
3588 &query.Regexp{
3589 Regexp: mustParseRE("\\bthe\\b"),
3590 CaseSensitive: true,
3591 }, chunkOpts)
3592
3593 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3594 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3595 }
3596
3597 if sres.Stats.RegexpsConsidered != 0 {
3598 t.Fatal("expected regexp to be skipped")
3599 }
3600
3601 got := sres.Files[0].ChunkMatches[0]
3602 want := ChunkMatch{
3603 Content: content,
3604 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3605 Ranges: []Range{{
3606 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3607 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3608 }},
3609 }
3610
3611 if diff := cmp.Diff(want, got); diff != "" {
3612 t.Fatal(diff)
3613 }
3614 })
3615}