fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29 "github.com/kylelemons/godebug/pretty"
30
31 "github.com/sourcegraph/zoekt/query"
32)
33
34func clearScores(r *SearchResult) {
35 for i := range r.Files {
36 r.Files[i].Score = 0.0
37 for j := range r.Files[i].LineMatches {
38 r.Files[i].LineMatches[j].Score = 0.0
39 }
40 for j := range r.Files[i].ChunkMatches {
41 r.Files[i].ChunkMatches[j].Score = 0.0
42 }
43 r.Files[i].Checksum = nil
44 r.Files[i].Debug = ""
45 }
46}
47
48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
49 t.Helper()
50
51 b, err := NewIndexBuilder(repo)
52 if err != nil {
53 t.Fatalf("NewIndexBuilder: %v", err)
54 }
55
56 for i, d := range docs {
57 if err := b.Add(d); err != nil {
58 t.Fatalf("Add %d: %v", i, err)
59 }
60 }
61
62 return b
63}
64
65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
66 t.Helper()
67
68 b := newIndexBuilder()
69 b.indexFormatVersion = NextIndexFormatVersion
70
71 if len(repos) != len(docs) {
72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
73 }
74
75 for i, repo := range repos {
76 if err := b.setRepository(repo); err != nil {
77 t.Fatal(err)
78 }
79 for j, d := range docs[i] {
80 if err := b.Add(d); err != nil {
81 t.Fatalf("Add %d %d: %v", i, j, err)
82 }
83 }
84 }
85
86 return b
87}
88
89func TestBoundary(t *testing.T) {
90 b := testIndexBuilder(t, nil,
91 Document{Name: "f1", Content: []byte("x the")},
92 Document{Name: "f1", Content: []byte("reader")})
93 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
94 if len(res.Files) > 0 {
95 t.Fatalf("got %v, want no matches", res.Files)
96 }
97}
98
99func TestDocSectionInvalid(t *testing.T) {
100 b, err := NewIndexBuilder(nil)
101 if err != nil {
102 t.Fatalf("NewIndexBuilder: %v", err)
103 }
104 doc := Document{
105 Name: "f1",
106 Content: []byte("01234567890123"),
107 Symbols: []DocumentSection{{5, 8}, {7, 9}},
108 }
109
110 if err := b.Add(doc); err == nil {
111 t.Errorf("overlapping doc sections should fail")
112 }
113
114 doc = Document{
115 Name: "f1",
116 Content: []byte("01234567890123"),
117 Symbols: []DocumentSection{{0, 20}},
118 }
119
120 if err := b.Add(doc); err == nil {
121 t.Errorf("doc sections beyond EOF should fail")
122 }
123}
124
125func TestBasic(t *testing.T) {
126 b := testIndexBuilder(t, nil,
127 Document{
128 Name: "f2",
129 Content: []byte("to carry water in the no later bla"),
130 // --------------0123456789012345678901234567890123
131 })
132
133 t.Run("LineMatch", func(t *testing.T) {
134 res := searchForTest(t, b, &query.Substring{
135 Pattern: "water",
136 CaseSensitive: true,
137 })
138 fmatches := res.Files
139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
140 t.Fatalf("got %v, want 1 matches", fmatches)
141 }
142
143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
144 want := "f2:9"
145 if got != want {
146 t.Errorf("1: got %s, want %s", got, want)
147 }
148 })
149
150 t.Run("ChunkMatch", func(t *testing.T) {
151 res := searchForTest(t, b, &query.Substring{
152 Pattern: "water",
153 CaseSensitive: true,
154 }, chunkOpts)
155 fmatches := res.Files
156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
157 t.Fatalf("got %v, want 1 matches", fmatches)
158 }
159
160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
161 want := "f2:9"
162 if got != want {
163 t.Errorf("1: got %s, want %s", got, want)
164 }
165 })
166}
167
168func TestEmptyIndex(t *testing.T) {
169 b := testIndexBuilder(t, nil)
170 searcher := searcherForTest(t, b)
171
172 var opts SearchOptions
173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
174 t.Fatalf("Search: %v", err)
175 }
176
177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
178 t.Fatalf("List: %v", err)
179 }
180
181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
182 t.Fatalf("Search: %v", err)
183 }
184}
185
// memSeeker serves an index file out of an in-memory byte slice. The tests
// in this file pass it to NewSearcher so that an index written into a buffer
// can be searched without touching disk.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the underlying data starting at offset off.
// The returned slice aliases the underlying data rather than copying it.
//
// A request that reaches past the end of the data yields an error instead of
// a panic (or, when the backing array has spare capacity, bytes that are not
// part of the data). The end offset is computed in 64 bits so that off+sz
// cannot wrap around.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read of %d bytes at offset %d out of bounds (size %d)", sz, off, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the underlying data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
202
203func TestNewlines(t *testing.T) {
204 b := testIndexBuilder(t, nil,
205 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
206 // ---------------------------------------------012345-678901-234
207
208 t.Run("LineMatches", func(t *testing.T) {
209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
210
211 matches := sres.Files
212 want := []FileMatch{{
213 FileName: "filename",
214 LineMatches: []LineMatch{{
215 LineFragments: []LineFragmentMatch{{
216 Offset: 8,
217 LineOffset: 2,
218 MatchLength: 3,
219 }},
220 Line: []byte("line2"),
221 LineStart: 6,
222 LineEnd: 11,
223 LineNumber: 2,
224 }},
225 }}
226
227 if !reflect.DeepEqual(matches, want) {
228 t.Errorf("got %v, want %v", matches, want)
229 }
230 })
231
232 t.Run("ChunkMatches", func(t *testing.T) {
233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
234
235 matches := sres.Files
236 want := []FileMatch{{
237 FileName: "filename",
238 ChunkMatches: []ChunkMatch{{
239 Content: []byte("line2"),
240 ContentStart: Location{
241 ByteOffset: 6,
242 LineNumber: 2,
243 Column: 1,
244 },
245 Ranges: []Range{{
246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
248 }},
249 }},
250 }}
251
252 if diff := cmp.Diff(want, matches); diff != "" {
253 t.Fatal(diff)
254 }
255 })
256}
257
258// A result spanning multiple lines should have LineMatches that only cover
259// single lines.
260func TestQueryNewlines(t *testing.T) {
261 text := "line1\nline2\nbla"
262 b := testIndexBuilder(t, nil,
263 Document{Name: "filename", Content: []byte(text)})
264
265 t.Run("LineMatches", func(t *testing.T) {
266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
267 matches := sres.Files
268 if len(matches) != 1 {
269 t.Fatalf("got %d file matches, want exactly one", len(matches))
270 }
271 m := matches[0]
272 if len(m.LineMatches) != 2 {
273 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
274 }
275 })
276
277 t.Run("ChunkMatches", func(t *testing.T) {
278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
279 matches := sres.Files
280 if len(matches) != 1 {
281 t.Fatalf("got %d file matches, want exactly one", len(matches))
282 }
283 m := matches[0]
284 if len(m.ChunkMatches) != 1 {
285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
286 }
287 })
288}
289
290var chunkOpts = SearchOptions{ChunkMatches: true}
291
292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
293 searcher := searcherForTest(t, b)
294 var opts SearchOptions
295 if len(o) > 0 {
296 opts = o[0]
297 }
298 res, err := searcher.Search(context.Background(), q, &opts)
299 if err != nil {
300 t.Fatalf("Search(%s): %v", q, err)
301 }
302 clearScores(res)
303 return res
304}
305
306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
307 var buf bytes.Buffer
308 if err := b.Write(&buf); err != nil {
309 t.Fatal(err)
310 }
311 f := &memSeeker{buf.Bytes()}
312
313 searcher, err := NewSearcher(f)
314 if err != nil {
315 t.Fatalf("NewSearcher: %v", err)
316 }
317
318 return searcher
319}
320
321func TestCaseFold(t *testing.T) {
322 b := testIndexBuilder(t, nil,
323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
324 // -----------------------------------012345678901234
325 )
326 t.Run("LineMatches", func(t *testing.T) {
327 sres := searchForTest(t, b, &query.Substring{
328 Pattern: "bananas",
329 CaseSensitive: true,
330 })
331 matches := sres.Files
332 if len(matches) != 0 {
333 t.Errorf("foldcase: got %#v, want 0 matches", matches)
334 }
335
336 sres = searchForTest(t, b,
337 &query.Substring{
338 Pattern: "BaNaNAS",
339 CaseSensitive: true,
340 })
341 matches = sres.Files
342 if len(matches) != 1 {
343 t.Errorf("no foldcase: got %v, want 1 matches", matches)
344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
345 t.Errorf("foldcase: got %v, want offsets 7", matches)
346 }
347 })
348
349 t.Run("ChunkMatches", func(t *testing.T) {
350 sres := searchForTest(t, b, &query.Substring{
351 Pattern: "bananas",
352 CaseSensitive: true,
353 }, chunkOpts)
354 matches := sres.Files
355 if len(matches) != 0 {
356 t.Errorf("foldcase: got %#v, want 0 matches", matches)
357 }
358
359 sres = searchForTest(t, b,
360 &query.Substring{
361 Pattern: "BaNaNAS",
362 CaseSensitive: true,
363 })
364 matches = sres.Files
365 if len(matches) != 1 {
366 t.Errorf("no foldcase: got %v, want 1 matches", matches)
367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
368 t.Errorf("foldcase: got %v, want offsets 7", matches)
369 }
370 })
371}
372
373func TestAndSearch(t *testing.T) {
374 b := testIndexBuilder(t, nil,
375 Document{Name: "f1", Content: []byte("x banana y")},
376 Document{Name: "f2", Content: []byte("x apple y")},
377 Document{Name: "f3", Content: []byte("x banana apple y")},
378 // ---------------------------------------0123456789012345
379 )
380
381 t.Run("LineMatches", func(t *testing.T) {
382 sres := searchForTest(t, b, query.NewAnd(
383 &query.Substring{
384 Pattern: "banana",
385 },
386 &query.Substring{
387 Pattern: "apple",
388 },
389 ))
390 matches := sres.Files
391 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
392 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
393 }
394
395 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
396 t.Fatalf("got %#v, want offsets 2,9", matches)
397 }
398
399 wantStats := Stats{
400 FilesLoaded: 1,
401 ContentBytesLoaded: 18,
402 IndexBytesLoaded: 8,
403 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
404 MatchCount: 1,
405 FileCount: 1,
406 FilesConsidered: 2,
407 ShardsScanned: 1,
408 }
409 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
410 t.Errorf("got stats diff %s", diff)
411 }
412 })
413
414 t.Run("ChunkMatches", func(t *testing.T) {
415 sres := searchForTest(t, b, query.NewAnd(
416 &query.Substring{
417 Pattern: "banana",
418 },
419 &query.Substring{
420 Pattern: "apple",
421 },
422 ), chunkOpts)
423 matches := sres.Files
424 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
425 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
426 }
427
428 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
429 t.Fatalf("got %#v, want offsets 2,9", matches)
430 }
431
432 wantStats := Stats{
433 FilesLoaded: 1,
434 ContentBytesLoaded: 18,
435 IndexBytesLoaded: 8,
436 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
437 MatchCount: 2,
438 FileCount: 1,
439 FilesConsidered: 2,
440 ShardsScanned: 1,
441 }
442 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
443 t.Errorf("got stats diff %s", diff)
444 }
445 })
446}
447
448func TestAndNegateSearch(t *testing.T) {
449 b := testIndexBuilder(t, nil,
450 Document{Name: "f1", Content: []byte("x banana y")},
451 // -----------------------------------0123456789
452 Document{Name: "f4", Content: []byte("x banana apple y")})
453
454 t.Run("LineMatches", func(t *testing.T) {
455 sres := searchForTest(t, b, query.NewAnd(
456 &query.Substring{
457 Pattern: "banana",
458 },
459 &query.Not{Child: &query.Substring{
460 Pattern: "apple",
461 }}))
462
463 matches := sres.Files
464
465 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
466 t.Fatalf("got %v, want 1 match", matches)
467 }
468 if matches[0].FileName != "f1" {
469 t.Fatalf("got match %#v, want FileName: f1", matches[0])
470 }
471 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
472 t.Fatalf("got %v, want offset 2", matches)
473 }
474 })
475
476 t.Run("ChunkMatches", func(t *testing.T) {
477 sres := searchForTest(t, b,
478 query.NewAnd(
479 &query.Substring{
480 Pattern: "banana",
481 },
482 &query.Not{Child: &query.Substring{
483 Pattern: "apple",
484 }},
485 ),
486 chunkOpts,
487 )
488
489 matches := sres.Files
490
491 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
492 t.Fatalf("got %v, want 1 match", matches)
493 }
494 if matches[0].FileName != "f1" {
495 t.Fatalf("got match %#v, want FileName: f1", matches[0])
496 }
497 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
498 t.Fatalf("got %v, want offset 2", matches)
499 }
500 })
501}
502
503func TestNegativeMatchesOnlyShortcut(t *testing.T) {
504 b := testIndexBuilder(t, nil,
505 Document{Name: "f1", Content: []byte("x banana y")},
506 Document{Name: "f2", Content: []byte("x appelmoes y")},
507 Document{Name: "f3", Content: []byte("x appelmoes y")},
508 Document{Name: "f3", Content: []byte("x appelmoes y")})
509
510 t.Run("LineMatches", func(t *testing.T) {
511 sres := searchForTest(t, b, query.NewAnd(
512 &query.Substring{
513 Pattern: "banana",
514 },
515 &query.Not{Child: &query.Substring{
516 Pattern: "appel",
517 }}))
518
519 if sres.Stats.FilesConsidered != 1 {
520 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
521 }
522 })
523
524 t.Run("ChunkMatches", func(t *testing.T) {
525 sres := searchForTest(t, b, query.NewAnd(
526 &query.Substring{
527 Pattern: "banana",
528 },
529 &query.Not{Child: &query.Substring{
530 Pattern: "appel",
531 }}), chunkOpts)
532
533 if sres.Stats.FilesConsidered != 1 {
534 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
535 }
536 })
537}
538
539func TestFileSearch(t *testing.T) {
540 b := testIndexBuilder(t, nil,
541 Document{Name: "banzana", Content: []byte("x orange y")},
542 // -------------0123456
543 Document{Name: "banana", Content: []byte("x apple y")},
544 // -------------012345
545 )
546
547 t.Run("LineMatches", func(t *testing.T) {
548 sres := searchForTest(t, b, &query.Substring{
549 Pattern: "anan",
550 FileName: true,
551 })
552
553 matches := sres.Files
554 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
555 t.Fatalf("got %v, want 1 match", matches)
556 }
557
558 got := matches[0].LineMatches[0]
559 want := LineMatch{
560 Line: []byte("banana"),
561 LineFragments: []LineFragmentMatch{{
562 Offset: 1,
563 LineOffset: 1,
564 MatchLength: 4,
565 }},
566 FileName: true,
567 }
568
569 if !reflect.DeepEqual(got, want) {
570 t.Errorf("got %#v, want %#v", got, want)
571 }
572 })
573
574 t.Run("ChunkMatches", func(t *testing.T) {
575 sres := searchForTest(t, b, &query.Substring{
576 Pattern: "anan",
577 FileName: true,
578 }, chunkOpts)
579
580 matches := sres.Files
581 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
582 t.Fatalf("got %v, want 1 match", matches)
583 }
584
585 got := matches[0].ChunkMatches[0]
586 want := ChunkMatch{
587 Content: []byte("banana"),
588 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
589 Ranges: []Range{{
590 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
591 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
592 }},
593 FileName: true,
594 }
595
596 if diff := cmp.Diff(want, got); diff != "" {
597 t.Fatal(diff)
598 }
599 })
600
601 t.Run("FileNameSet", func(t *testing.T) {
602 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
603
604 matches := sres.Files
605 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
606 t.Fatalf("got %v, want 1 match", matches)
607 }
608
609 got := matches[0].ChunkMatches[0]
610 want := ChunkMatch{
611 Content: []byte("banana"),
612 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
613 Ranges: []Range{{
614 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
615 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
616 }},
617 FileName: true,
618 }
619
620 if diff := cmp.Diff(want, got); diff != "" {
621 t.Fatal(diff)
622 }
623 })
624}
625
626func TestFileCase(t *testing.T) {
627 b := testIndexBuilder(t, nil,
628 Document{Name: "BANANA", Content: []byte("x orange y")})
629
630 t.Run("LineMatches", func(t *testing.T) {
631 sres := searchForTest(t, b, &query.Substring{
632 Pattern: "banana",
633 FileName: true,
634 })
635
636 matches := sres.Files
637 if len(matches) != 1 || matches[0].FileName != "BANANA" {
638 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
639 }
640 })
641
642 t.Run("ChunkMatches", func(t *testing.T) {
643 sres := searchForTest(t, b, &query.Substring{
644 Pattern: "banana",
645 FileName: true,
646 }, chunkOpts)
647
648 matches := sres.Files
649 if len(matches) != 1 || matches[0].FileName != "BANANA" {
650 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
651 }
652 })
653}
654
655func TestFileRegexpSearchBruteForce(t *testing.T) {
656 b := testIndexBuilder(t, nil,
657 Document{Name: "banzana", Content: []byte("x orange y")},
658 Document{Name: "banana", Content: []byte("x apple y")},
659 )
660 t.Run("LineMatches", func(t *testing.T) {
661 sres := searchForTest(t, b, &query.Regexp{
662 Regexp: mustParseRE("[qn][zx]"),
663 FileName: true,
664 })
665
666 matches := sres.Files
667 if len(matches) != 1 || matches[0].FileName != "banzana" {
668 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
669 }
670 })
671 t.Run("LineMatches", func(t *testing.T) {
672 sres := searchForTest(t, b, &query.Regexp{
673 Regexp: mustParseRE("[qn][zx]"),
674 FileName: true,
675 }, chunkOpts)
676
677 matches := sres.Files
678 if len(matches) != 1 || matches[0].FileName != "banzana" {
679 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
680 }
681 })
682}
683
684func TestFileRegexpSearchShortString(t *testing.T) {
685 b := testIndexBuilder(t, nil,
686 Document{Name: "banana.py", Content: []byte("x orange y")})
687
688 t.Run("LineMatches", func(t *testing.T) {
689 sres := searchForTest(t, b, &query.Regexp{
690 Regexp: mustParseRE("ana.py"),
691 FileName: true,
692 })
693
694 matches := sres.Files
695 if len(matches) != 1 || matches[0].FileName != "banana.py" {
696 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
697 }
698 })
699
700 t.Run("ChunkMatches", func(t *testing.T) {
701 sres := searchForTest(t, b, &query.Regexp{
702 Regexp: mustParseRE("ana.py"),
703 FileName: true,
704 }, chunkOpts)
705
706 matches := sres.Files
707 if len(matches) != 1 || matches[0].FileName != "banana.py" {
708 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
709 }
710 })
711}
712
713func TestFileSubstringSearchBruteForce(t *testing.T) {
714 b := testIndexBuilder(t, nil,
715 Document{Name: "BANZANA", Content: []byte("x orange y")},
716 Document{Name: "banana", Content: []byte("x apple y")})
717
718 q := &query.Substring{
719 Pattern: "z",
720 FileName: true,
721 }
722
723 t.Run("LineMatches", func(t *testing.T) {
724 res := searchForTest(t, b, q)
725 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
726 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
727 }
728 })
729
730 t.Run("ChunkMatches", func(t *testing.T) {
731 res := searchForTest(t, b, q, chunkOpts)
732 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
733 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
734 }
735 })
736}
737
738func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
739 b := testIndexBuilder(t, nil,
740 Document{Name: "BANZANA", Content: []byte("x orange y")},
741 Document{Name: "bananaq", Content: []byte("x apple y")})
742
743 q := &query.Substring{
744 Pattern: "q",
745 FileName: true,
746 }
747 t.Run("LineMatches", func(t *testing.T) {
748 res := searchForTest(t, b, q)
749 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
750 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
751 }
752 })
753
754 t.Run("LineMatches", func(t *testing.T) {
755 res := searchForTest(t, b, q, chunkOpts)
756 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
757 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
758 }
759 })
760}
761
762func TestSearchMatchAll(t *testing.T) {
763 b := testIndexBuilder(t, nil,
764 Document{Name: "banzana", Content: []byte("x orange y")},
765 Document{Name: "banana", Content: []byte("x apple y")})
766
767 t.Run("LineMatches", func(t *testing.T) {
768 sres := searchForTest(t, b, &query.Const{Value: true})
769 matches := sres.Files
770 if len(matches) != 2 {
771 t.Fatalf("got %v, want 2 matches", matches)
772 }
773 })
774
775 t.Run("ChunkMatches", func(t *testing.T) {
776 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
777 matches := sres.Files
778 if len(matches) != 2 {
779 t.Fatalf("got %v, want 2 matches", matches)
780 }
781 })
782}
783
784func TestSearchNewline(t *testing.T) {
785 b := testIndexBuilder(t, nil,
786 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
787
788 t.Run("LineMatches", func(t *testing.T) {
789 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
790
791 // Just check that we don't crash.
792
793 matches := sres.Files
794 if len(matches) != 1 {
795 t.Fatalf("got %v, want 1 matches", matches)
796 }
797 })
798
799 t.Run("ChunkMatches", func(t *testing.T) {
800 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
801
802 // Just check that we don't crash.
803
804 matches := sres.Files
805 if len(matches) != 1 {
806 t.Fatalf("got %v, want 1 matches", matches)
807 }
808 })
809}
810
811func TestSearchMatchAllRegexp(t *testing.T) {
812 b := testIndexBuilder(t, nil,
813 Document{Name: "banzana", Content: []byte("abcd")},
814 Document{Name: "banana", Content: []byte("pqrs")})
815
816 t.Run("LineMatches", func(t *testing.T) {
817 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
818
819 matches := sres.Files
820 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
821 t.Fatalf("got %v, want 2 matches", matches)
822 }
823 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
824 t.Fatalf("want 4 chars in every file, got %#v", matches)
825 }
826
827 })
828
829 t.Run("ChunkMatches", func(t *testing.T) {
830 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
831
832 matches := sres.Files
833 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
834 t.Fatalf("got %v, want 2 matches", matches)
835 }
836 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
837 t.Fatalf("want 4 chars in every file, got %#v", matches)
838 }
839
840 })
841}
842
843func TestFileRestriction(t *testing.T) {
844 b := testIndexBuilder(t, nil,
845 Document{Name: "banana1", Content: []byte("x orange y")},
846 Document{Name: "banana2", Content: []byte("x apple y")},
847 Document{Name: "orange", Content: []byte("x apple z")})
848
849 t.Run("LineMatches", func(t *testing.T) {
850 sres := searchForTest(t, b, query.NewAnd(
851 &query.Substring{
852 Pattern: "banana",
853 FileName: true,
854 },
855 &query.Substring{
856 Pattern: "apple",
857 }))
858
859 matches := sres.Files
860 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
861 t.Fatalf("got %v, want 1 match", matches)
862 }
863
864 match := matches[0].LineMatches[0]
865 got := string(match.Line)
866 want := "x apple y"
867 if got != want {
868 t.Errorf("got match %#v, want line %q", match, want)
869 }
870 })
871
872 t.Run("ChunkMatches", func(t *testing.T) {
873 sres := searchForTest(t, b, query.NewAnd(
874 &query.Substring{
875 Pattern: "banana",
876 FileName: true,
877 },
878 &query.Substring{
879 Pattern: "apple",
880 }), chunkOpts)
881
882 matches := sres.Files
883 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
884 t.Fatalf("got %v, want 1 match", matches)
885 }
886
887 match := matches[0].ChunkMatches[0]
888 got := string(match.Content)
889 want := "x apple y"
890 if got != want {
891 t.Errorf("got match %#v, want line %q", match, want)
892 }
893 })
894}
895
896func TestFileNameBoundary(t *testing.T) {
897 b := testIndexBuilder(t, nil,
898 Document{Name: "banana2", Content: []byte("x apple y")},
899 Document{Name: "helpers.go", Content: []byte("x apple y")},
900 Document{Name: "foo", Content: []byte("x apple y")})
901
902 t.Run("LineMatches", func(t *testing.T) {
903 sres := searchForTest(t, b, &query.Substring{
904 Pattern: "helpers.go",
905 FileName: true,
906 })
907
908 matches := sres.Files
909 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
910 t.Fatalf("got %v, want 1 match", matches)
911 }
912 })
913
914 t.Run("ChunkMatches", func(t *testing.T) {
915 sres := searchForTest(t, b, &query.Substring{
916 Pattern: "helpers.go",
917 FileName: true,
918 }, chunkOpts)
919
920 matches := sres.Files
921 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
922 t.Fatalf("got %v, want 1 match", matches)
923 }
924 })
925}
926
927func TestDocumentOrder(t *testing.T) {
928 var docs []Document
929 for i := 0; i < 3; i++ {
930 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
931 }
932
933 b := testIndexBuilder(t, nil, docs...)
934
935 t.Run("LineMatches", func(t *testing.T) {
936 sres := searchForTest(t, b, query.NewAnd(
937 &query.Substring{
938 Pattern: "needle",
939 }))
940
941 want := []string{"f0", "f1", "f2"}
942 var got []string
943 for _, f := range sres.Files {
944 got = append(got, f.FileName)
945 }
946 if !reflect.DeepEqual(got, want) {
947 t.Fatalf("got %v, want %v", got, want)
948 }
949 })
950
951 t.Run("ChunkMatches", func(t *testing.T) {
952 sres := searchForTest(t, b,
953 query.NewAnd(&query.Substring{
954 Pattern: "needle",
955 }),
956 chunkOpts,
957 )
958
959 want := []string{"f0", "f1", "f2"}
960 var got []string
961 for _, f := range sres.Files {
962 got = append(got, f.FileName)
963 }
964 if !reflect.DeepEqual(got, want) {
965 t.Fatalf("got %v, want %v", got, want)
966 }
967 })
968}
969
970func TestBranchMask(t *testing.T) {
971 b := testIndexBuilder(t, &Repository{
972 Branches: []RepositoryBranch{
973 {"master", "v-master"},
974 {"stable", "v-stable"},
975 {"bonzai", "v-bonzai"},
976 },
977 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
978 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
979 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
980 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
981 )
982
983 t.Run("LineMatches", func(t *testing.T) {
984 sres := searchForTest(t, b, query.NewAnd(
985 &query.Substring{
986 Pattern: "needle",
987 },
988 &query.Branch{
989 Pattern: "table",
990 }))
991
992 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
993 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
994 }
995
996 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
997 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
998 }
999 })
1000
1001 t.Run("ChunkMatches", func(t *testing.T) {
1002 sres := searchForTest(t, b, query.NewAnd(
1003 &query.Substring{
1004 Pattern: "needle",
1005 },
1006 &query.Branch{
1007 Pattern: "table",
1008 }),
1009 chunkOpts,
1010 )
1011
1012 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1013 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1014 }
1015
1016 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1017 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1018 }
1019 })
1020}
1021
1022func TestBranchLimit(t *testing.T) {
1023 for limit := 64; limit <= 65; limit++ {
1024 r := &Repository{}
1025 for i := 0; i < limit; i++ {
1026 s := fmt.Sprintf("b%d", i)
1027 r.Branches = append(r.Branches, RepositoryBranch{
1028 s, "v-" + s,
1029 })
1030 }
1031 _, err := NewIndexBuilder(r)
1032 if limit == 64 && err != nil {
1033 t.Fatalf("NewIndexBuilder: %v", err)
1034 } else if limit == 65 && err == nil {
1035 t.Fatalf("NewIndexBuilder succeeded")
1036 }
1037 }
1038}
1039
1040func TestBranchReport(t *testing.T) {
1041 branches := []string{"stable", "master"}
1042 b := testIndexBuilder(t, &Repository{
1043 Branches: []RepositoryBranch{
1044 {"stable", "vs"},
1045 {"master", "vm"},
1046 },
1047 },
1048 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1049
1050 t.Run("LineMatches", func(t *testing.T) {
1051 sres := searchForTest(t, b, &query.Substring{
1052 Pattern: "needle",
1053 })
1054 if len(sres.Files) != 1 {
1055 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1056 }
1057
1058 f := sres.Files[0]
1059 if !reflect.DeepEqual(f.Branches, branches) {
1060 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1061 }
1062 })
1063
1064 t.Run("ChunkMatches", func(t *testing.T) {
1065 sres := searchForTest(t, b, &query.Substring{
1066 Pattern: "needle",
1067 }, chunkOpts)
1068 if len(sres.Files) != 1 {
1069 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1070 }
1071
1072 f := sres.Files[0]
1073 if !reflect.DeepEqual(f.Branches, branches) {
1074 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1075 }
1076 })
1077
1078}
1079
1080func TestBranchVersions(t *testing.T) {
1081 b := testIndexBuilder(t, &Repository{
1082 Branches: []RepositoryBranch{
1083 {"stable", "v-stable"},
1084 {"master", "v-master"},
1085 },
1086 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1087
1088 t.Run("LineMatches", func(t *testing.T) {
1089 sres := searchForTest(t, b, &query.Substring{
1090 Pattern: "needle",
1091 })
1092 if len(sres.Files) != 1 {
1093 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1094 }
1095
1096 f := sres.Files[0]
1097 if f.Version != "v-master" {
1098 t.Fatalf("got file %#v, want version 'v-master'", f)
1099 }
1100 })
1101
1102 t.Run("ChunkMatches", func(t *testing.T) {
1103 sres := searchForTest(t, b, &query.Substring{
1104 Pattern: "needle",
1105 }, chunkOpts)
1106 if len(sres.Files) != 1 {
1107 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1108 }
1109
1110 f := sres.Files[0]
1111 if f.Version != "v-master" {
1112 t.Fatalf("got file %#v, want version 'v-master'", f)
1113 }
1114 })
1115}
1116
// mustParseRE parses s as a regular expression using the syntax.Perl flags
// and returns the parsed tree. It panics with the parse error when s is not
// a valid expression, so it is only suitable for patterns fixed in the test
// source.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}
	panic(err)
}
1125
1126func TestRegexp(t *testing.T) {
1127 content := []byte("needle the bla")
1128 // ----------------01234567890123
1129
1130 b := testIndexBuilder(t, nil,
1131 Document{
1132 Name: "f1",
1133 Content: content,
1134 })
1135
1136 t.Run("LineMatches", func(t *testing.T) {
1137 sres := searchForTest(t, b,
1138 &query.Regexp{
1139 Regexp: mustParseRE("dle.*bla"),
1140 })
1141
1142 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1143 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1144 }
1145
1146 got := sres.Files[0].LineMatches[0]
1147 want := LineMatch{
1148 LineFragments: []LineFragmentMatch{{
1149 LineOffset: 3,
1150 Offset: 3,
1151 MatchLength: 11,
1152 }},
1153 Line: content,
1154 FileName: false,
1155 LineNumber: 1,
1156 LineStart: 0,
1157 LineEnd: 14,
1158 }
1159
1160 if !reflect.DeepEqual(got, want) {
1161 t.Errorf("got %#v, want %#v", got, want)
1162 }
1163 })
1164
1165 t.Run("ChunkMatches", func(t *testing.T) {
1166 sres := searchForTest(t, b,
1167 &query.Regexp{
1168 Regexp: mustParseRE("dle.*bla"),
1169 }, chunkOpts)
1170
1171 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1172 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1173 }
1174
1175 got := sres.Files[0].ChunkMatches[0]
1176 want := ChunkMatch{
1177 Content: content,
1178 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1179 Ranges: []Range{{
1180 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1181 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1182 }},
1183 }
1184
1185 if diff := cmp.Diff(want, got); diff != "" {
1186 t.Fatal(diff)
1187 }
1188 })
1189}
1190
1191func TestRegexpFile(t *testing.T) {
1192 content := []byte("needle the bla")
1193
1194 name := "let's play: find the mussel"
1195 b := testIndexBuilder(t, nil,
1196 Document{Name: name, Content: content},
1197 Document{Name: "play.txt", Content: content})
1198
1199 t.Run("LineMatches", func(t *testing.T) {
1200 sres := searchForTest(t, b,
1201 &query.Regexp{
1202 Regexp: mustParseRE("play.*mussel"),
1203 FileName: true,
1204 })
1205
1206 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1207 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1208 }
1209
1210 if sres.Files[0].FileName != name {
1211 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1212 }
1213 })
1214
1215 t.Run("ChunkMatches", func(t *testing.T) {
1216 sres := searchForTest(t, b,
1217 &query.Regexp{
1218 Regexp: mustParseRE("play.*mussel"),
1219 FileName: true,
1220 }, chunkOpts)
1221
1222 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1223 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1224 }
1225
1226 if sres.Files[0].FileName != name {
1227 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1228 }
1229 })
1230}
1231
1232func TestRegexpOrder(t *testing.T) {
1233 content := []byte("bla the needle")
1234 // ----------------01234567890123
1235
1236 b := testIndexBuilder(t, nil,
1237 Document{Name: "f1", Content: content})
1238
1239 t.Run("LineMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b,
1241 &query.Regexp{
1242 Regexp: mustParseRE("dle.*bla"),
1243 })
1244
1245 if len(sres.Files) != 0 {
1246 t.Fatalf("got %v, want 0 matches", sres.Files)
1247 }
1248 })
1249
1250 t.Run("ChunkMatches", func(t *testing.T) {
1251 sres := searchForTest(t, b,
1252 &query.Regexp{
1253 Regexp: mustParseRE("dle.*bla"),
1254 })
1255
1256 if len(sres.Files) != 0 {
1257 t.Fatalf("got %v, want 0 matches", sres.Files)
1258 }
1259 })
1260}
1261
1262func TestRepoName(t *testing.T) {
1263 content := []byte("bla the needle")
1264 // ----------------01234567890123
1265
1266 b := testIndexBuilder(t, &Repository{Name: "bla"},
1267 Document{Name: "f1", Content: content})
1268
1269 t.Run("LineMatches", func(t *testing.T) {
1270 sres := searchForTest(t, b,
1271 query.NewAnd(
1272 &query.Substring{Pattern: "needle"},
1273 &query.Repo{Regexp: regexp.MustCompile("foo")},
1274 ))
1275
1276 if len(sres.Files) != 0 {
1277 t.Fatalf("got %v, want 0 matches", sres.Files)
1278 }
1279
1280 if sres.Stats.FilesConsidered > 0 {
1281 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1282 }
1283
1284 sres = searchForTest(t, b,
1285 query.NewAnd(
1286 &query.Substring{Pattern: "needle"},
1287 &query.Repo{Regexp: regexp.MustCompile("bla")},
1288 ))
1289 if len(sres.Files) != 1 {
1290 t.Fatalf("got %v, want 1 match", sres.Files)
1291 }
1292 })
1293
1294 t.Run("ChunkMatches", func(t *testing.T) {
1295 sres := searchForTest(t, b,
1296 query.NewAnd(
1297 &query.Substring{Pattern: "needle"},
1298 &query.Repo{Regexp: regexp.MustCompile("foo")},
1299 ),
1300 chunkOpts,
1301 )
1302
1303 if len(sres.Files) != 0 {
1304 t.Fatalf("got %v, want 0 matches", sres.Files)
1305 }
1306
1307 if sres.Stats.FilesConsidered > 0 {
1308 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1309 }
1310
1311 sres = searchForTest(t, b,
1312 query.NewAnd(
1313 &query.Substring{Pattern: "needle"},
1314 &query.Repo{Regexp: regexp.MustCompile("bla")},
1315 ))
1316 if len(sres.Files) != 1 {
1317 t.Fatalf("got %v, want 1 match", sres.Files)
1318 }
1319 })
1320}
1321
1322func TestMergeMatches(t *testing.T) {
1323 content := []byte("blablabla")
1324 b := testIndexBuilder(t, nil,
1325 Document{Name: "f1", Content: content})
1326
1327 t.Run("LineMatches", func(t *testing.T) {
1328 sres := searchForTest(t, b,
1329 &query.Substring{Pattern: "bla"})
1330 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1331 t.Fatalf("got %v, want 1 match", sres.Files)
1332 }
1333 })
1334
1335 t.Run("ChunkMatches", func(t *testing.T) {
1336 sres := searchForTest(t, b,
1337 &query.Substring{Pattern: "bla"},
1338 chunkOpts,
1339 )
1340 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1341 t.Fatalf("got %v, want 1 match", sres.Files)
1342 }
1343 })
1344}
1345
1346func TestRepoURL(t *testing.T) {
1347 content := []byte("blablabla")
1348 b := testIndexBuilder(t, &Repository{
1349 Name: "name",
1350 URL: "URL",
1351 CommitURLTemplate: "commit",
1352 FileURLTemplate: "file-url",
1353 LineFragmentTemplate: "fragment",
1354 }, Document{Name: "f1", Content: content})
1355
1356 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1357
1358 if sres.RepoURLs["name"] != "file-url" {
1359 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1360 }
1361 if sres.LineFragments["name"] != "fragment" {
1362 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1363 }
1364}
1365
1366func TestRegexpCaseSensitive(t *testing.T) {
1367 content := []byte("bla\nfunc unmarshalGitiles\n")
1368 b := testIndexBuilder(t, nil, Document{
1369 Name: "f1",
1370 Content: content,
1371 })
1372
1373 t.Run("LineMatches", func(t *testing.T) {
1374 res := searchForTest(t, b,
1375 &query.Regexp{
1376 Regexp: mustParseRE("func.*Gitiles"),
1377 CaseSensitive: true,
1378 })
1379
1380 if len(res.Files) != 1 {
1381 t.Fatalf("got %v, want one match", res.Files)
1382 }
1383 })
1384
1385 t.Run("ChunkMatches", func(t *testing.T) {
1386 res := searchForTest(t, b,
1387 &query.Regexp{
1388 Regexp: mustParseRE("func.*Gitiles"),
1389 CaseSensitive: true,
1390 },
1391 chunkOpts,
1392 )
1393
1394 if len(res.Files) != 1 {
1395 t.Fatalf("got %v, want one match", res.Files)
1396 }
1397 })
1398}
1399
1400func TestRegexpCaseFolding(t *testing.T) {
1401 content := []byte("bla\nfunc unmarshalGitiles\n")
1402
1403 b := testIndexBuilder(t, nil,
1404 Document{Name: "f1", Content: content})
1405 res := searchForTest(t, b,
1406 &query.Regexp{
1407 Regexp: mustParseRE("func.*GITILES"),
1408 CaseSensitive: false,
1409 })
1410
1411 if len(res.Files) != 1 {
1412 t.Fatalf("got %v, want one match", res.Files)
1413 }
1414}
1415
1416func TestCaseRegexp(t *testing.T) {
1417 content := []byte("BLABLABLA")
1418 b := testIndexBuilder(t, nil,
1419 Document{Name: "f1", Content: content})
1420
1421 t.Run("LineMatches", func(t *testing.T) {
1422 res := searchForTest(t, b,
1423 &query.Regexp{
1424 Regexp: mustParseRE("[xb][xl][xa]"),
1425 CaseSensitive: true,
1426 })
1427
1428 if len(res.Files) > 0 {
1429 t.Fatalf("got %v, want no matches", res.Files)
1430 }
1431 })
1432
1433 t.Run("ChunkMatches", func(t *testing.T) {
1434 res := searchForTest(t, b,
1435 &query.Regexp{
1436 Regexp: mustParseRE("[xb][xl][xa]"),
1437 CaseSensitive: true,
1438 },
1439 chunkOpts,
1440 )
1441
1442 if len(res.Files) > 0 {
1443 t.Fatalf("got %v, want no matches", res.Files)
1444 }
1445 })
1446}
1447
1448func TestNegativeRegexp(t *testing.T) {
1449 content := []byte("BLABLABLA needle bla")
1450 b := testIndexBuilder(t, nil,
1451 Document{Name: "f1", Content: content})
1452
1453 t.Run("LineMatches", func(t *testing.T) {
1454 res := searchForTest(t, b,
1455 query.NewAnd(
1456 &query.Substring{
1457 Pattern: "needle",
1458 },
1459 &query.Not{
1460 Child: &query.Regexp{
1461 Regexp: mustParseRE(".cs"),
1462 },
1463 }))
1464
1465 if len(res.Files) != 1 {
1466 t.Fatalf("got %v, want 1 match", res.Files)
1467 }
1468 })
1469
1470 t.Run("ChunkMatches", func(t *testing.T) {
1471 res := searchForTest(t, b,
1472 query.NewAnd(
1473 &query.Substring{
1474 Pattern: "needle",
1475 },
1476 &query.Not{
1477 Child: &query.Regexp{
1478 Regexp: mustParseRE(".cs"),
1479 },
1480 },
1481 ),
1482 chunkOpts)
1483
1484 if len(res.Files) != 1 {
1485 t.Fatalf("got %v, want 1 match", res.Files)
1486 }
1487 })
1488}
1489
1490func TestSymbolRank(t *testing.T) {
1491 t.Skip()
1492
1493 content := []byte("func bla() blubxxxxx")
1494 // ----------------01234567890123456789
1495 b := testIndexBuilder(t, nil,
1496 Document{
1497 Name: "f1",
1498 Content: content,
1499 }, Document{
1500 Name: "f2",
1501 Content: content,
1502 Symbols: []DocumentSection{{5, 8}},
1503 }, Document{
1504 Name: "f3",
1505 Content: content,
1506 })
1507
1508 t.Run("LineMatches", func(t *testing.T) {
1509 res := searchForTest(t, b,
1510 &query.Substring{
1511 CaseSensitive: false,
1512 Pattern: "bla",
1513 })
1514
1515 if len(res.Files) != 3 {
1516 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1517 }
1518 if res.Files[0].FileName != "f2" {
1519 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1520 }
1521 })
1522
1523 t.Run("ChunkMatches", func(t *testing.T) {
1524 res := searchForTest(t, b,
1525 &query.Substring{
1526 CaseSensitive: false,
1527 Pattern: "bla",
1528 }, chunkOpts)
1529
1530 if len(res.Files) != 3 {
1531 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1532 }
1533 if res.Files[0].FileName != "f2" {
1534 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1535 }
1536 })
1537}
1538
1539func TestSymbolRankRegexpUTF8(t *testing.T) {
1540 t.Skip()
1541
1542 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1543 content := []byte(prefix +
1544 "func bla() blub")
1545 // ------012345678901234
1546 b := testIndexBuilder(t, nil,
1547 Document{
1548 Name: "f1",
1549 Content: content,
1550 }, Document{
1551 Name: "f2",
1552 Content: content,
1553 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1554 }, Document{
1555 Name: "f3",
1556 Content: content,
1557 })
1558
1559 t.Run("LineMatches", func(t *testing.T) {
1560 res := searchForTest(t, b,
1561 &query.Regexp{
1562 Regexp: mustParseRE("b.a"),
1563 })
1564
1565 if len(res.Files) != 3 {
1566 t.Fatalf("got %#v, want 3 files", res.Files)
1567 }
1568 if res.Files[0].FileName != "f2" {
1569 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1570 }
1571 })
1572
1573 t.Run("ChunjkMatches", func(t *testing.T) {
1574 res := searchForTest(t, b,
1575 &query.Regexp{
1576 Regexp: mustParseRE("b.a"),
1577 }, chunkOpts)
1578
1579 if len(res.Files) != 3 {
1580 t.Fatalf("got %#v, want 3 files", res.Files)
1581 }
1582 if res.Files[0].FileName != "f2" {
1583 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1584 }
1585 })
1586}
1587
1588func TestPartialSymbolRank(t *testing.T) {
1589 t.Skip()
1590
1591 content := []byte("func bla() blub")
1592 // ----------------012345678901234
1593
1594 b := testIndexBuilder(t, nil,
1595 Document{
1596 Name: "f1",
1597 Content: content,
1598 Symbols: []DocumentSection{{4, 9}},
1599 }, Document{
1600 Name: "f2",
1601 Content: content,
1602 Symbols: []DocumentSection{{4, 8}},
1603 }, Document{
1604 Name: "f3",
1605 Content: content,
1606 Symbols: []DocumentSection{{4, 9}},
1607 })
1608
1609 t.Run("LineMatches", func(t *testing.T) {
1610 res := searchForTest(t, b,
1611 &query.Substring{
1612 Pattern: "bla",
1613 })
1614
1615 if len(res.Files) != 3 {
1616 t.Fatalf("got %#v, want 3 files", res.Files)
1617 }
1618 if res.Files[0].FileName != "f2" {
1619 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1620 }
1621 })
1622
1623 t.Run("ChunkMatches", func(t *testing.T) {
1624 res := searchForTest(t, b,
1625 &query.Substring{
1626 Pattern: "bla",
1627 }, chunkOpts)
1628
1629 if len(res.Files) != 3 {
1630 t.Fatalf("got %#v, want 3 files", res.Files)
1631 }
1632 if res.Files[0].FileName != "f2" {
1633 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1634 }
1635 })
1636}
1637
1638func TestNegativeRepo(t *testing.T) {
1639 content := []byte("bla the needle")
1640 // ----------------01234567890123
1641 b := testIndexBuilder(t, &Repository{
1642 Name: "bla",
1643 }, Document{Name: "f1", Content: content})
1644
1645 t.Run("LineMatches", func(t *testing.T) {
1646 sres := searchForTest(t, b,
1647 query.NewAnd(
1648 &query.Substring{Pattern: "needle"},
1649 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1650 ))
1651
1652 if len(sres.Files) != 0 {
1653 t.Fatalf("got %v, want 0 matches", sres.Files)
1654 }
1655 })
1656
1657 t.Run("ChunkMatches", func(t *testing.T) {
1658 sres := searchForTest(t, b,
1659 query.NewAnd(
1660 &query.Substring{Pattern: "needle"},
1661 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1662 ), chunkOpts)
1663
1664 if len(sres.Files) != 0 {
1665 t.Fatalf("got %v, want 0 matches", sres.Files)
1666 }
1667 })
1668}
1669
1670func TestListRepos(t *testing.T) {
1671 content := []byte("bla the needle\n")
1672 // ----------------012345678901234-
1673
1674 t.Run("default and minimal fallback", func(t *testing.T) {
1675 repo := &Repository{
1676 Name: "reponame",
1677 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1678 }
1679 b := testIndexBuilder(t, repo,
1680 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1681 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1682 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1683 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1684
1685 searcher := searcherForTest(t, b)
1686
1687 for _, opts := range []*ListOptions{
1688 nil,
1689 {Minimal: false},
1690 {Minimal: true},
1691 } {
1692 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1693 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1694
1695 res, err := searcher.List(context.Background(), q, opts)
1696 if err != nil {
1697 t.Fatalf("List(%v): %v", q, err)
1698 }
1699
1700 want := &RepoList{
1701 Repos: []*RepoListEntry{{
1702 Repository: *repo,
1703 Stats: RepoStats{
1704 Documents: 4,
1705 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1706 Shards: 1,
1707
1708 NewLinesCount: 4,
1709 DefaultBranchNewLinesCount: 2,
1710 OtherBranchesNewLinesCount: 3,
1711 },
1712 }},
1713 Stats: RepoStats{
1714 Documents: 4,
1715 ContentBytes: 68,
1716 Shards: 1,
1717
1718 NewLinesCount: 4,
1719 DefaultBranchNewLinesCount: 2,
1720 OtherBranchesNewLinesCount: 3,
1721 },
1722 }
1723 ignored := []cmp.Option{
1724 cmpopts.EquateEmpty(),
1725 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1726 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1727 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1728 cmpopts.IgnoreFields(Repository{}, "priority"),
1729 }
1730 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1731 t.Fatalf("mismatch (-want +got):\n%s", diff)
1732 }
1733
1734 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1735 res, err = searcher.List(context.Background(), q, nil)
1736 if err != nil {
1737 t.Fatalf("List(%v): %v", q, err)
1738 }
1739 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1740 t.Fatalf("got %v, want 0 matches", res)
1741 }
1742 })
1743 }
1744 })
1745
1746 t.Run("minimal", func(t *testing.T) {
1747 repo := &Repository{
1748 ID: 1234,
1749 Name: "reponame",
1750 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1751 RawConfig: map[string]string{"repoid": "1234"},
1752 }
1753 b := testIndexBuilder(t, repo,
1754 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1755 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1756 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1757 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1758
1759 searcher := searcherForTest(t, b)
1760
1761 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1762 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1763 if err != nil {
1764 t.Fatalf("List(%v): %v", q, err)
1765 }
1766
1767 want := &RepoList{
1768 Minimal: map[uint32]*MinimalRepoListEntry{
1769 repo.ID: {
1770 HasSymbols: repo.HasSymbols,
1771 Branches: repo.Branches,
1772 },
1773 },
1774 Stats: RepoStats{
1775 Shards: 1,
1776 Documents: 4,
1777 IndexBytes: 412,
1778 ContentBytes: 68,
1779 NewLinesCount: 4,
1780 DefaultBranchNewLinesCount: 2,
1781 OtherBranchesNewLinesCount: 3,
1782 },
1783 }
1784
1785 if diff := cmp.Diff(want, res); diff != "" {
1786 t.Fatalf("mismatch (-want +got):\n%s", diff)
1787 }
1788
1789 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1790 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1791 if err != nil {
1792 t.Fatalf("List(%v): %v", q, err)
1793 }
1794 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1795 t.Fatalf("got %v, want 0 matches", res)
1796 }
1797 })
1798}
1799
1800func TestListReposByContent(t *testing.T) {
1801 content := []byte("bla the needle")
1802
1803 b := testIndexBuilder(t, &Repository{
1804 Name: "reponame",
1805 },
1806 Document{Name: "f1", Content: content},
1807 Document{Name: "f2", Content: content})
1808
1809 searcher := searcherForTest(t, b)
1810 q := &query.Substring{Pattern: "needle"}
1811 res, err := searcher.List(context.Background(), q, nil)
1812 if err != nil {
1813 t.Fatalf("List(%v): %v", q, err)
1814 }
1815 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1816 t.Fatalf("got %v, want 1 matches", res)
1817 }
1818 if got := res.Repos[0].Stats.Shards; got != 1 {
1819 t.Fatalf("got %d, want 1 shard", got)
1820 }
1821 q = &query.Substring{Pattern: "foo"}
1822 res, err = searcher.List(context.Background(), q, nil)
1823 if err != nil {
1824 t.Fatalf("List(%v): %v", q, err)
1825 }
1826 if len(res.Repos) != 0 {
1827 t.Fatalf("got %v, want 0 matches", res)
1828 }
1829}
1830
1831func TestMetadata(t *testing.T) {
1832 content := []byte("bla the needle")
1833
1834 b := testIndexBuilder(t, &Repository{
1835 Name: "reponame",
1836 }, Document{Name: "f1", Content: content},
1837 Document{Name: "f2", Content: content})
1838
1839 var buf bytes.Buffer
1840 if err := b.Write(&buf); err != nil {
1841 t.Fatal(err)
1842 }
1843 f := &memSeeker{buf.Bytes()}
1844
1845 rd, _, err := ReadMetadata(f)
1846 if err != nil {
1847 t.Fatalf("ReadMetadata: %v", err)
1848 }
1849
1850 if got, want := rd[0].Name, "reponame"; got != want {
1851 t.Fatalf("got %q want %q", got, want)
1852 }
1853}
1854
1855func TestOr(t *testing.T) {
1856 b := testIndexBuilder(t, nil,
1857 Document{Name: "f1", Content: []byte("needle")},
1858 Document{Name: "f2", Content: []byte("banana")})
1859 t.Run("LineMatches", func(t *testing.T) {
1860 sres := searchForTest(t, b, query.NewOr(
1861 &query.Substring{Pattern: "needle"},
1862 &query.Substring{Pattern: "banana"}))
1863
1864 if len(sres.Files) != 2 {
1865 t.Fatalf("got %v, want 2 files", sres.Files)
1866 }
1867 })
1868
1869 t.Run("ChunkMatches", func(t *testing.T) {
1870 sres := searchForTest(t, b, query.NewOr(
1871 &query.Substring{Pattern: "needle"},
1872 &query.Substring{Pattern: "banana"}))
1873
1874 if len(sres.Files) != 2 {
1875 t.Fatalf("got %v, want 2 files", sres.Files)
1876 }
1877 })
1878}
1879
1880func TestFrequency(t *testing.T) {
1881 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
1882
1883 b := testIndexBuilder(t, nil,
1884 Document{
1885 Name: "f1",
1886 Content: content,
1887 })
1888
1889 t.Run("LineMatches", func(t *testing.T) {
1890 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
1891 if len(sres.Files) != 0 {
1892 t.Errorf("got %v, wanted 0 matches", sres.Files)
1893 }
1894 })
1895
1896 t.Run("ChunkMatches", func(t *testing.T) {
1897 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
1898 if len(sres.Files) != 0 {
1899 t.Errorf("got %v, wanted 0 matches", sres.Files)
1900 }
1901 })
1902}
1903
1904func TestMatchNewline(t *testing.T) {
1905 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
1906 if err != nil {
1907 t.Fatalf("syntax.Parse: %v", err)
1908 }
1909
1910 content := []byte("pqr\nalex")
1911
1912 b := testIndexBuilder(t, nil,
1913 Document{
1914 Name: "f1",
1915 Content: content,
1916 })
1917
1918 t.Run("LineMatches", func(t *testing.T) {
1919 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
1920 if len(sres.Files) != 1 {
1921 t.Errorf("got %v, wanted 1 matches", sres.Files)
1922 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
1923 t.Errorf("got match line %q, want %q", l, content)
1924 }
1925 })
1926
1927 t.Run("ChunkMatches", func(t *testing.T) {
1928 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
1929 if len(sres.Files) != 1 {
1930 t.Errorf("got %v, wanted 1 matches", sres.Files)
1931 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
1932 t.Errorf("got match line %q, want %q", c, content)
1933 }
1934 })
1935}
1936
1937func TestSubRepo(t *testing.T) {
1938 subRepos := map[string]*Repository{
1939 "sub": {
1940 Name: "sub-name",
1941 LineFragmentTemplate: "sub-line",
1942 },
1943 }
1944
1945 content := []byte("pqr\nalex")
1946
1947 b := testIndexBuilder(t, &Repository{
1948 SubRepoMap: subRepos,
1949 }, Document{
1950 Name: "sub/f1",
1951 Content: content,
1952 SubRepositoryPath: "sub",
1953 })
1954
1955 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
1956 if len(sres.Files) != 1 {
1957 t.Fatalf("got %v, wanted 1 matches", sres.Files)
1958 }
1959
1960 f := sres.Files[0]
1961 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
1962 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
1963 }
1964
1965 if sres.LineFragments["sub-name"] != "sub-line" {
1966 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
1967 }
1968}
1969
1970func TestSearchEither(t *testing.T) {
1971 b := testIndexBuilder(t, nil,
1972 Document{Name: "f1", Content: []byte("bla needle bla")},
1973 Document{Name: "needle-file-branch", Content: []byte("bla content")})
1974
1975 t.Run("LineMatches", func(t *testing.T) {
1976 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
1977 if len(sres.Files) != 2 {
1978 t.Fatalf("got %v, wanted 2 matches", sres.Files)
1979 }
1980
1981 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
1982 if len(sres.Files) != 1 {
1983 t.Fatalf("got %v, wanted 1 match", sres.Files)
1984 }
1985
1986 if got, want := sres.Files[0].FileName, "f1"; got != want {
1987 t.Errorf("got %q, want %q", got, want)
1988 }
1989 })
1990
1991 t.Run("ChunkMatches", func(t *testing.T) {
1992 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
1993 if len(sres.Files) != 2 {
1994 t.Fatalf("got %v, wanted 2 matches", sres.Files)
1995 }
1996
1997 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
1998 if len(sres.Files) != 1 {
1999 t.Fatalf("got %v, wanted 1 match", sres.Files)
2000 }
2001
2002 if got, want := sres.Files[0].FileName, "f1"; got != want {
2003 t.Errorf("got %q, want %q", got, want)
2004 }
2005 })
2006}
2007
2008func TestUnicodeExactMatch(t *testing.T) {
2009 needle := "néédlÉ"
2010 content := []byte("blá blá " + needle + " blâ")
2011
2012 b := testIndexBuilder(t, nil,
2013 Document{Name: "f1", Content: content})
2014
2015 t.Run("LineMatches", func(t *testing.T) {
2016 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2017 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2018 }
2019 })
2020
2021 t.Run("ChunkMatches", func(t *testing.T) {
2022 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2023 if len(res.Files) != 1 {
2024 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2025 }
2026 })
2027}
2028
2029func TestUnicodeCoverContent(t *testing.T) {
2030 needle := "néédlÉ"
2031 content := []byte("blá blá " + needle + " blâ")
2032
2033 b := testIndexBuilder(t, nil,
2034 Document{Name: "f1", Content: content})
2035
2036 t.Run("LineMatches", func(t *testing.T) {
2037 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2038 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2039 }
2040
2041 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2042 if len(res.Files) != 1 {
2043 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2044 }
2045
2046 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2047 t.Errorf("got %d want %d", got, want)
2048 }
2049 })
2050
2051 t.Run("ChunkMatches", func(t *testing.T) {
2052 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2053 if len(res.Files) != 0 {
2054 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2055 }
2056
2057 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2058 if len(res.Files) != 1 {
2059 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2060 }
2061
2062 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2063 want := uint32(strings.Index(string(content), needle))
2064 if got != want {
2065 t.Errorf("got %d want %d", got, want)
2066 }
2067 })
2068}
2069
2070func TestUnicodeNonCoverContent(t *testing.T) {
2071 needle := "nééáádlÉ"
2072 content := []byte("blá blá " + needle + " blâ")
2073
2074 b := testIndexBuilder(t, nil,
2075 Document{Name: "f1", Content: content})
2076
2077 t.Run("LineMatches", func(t *testing.T) {
2078 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2079 if len(res.Files) != 1 {
2080 t.Fatalf("got %v, wanted 1 match", res.Files)
2081 }
2082
2083 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2084 t.Errorf("got %d want %d", got, want)
2085 }
2086 })
2087
2088 t.Run("ChunkMatches", func(t *testing.T) {
2089 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2090 if len(res.Files) != 1 {
2091 t.Fatalf("got %v, wanted 1 match", res.Files)
2092 }
2093
2094 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2095 want := uint32(strings.Index(string(content), needle))
2096 if got != want {
2097 t.Errorf("got %d want %d", got, want)
2098 }
2099 })
2100}
2101
// kelvinCodePoint is U+212A (KELVIN SIGN, decimal 8490). The tests pair it
// with the ASCII letter 'k' as its lower-case counterpart.
const kelvinCodePoint = 0x212A
2103
2104func TestUnicodeVariableLength(t *testing.T) {
2105 lower := 'k'
2106 upper := rune(kelvinCodePoint)
2107
2108 needle := "nee" + string([]rune{lower}) + "eed"
2109 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2110 " ee" + string([]rune{lower}) + "ee" +
2111 " ee" + string([]rune{upper}) + "ee")
2112
2113 t.Run("LineMatches", func(t *testing.T) {
2114 b := testIndexBuilder(t, nil,
2115 Document{Name: "f1", Content: []byte(corpus)})
2116
2117 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2118 if len(res.Files) != 1 {
2119 t.Fatalf("got %v, wanted 1 match", res.Files)
2120 }
2121 })
2122
2123 t.Run("ChunkMatches", func(t *testing.T) {
2124 b := testIndexBuilder(t, nil,
2125 Document{Name: "f1", Content: []byte(corpus)})
2126
2127 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2128 if len(res.Files) != 1 {
2129 t.Fatalf("got %v, wanted 1 match", res.Files)
2130 }
2131 })
2132}
2133
2134func TestUnicodeFileStartOffsets(t *testing.T) {
2135 unicode := "世界"
2136 wat := "waaaaaat"
2137 b := testIndexBuilder(t, nil,
2138 Document{
2139 Name: "f1",
2140 Content: []byte(unicode),
2141 },
2142 Document{
2143 Name: "f2",
2144 Content: []byte(wat),
2145 },
2146 )
2147 q := &query.Substring{Pattern: wat, Content: true}
2148 res := searchForTest(t, b, q)
2149 if len(res.Files) != 1 {
2150 t.Fatalf("got %v, wanted 1 match", res.Files)
2151 }
2152}
2153
2154func TestLongFileUTF8(t *testing.T) {
2155 needle := "neeedle"
2156
2157 // 6 bytes.
2158 unicode := "世界"
2159 content := []byte(strings.Repeat(unicode, 100) + needle)
2160 b := testIndexBuilder(t, nil,
2161 Document{
2162 Name: "f1",
2163 Content: []byte(strings.Repeat("a", 50)),
2164 },
2165 Document{
2166 Name: "f2",
2167 Content: content,
2168 })
2169
2170 t.Run("LineMatches", func(t *testing.T) {
2171 q := &query.Substring{Pattern: needle, Content: true}
2172 res := searchForTest(t, b, q)
2173 if len(res.Files) != 1 {
2174 t.Errorf("got %v, want 1 result", res)
2175 }
2176 })
2177
2178 t.Run("ChunkMatches", func(t *testing.T) {
2179 q := &query.Substring{Pattern: needle, Content: true}
2180 res := searchForTest(t, b, q, chunkOpts)
2181 if len(res.Files) != 1 {
2182 t.Errorf("got %v, want 1 result", res)
2183 }
2184 })
2185}
2186
2187func TestEstimateDocCount(t *testing.T) {
2188 content := []byte("bla needle bla")
2189 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2190 Document{Name: "f1", Content: content},
2191 Document{Name: "f2", Content: content},
2192 )
2193
2194 t.Run("LineMatches", func(t *testing.T) {
2195 if sres := searchForTest(t, b,
2196 query.NewAnd(
2197 &query.Substring{Pattern: "needle"},
2198 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2199 ), SearchOptions{
2200 EstimateDocCount: true,
2201 }); sres.Stats.ShardFilesConsidered != 2 {
2202 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2203 }
2204 if sres := searchForTest(t, b,
2205 query.NewAnd(
2206 &query.Substring{Pattern: "needle"},
2207 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2208 ), SearchOptions{
2209 EstimateDocCount: true,
2210 }); sres.Stats.ShardFilesConsidered != 0 {
2211 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2212 }
2213 })
2214
2215 t.Run("ChunkMatches", func(t *testing.T) {
2216 if sres := searchForTest(t, b,
2217 query.NewAnd(
2218 &query.Substring{Pattern: "needle"},
2219 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2220 ), SearchOptions{
2221 EstimateDocCount: true,
2222 ChunkMatches: true,
2223 }); sres.Stats.ShardFilesConsidered != 2 {
2224 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2225 }
2226 if sres := searchForTest(t, b,
2227 query.NewAnd(
2228 &query.Substring{Pattern: "needle"},
2229 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2230 ), SearchOptions{
2231 EstimateDocCount: true,
2232 ChunkMatches: true,
2233 }); sres.Stats.ShardFilesConsidered != 0 {
2234 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2235 }
2236 })
2237}
2238
2239func TestUTF8CorrectCorpus(t *testing.T) {
2240 needle := "neeedle"
2241
2242 // 6 bytes.
2243 unicode := "世界"
2244 b := testIndexBuilder(t, nil,
2245 Document{
2246 Name: "f1",
2247 Content: []byte(strings.Repeat(unicode, 100)),
2248 },
2249 Document{
2250 Name: "xxxxxneeedle",
2251 Content: []byte("hello"),
2252 })
2253
2254 t.Run("LineMatches", func(t *testing.T) {
2255 q := &query.Substring{Pattern: needle, FileName: true}
2256 res := searchForTest(t, b, q)
2257 if len(res.Files) != 1 {
2258 t.Errorf("got %v, want 1 result", res)
2259 }
2260 })
2261
2262 t.Run("ChunkMatches", func(t *testing.T) {
2263 q := &query.Substring{Pattern: needle, FileName: true}
2264 res := searchForTest(t, b, q, chunkOpts)
2265 if len(res.Files) != 1 {
2266 t.Errorf("got %v, want 1 result", res)
2267 }
2268 })
2269}
2270
2271func TestBuilderStats(t *testing.T) {
2272 b := testIndexBuilder(t, nil,
2273 Document{
2274 Name: "f1",
2275 Content: []byte(strings.Repeat("abcd", 1024)),
2276 })
2277 var buf bytes.Buffer
2278 if err := b.Write(&buf); err != nil {
2279 t.Fatal(err)
2280 }
2281
2282 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2283 t.Errorf("got %d, want %d", got, want)
2284 }
2285}
2286
2287func TestIOStats(t *testing.T) {
2288 b := testIndexBuilder(t, nil,
2289 Document{
2290 Name: "f1",
2291 Content: []byte(strings.Repeat("abcd", 1024)),
2292 })
2293
2294 t.Run("LineMatches", func(t *testing.T) {
2295 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2296 res := searchForTest(t, b, q)
2297
2298 // 4096 (content) + 2 (overhead: newlines or doc sections)
2299 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2300 t.Errorf("got content I/O %d, want %d", got, want)
2301 }
2302
2303 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2304 // delta encoded.
2305 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2306 t.Errorf("got index I/O %d, want %d", got, want)
2307 }
2308 })
2309
2310 t.Run("ChunkMatches", func(t *testing.T) {
2311 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2312 res := searchForTest(t, b, q, chunkOpts)
2313
2314 // 4096 (content) + 2 (overhead: newlines or doc sections)
2315 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2316 t.Errorf("got content I/O %d, want %d", got, want)
2317 }
2318
2319 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2320 // delta encoded.
2321 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2322 t.Errorf("got index I/O %d, want %d", got, want)
2323 }
2324 })
2325}
2326
2327func TestStartLineAnchor(t *testing.T) {
2328 b := testIndexBuilder(t, nil,
2329 Document{
2330 Name: "f1",
2331 Content: []byte(
2332 `hello
2333start of middle of line
2334`),
2335 })
2336
2337 t.Run("LineMatches", func(t *testing.T) {
2338 q, err := query.Parse("^start")
2339 if err != nil {
2340 t.Errorf("parse: %v", err)
2341 }
2342
2343 res := searchForTest(t, b, q)
2344 if len(res.Files) != 1 {
2345 t.Errorf("got %v, want 1 file", res.Files)
2346 }
2347
2348 q, err = query.Parse("^middle")
2349 if err != nil {
2350 t.Errorf("parse: %v", err)
2351 }
2352 res = searchForTest(t, b, q)
2353 if len(res.Files) != 0 {
2354 t.Errorf("got %v, want 0 files", res.Files)
2355 }
2356 })
2357
2358 t.Run("ChunkMatches", func(t *testing.T) {
2359 q, err := query.Parse("^start")
2360 if err != nil {
2361 t.Errorf("parse: %v", err)
2362 }
2363
2364 res := searchForTest(t, b, q, chunkOpts)
2365 if len(res.Files) != 1 {
2366 t.Errorf("got %v, want 1 file", res.Files)
2367 }
2368
2369 q, err = query.Parse("^middle")
2370 if err != nil {
2371 t.Errorf("parse: %v", err)
2372 }
2373 res = searchForTest(t, b, q, chunkOpts)
2374 if len(res.Files) != 0 {
2375 t.Errorf("got %v, want 0 files", res.Files)
2376 }
2377 })
2378}
2379
2380func TestAndOrUnicode(t *testing.T) {
2381 q, err := query.Parse("orange.*apple")
2382 if err != nil {
2383 t.Errorf("parse: %v", err)
2384 }
2385 finalQ := query.NewAnd(q,
2386 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2387 query.NewOr(&query.Branch{Pattern: "master"}))))
2388
2389 b := testIndexBuilder(t, &Repository{
2390 Name: "name",
2391 Branches: []RepositoryBranch{{"master", "master-version"}},
2392 }, Document{
2393 Name: "f2",
2394 Content: []byte("orange\u2318apple"),
2395 // --------------0123456 78901
2396 Branches: []string{"master"},
2397 })
2398
2399 t.Run("LineMatches", func(t *testing.T) {
2400 res := searchForTest(t, b, finalQ)
2401 if len(res.Files) != 1 {
2402 t.Errorf("got %v, want 1 result", res.Files)
2403 }
2404 })
2405
2406 t.Run("ChunkMatches", func(t *testing.T) {
2407 res := searchForTest(t, b, finalQ, chunkOpts)
2408 if len(res.Files) != 1 {
2409 t.Errorf("got %v, want 1 result", res.Files)
2410 }
2411 })
2412}
2413
2414func TestAndShort(t *testing.T) {
2415 content := []byte("bla needle at orange bla")
2416 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2417 Document{Name: "f1", Content: content},
2418 Document{Name: "f2", Content: []byte("xx at xx")},
2419 Document{Name: "f3", Content: []byte("yy orange xx")},
2420 )
2421
2422 q := query.NewAnd(&query.Substring{Pattern: "at"},
2423 &query.Substring{Pattern: "orange"})
2424
2425 t.Run("LineMatches", func(t *testing.T) {
2426 res := searchForTest(t, b, q)
2427 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2428 t.Errorf("got %v, want 1 result", res.Files)
2429 }
2430 })
2431
2432 t.Run("ChunkMatches", func(t *testing.T) {
2433 res := searchForTest(t, b, q, chunkOpts)
2434 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2435 t.Errorf("got %v, want 1 result", res.Files)
2436 }
2437 })
2438}
2439
2440func TestNoCollectRegexpSubstring(t *testing.T) {
2441 content := []byte("bla final bla\nfoo final, foo")
2442 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2443 Document{Name: "f1", Content: content},
2444 )
2445
2446 q := &query.Regexp{
2447 Regexp: mustParseRE("final[,.]"),
2448 }
2449
2450 t.Run("LineMatches", func(t *testing.T) {
2451 res := searchForTest(t, b, q)
2452 if len(res.Files) != 1 {
2453 t.Fatalf("got %v, want 1 result", res.Files)
2454 }
2455 if f := res.Files[0]; len(f.LineMatches) != 1 {
2456 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2457 }
2458 })
2459
2460 t.Run("ChunkMatches", func(t *testing.T) {
2461 res := searchForTest(t, b, q, chunkOpts)
2462 if len(res.Files) != 1 {
2463 t.Fatalf("got %v, want 1 result", res.Files)
2464 }
2465 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2466 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2467 }
2468 })
2469}
2470
2471func printLineMatches(ms []LineMatch) string {
2472 var ss []string
2473 for _, m := range ms {
2474 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2475 }
2476
2477 return strings.Join(ss, ", ")
2478}
2479
2480func TestLang(t *testing.T) {
2481 content := []byte("bla needle bla")
2482 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2483 Document{Name: "f1", Content: content},
2484 Document{Name: "f2", Language: "java", Content: content},
2485 Document{Name: "f3", Language: "cpp", Content: content},
2486 )
2487
2488 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2489 &query.Language{Language: "cpp"})
2490
2491 t.Run("LineMatches", func(t *testing.T) {
2492 res := searchForTest(t, b, q)
2493 if len(res.Files) != 1 {
2494 t.Fatalf("got %v, want 1 result in f3", res.Files)
2495 }
2496 f := res.Files[0]
2497 if f.FileName != "f3" || f.Language != "cpp" {
2498 t.Fatalf("got %v, want 1 match with language cpp", f)
2499 }
2500 })
2501
2502 t.Run("ChunkMatches", func(t *testing.T) {
2503 res := searchForTest(t, b, q, chunkOpts)
2504 if len(res.Files) != 1 {
2505 t.Fatalf("got %v, want 1 result in f3", res.Files)
2506 }
2507 f := res.Files[0]
2508 if f.FileName != "f3" || f.Language != "cpp" {
2509 t.Fatalf("got %v, want 1 match with language cpp", f)
2510 }
2511 })
2512}
2513
2514func TestLangShortcut(t *testing.T) {
2515 content := []byte("bla needle bla")
2516 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2517 Document{Name: "f2", Language: "java", Content: content},
2518 Document{Name: "f3", Language: "cpp", Content: content},
2519 )
2520
2521 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2522 &query.Language{Language: "fortran"})
2523
2524 t.Run("LineMatches", func(t *testing.T) {
2525 res := searchForTest(t, b, q)
2526 if len(res.Files) != 0 {
2527 t.Fatalf("got %v, want 0 results", res.Files)
2528 }
2529 if res.Stats.IndexBytesLoaded > 0 {
2530 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2531 }
2532 })
2533
2534 t.Run("ChunkMatches", func(t *testing.T) {
2535 res := searchForTest(t, b, q, chunkOpts)
2536 if len(res.Files) != 0 {
2537 t.Fatalf("got %v, want 0 results", res.Files)
2538 }
2539 if res.Stats.IndexBytesLoaded > 0 {
2540 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2541 }
2542 })
2543}
2544
2545func TestNoTextMatchAtoms(t *testing.T) {
2546 content := []byte("bla needle bla")
2547 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2548 Document{Name: "f1", Content: content},
2549 Document{Name: "f2", Language: "java", Content: content},
2550 Document{Name: "f3", Language: "cpp", Content: content},
2551 )
2552 q := query.NewAnd(&query.Language{Language: "java"})
2553 t.Run("LineMatches", func(t *testing.T) {
2554 res := searchForTest(t, b, q)
2555 if len(res.Files) != 1 {
2556 t.Fatalf("got %v, want 1 result in f3", res.Files)
2557 }
2558 })
2559
2560 t.Run("ChunkMatches", func(t *testing.T) {
2561 res := searchForTest(t, b, q, chunkOpts)
2562 if len(res.Files) != 1 {
2563 t.Fatalf("got %v, want 1 result in f3", res.Files)
2564 }
2565 })
2566}
2567
2568func TestNoPositiveAtoms(t *testing.T) {
2569 content := []byte("bla needle bla")
2570 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2571 Document{Name: "f1", Content: content},
2572 Document{Name: "f2", Content: content},
2573 )
2574
2575 q := query.NewAnd(
2576 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2577 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2578 t.Run("LineMatches", func(t *testing.T) {
2579 res := searchForTest(t, b, q)
2580 if len(res.Files) != 2 {
2581 t.Fatalf("got %v, want 2 results in f3", res.Files)
2582 }
2583 })
2584 t.Run("ChunkMatches", func(t *testing.T) {
2585 res := searchForTest(t, b, q, chunkOpts)
2586 if len(res.Files) != 2 {
2587 t.Fatalf("got %v, want 2 results in f3", res.Files)
2588 }
2589 })
2590}
2591
2592func TestSymbolBoundaryStart(t *testing.T) {
2593 content := []byte("start\nbla bla\nend")
2594 // ----------------012345-67890123-456
2595
2596 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2597 Document{
2598 Name: "f1",
2599 Content: content,
2600 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2601 },
2602 )
2603 q := &query.Symbol{
2604 Expr: &query.Substring{Pattern: "start"},
2605 }
2606 t.Run("LineMatches", func(t *testing.T) {
2607 res := searchForTest(t, b, q)
2608 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2609 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2610 }
2611 m := res.Files[0].LineMatches[0].LineFragments[0]
2612 if m.Offset != 0 {
2613 t.Fatalf("got offset %d want 0", m.Offset)
2614 }
2615 })
2616
2617 t.Run("ChunkMatches", func(t *testing.T) {
2618 res := searchForTest(t, b, q, chunkOpts)
2619 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2620 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2621 }
2622 m := res.Files[0].ChunkMatches[0].Ranges[0]
2623 if m.Start.ByteOffset != 0 {
2624 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2625 }
2626 })
2627}
2628
2629func TestSymbolBoundaryEnd(t *testing.T) {
2630 content := []byte("start\nbla bla\nend")
2631 // ----------------012345-67890123-456
2632
2633 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2634 Document{
2635 Name: "f1",
2636 Content: content,
2637 Symbols: []DocumentSection{{14, 17}},
2638 },
2639 )
2640 q := &query.Symbol{
2641 Expr: &query.Substring{Pattern: "end"},
2642 }
2643 t.Run("LineMatches", func(t *testing.T) {
2644 res := searchForTest(t, b, q)
2645 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2646 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2647 }
2648 m := res.Files[0].LineMatches[0].LineFragments[0]
2649 if m.Offset != 14 {
2650 t.Fatalf("got offset %d want 0", m.Offset)
2651 }
2652 })
2653
2654 t.Run("ChunkMatches", func(t *testing.T) {
2655 res := searchForTest(t, b, q, chunkOpts)
2656 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2657 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2658 }
2659 m := res.Files[0].ChunkMatches[0].Ranges[0]
2660 if m.Start.ByteOffset != 14 {
2661 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2662 }
2663 })
2664}
2665
2666func TestSymbolSubstring(t *testing.T) {
2667 content := []byte("bla\nsymblabla\nbla")
2668 // ----------------0123-4567890123-456
2669
2670 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2671 Document{
2672 Name: "f1",
2673 Content: content,
2674 Symbols: []DocumentSection{{4, 12}},
2675 },
2676 )
2677 q := &query.Symbol{
2678 Expr: &query.Substring{Pattern: "bla"},
2679 }
2680 t.Run("LineMatches", func(t *testing.T) {
2681 res := searchForTest(t, b, q)
2682 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2683 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2684 }
2685 m := res.Files[0].LineMatches[0].LineFragments[0]
2686 if m.Offset != 7 || m.MatchLength != 3 {
2687 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2688 }
2689 })
2690
2691 t.Run("ChunkMatches", func(t *testing.T) {
2692 res := searchForTest(t, b, q, chunkOpts)
2693 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2694 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2695 }
2696 m := res.Files[0].ChunkMatches[0].Ranges[0]
2697 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2698 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2699 }
2700 })
2701}
2702
2703func TestSymbolSubstringExact(t *testing.T) {
2704 content := []byte("bla\nsym\nbla\nsym\nasymb")
2705 // ----------------0123-4567-890123456-78901
2706
2707 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2708 Document{
2709 Name: "f1",
2710 Content: content,
2711 Symbols: []DocumentSection{{4, 7}},
2712 },
2713 )
2714 q := &query.Symbol{
2715 Expr: &query.Substring{Pattern: "sym"},
2716 }
2717 t.Run("LineMatches", func(t *testing.T) {
2718 res := searchForTest(t, b, q)
2719 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2720 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2721 }
2722 m := res.Files[0].LineMatches[0].LineFragments[0]
2723 if m.Offset != 4 {
2724 t.Fatalf("got offset %d, want 7", m.Offset)
2725 }
2726 })
2727
2728 t.Run("ChunkMatches", func(t *testing.T) {
2729 res := searchForTest(t, b, q, chunkOpts)
2730 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2731 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2732 }
2733 m := res.Files[0].ChunkMatches[0].Ranges[0]
2734 if m.Start.ByteOffset != 4 {
2735 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2736 }
2737 })
2738}
2739
2740func TestSymbolRegexpExact(t *testing.T) {
2741 content := []byte("blah\nbla\nbl")
2742 // ----------------01234-5678-90
2743
2744 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2745 Document{
2746 Name: "f1",
2747 Content: content,
2748 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2749 },
2750 )
2751 q := &query.Symbol{
2752 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2753 }
2754 t.Run("LineMatches", func(t *testing.T) {
2755 res := searchForTest(t, b, q)
2756 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2757 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2758 }
2759 m := res.Files[0].LineMatches[0].LineFragments[0]
2760 if m.Offset != 5 {
2761 t.Fatalf("got offset %d, want 5", m.Offset)
2762 }
2763 })
2764
2765 t.Run("ChunkMatches", func(t *testing.T) {
2766 res := searchForTest(t, b, q, chunkOpts)
2767 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2768 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2769 }
2770 m := res.Files[0].ChunkMatches[0].Ranges[0]
2771 if m.Start.ByteOffset != 5 {
2772 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2773 }
2774 })
2775}
2776
2777func TestSymbolRegexpPartial(t *testing.T) {
2778 content := []byte("abcdef")
2779 // ----------------012345
2780
2781 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2782 Document{
2783 Name: "f1",
2784 Content: content,
2785 Symbols: []DocumentSection{{0, 6}},
2786 },
2787 )
2788 q := &query.Symbol{
2789 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2790 }
2791 t.Run("LineMatches", func(t *testing.T) {
2792 res := searchForTest(t, b, q)
2793 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2794 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2795 }
2796 m := res.Files[0].LineMatches[0].LineFragments[0]
2797 if m.Offset != 1 {
2798 t.Fatalf("got offset %d, want 1", m.Offset)
2799 }
2800 if m.MatchLength != 3 {
2801 t.Fatalf("got match length %d, want 3", m.MatchLength)
2802 }
2803 })
2804
2805 t.Run("ChunkMatches", func(t *testing.T) {
2806 res := searchForTest(t, b, q, chunkOpts)
2807 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2808 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2809 }
2810 m := res.Files[0].ChunkMatches[0].Ranges[0]
2811 if m.Start.ByteOffset != 1 {
2812 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2813 }
2814 if m.End.ByteOffset != 4 {
2815 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2816 }
2817 })
2818}
2819
2820func TestSymbolRegexpAll(t *testing.T) {
2821 docs := []Document{
2822 {
2823 Name: "f1",
2824 Content: []byte("Hello Zoekt"),
2825 // --------------01234567890
2826 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2827 },
2828 {
2829 Name: "f2",
2830 Content: []byte("Second Zoekt Third"),
2831 // --------------012345678901234567
2832 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2833 },
2834 }
2835
2836 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2837 q := &query.Symbol{
2838 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2839 }
2840 t.Run("LineMatches", func(t *testing.T) {
2841 res := searchForTest(t, b, q)
2842 if len(res.Files) != len(docs) {
2843 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2844 }
2845 for i, want := range docs {
2846 got := res.Files[i].LineMatches[0].LineFragments
2847 if len(got) != len(want.Symbols) {
2848 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2849 }
2850
2851 for j, sec := range want.Symbols {
2852 if sec.Start != got[j].Offset {
2853 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
2854 }
2855 }
2856 }
2857 })
2858
2859 t.Run("ChunkMatches", func(t *testing.T) {
2860 res := searchForTest(t, b, q, chunkOpts)
2861 if len(res.Files) != len(docs) {
2862 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2863 }
2864 for i, want := range docs {
2865 got := res.Files[i].ChunkMatches[0].Ranges
2866 if len(got) != len(want.Symbols) {
2867 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2868 }
2869
2870 for j, sec := range want.Symbols {
2871 if sec.Start != uint32(got[j].Start.ByteOffset) {
2872 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
2873 }
2874 }
2875 }
2876 })
2877}
2878
2879func TestHitIterTerminate(t *testing.T) {
2880 // contrived input: trigram frequencies forces selecting abc +
2881 // def for the distance iteration. There is no match, so this
2882 // will advance the compressedPostingIterator to beyond the
2883 // end.
2884 content := []byte("abc bcdbcd cdecde abcabc def efg")
2885 b := testIndexBuilder(t, nil,
2886 Document{
2887 Name: "f1",
2888 Content: content,
2889 },
2890 )
2891
2892 t.Run("LineMatches", func(t *testing.T) {
2893 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
2894 })
2895
2896 t.Run("ChunkMatches", func(t *testing.T) {
2897 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
2898 })
2899}
2900
2901func TestDistanceHitIterBailLast(t *testing.T) {
2902 content := []byte("AST AST AST UASH")
2903 b := testIndexBuilder(t, nil,
2904 Document{
2905 Name: "f1",
2906 Content: content,
2907 },
2908 )
2909 t.Run("LineMatches", func(t *testing.T) {
2910 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
2911 if len(res.Files) != 0 {
2912 t.Fatalf("got %v, want no results", res.Files)
2913 }
2914 })
2915
2916 t.Run("LineMatches", func(t *testing.T) {
2917 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
2918 if len(res.Files) != 0 {
2919 t.Fatalf("got %v, want no results", res.Files)
2920 }
2921 })
2922}
2923
2924func TestDocumentSectionRuneBoundary(t *testing.T) {
2925 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2926 b, err := NewIndexBuilder(nil)
2927 if err != nil {
2928 t.Fatalf("NewIndexBuilder: %v", err)
2929 }
2930
2931 for i, sec := range []DocumentSection{
2932 {2, 6},
2933 {3, 7},
2934 } {
2935 if err := b.Add(Document{
2936 Name: "f1",
2937 Content: []byte(content),
2938 Symbols: []DocumentSection{sec},
2939 }); err == nil {
2940 t.Errorf("%d: Add succeeded", i)
2941 }
2942 }
2943}
2944
2945func TestUnicodeQuery(t *testing.T) {
2946 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2947 b := testIndexBuilder(t, nil,
2948 Document{
2949 Name: "f1",
2950 Content: []byte(content),
2951 },
2952 )
2953
2954 q := &query.Substring{Pattern: content}
2955
2956 t.Run("LineMatches", func(t *testing.T) {
2957 res := searchForTest(t, b, q)
2958 if len(res.Files) != 1 {
2959 t.Fatalf("want 1 match, got %v", res.Files)
2960 }
2961
2962 f := res.Files[0]
2963 if len(f.LineMatches) != 1 {
2964 t.Fatalf("want 1 line, got %v", f.LineMatches)
2965 }
2966 l := f.LineMatches[0]
2967
2968 if len(l.LineFragments) != 1 {
2969 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
2970 }
2971 fr := l.LineFragments[0]
2972 if fr.MatchLength != len(content) {
2973 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
2974 }
2975 })
2976
2977 t.Run("ChunkMatches", func(t *testing.T) {
2978 res := searchForTest(t, b, q, chunkOpts)
2979 if len(res.Files) != 1 {
2980 t.Fatalf("want 1 match, got %v", res.Files)
2981 }
2982
2983 f := res.Files[0]
2984 if len(f.ChunkMatches) != 1 {
2985 t.Fatalf("want 1 line, got %v", f.LineMatches)
2986 }
2987 cm := f.ChunkMatches[0]
2988
2989 if len(cm.Ranges) != 1 {
2990 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
2991 }
2992 rr := cm.Ranges[0]
2993 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
2994 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
2995 }
2996 })
2997}
2998
2999func TestSkipInvalidContent(t *testing.T) {
3000 for _, content := range []string{
3001 // Binary
3002 "abc def \x00 abc",
3003 } {
3004
3005 b, err := NewIndexBuilder(nil)
3006 if err != nil {
3007 t.Fatalf("NewIndexBuilder: %v", err)
3008 }
3009
3010 if err := b.Add(Document{
3011 Name: "f1",
3012 Content: []byte(content),
3013 }); err != nil {
3014 t.Fatal(err)
3015 }
3016
3017 t.Run("LineMatches", func(t *testing.T) {
3018 q := &query.Substring{Pattern: "abc def"}
3019 res := searchForTest(t, b, q)
3020 if len(res.Files) != 0 {
3021 t.Fatalf("got %v, want no results", res.Files)
3022 }
3023
3024 q = &query.Substring{Pattern: "NOT-INDEXED"}
3025 res = searchForTest(t, b, q)
3026 if len(res.Files) != 1 {
3027 t.Fatalf("got %v, want 1 result", res.Files)
3028 }
3029 })
3030
3031 t.Run("ChunkMatches", func(t *testing.T) {
3032 q := &query.Substring{Pattern: "abc def"}
3033 res := searchForTest(t, b, q, chunkOpts)
3034 if len(res.Files) != 0 {
3035 t.Fatalf("got %v, want no results", res.Files)
3036 }
3037
3038 q = &query.Substring{Pattern: "NOT-INDEXED"}
3039 res = searchForTest(t, b, q, chunkOpts)
3040 if len(res.Files) != 1 {
3041 t.Fatalf("got %v, want 1 result", res.Files)
3042 }
3043 })
3044 }
3045}
3046
3047func TestCheckText(t *testing.T) {
3048 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3049 if err := CheckText([]byte(text), 20000); err != nil {
3050 t.Errorf("CheckText(%q): %v", text, err)
3051 }
3052 }
3053 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3054 if err := CheckText([]byte(text), 15); err == nil {
3055 t.Errorf("CheckText(%q) succeeded", text)
3056 }
3057 }
3058}
3059
3060func TestLineAnd(t *testing.T) {
3061 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3062 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3063 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3064 Document{Name: "f3", Content: []byte("banana grape")},
3065 )
3066 pattern := "(apple)(?-s:.)*?(banana)"
3067 r, _ := syntax.Parse(pattern, syntax.Perl)
3068
3069 q := query.Regexp{
3070 Regexp: r,
3071 Content: true,
3072 }
3073 t.Run("LineMatches", func(t *testing.T) {
3074 res := searchForTest(t, b, &q)
3075 wantRegexpCount := 1
3076 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3077 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3078 }
3079 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3080 t.Errorf("got %v, want 1 result", res.Files)
3081 }
3082 })
3083
3084 t.Run("ChunkMatches", func(t *testing.T) {
3085 res := searchForTest(t, b, &q, chunkOpts)
3086 wantRegexpCount := 1
3087 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3088 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3089 }
3090 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3091 t.Errorf("got %v, want 1 result", res.Files)
3092 }
3093 })
3094}
3095
3096func TestLineAndFileName(t *testing.T) {
3097 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3098 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3099 Document{Name: "f2", Content: []byte("apple banana\norange")},
3100 Document{Name: "apple banana", Content: []byte("banana grape")},
3101 )
3102 pattern := "(apple)(?-s:.)*?(banana)"
3103 r, _ := syntax.Parse(pattern, syntax.Perl)
3104
3105 q := query.Regexp{
3106 Regexp: r,
3107 FileName: true,
3108 }
3109 t.Run("LineMatches", func(t *testing.T) {
3110 res := searchForTest(t, b, &q)
3111 wantRegexpCount := 1
3112 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3113 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3114 }
3115 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3116 t.Errorf("got %v, want 1 result", res.Files)
3117 }
3118 })
3119
3120 t.Run("ChunkMatches", func(t *testing.T) {
3121 res := searchForTest(t, b, &q, chunkOpts)
3122 wantRegexpCount := 1
3123 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3124 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3125 }
3126 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3127 t.Errorf("got %v, want 1 result", res.Files)
3128 }
3129 })
3130}
3131
3132func TestMultiLineRegex(t *testing.T) {
3133 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3134 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3135 Document{Name: "f2", Content: []byte("apple orange")},
3136 Document{Name: "f3", Content: []byte("grape apple")},
3137 )
3138 pattern := "(apple).*?[[:space:]].*?(grape)"
3139 r, _ := syntax.Parse(pattern, syntax.Perl)
3140
3141 q := query.Regexp{
3142 Regexp: r,
3143 }
3144 t.Run("LineMatches", func(t *testing.T) {
3145 res := searchForTest(t, b, &q)
3146 wantRegexpCount := 2
3147 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3148 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3149 }
3150 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3151 t.Errorf("got %v, want 1 result", res.Files)
3152 }
3153 if l := len(res.Files[0].LineMatches); l != 2 {
3154 t.Errorf("got %v, want 2 line matches", l)
3155 }
3156 })
3157
3158 t.Run("ChunkMatches", func(t *testing.T) {
3159 res := searchForTest(t, b, &q, chunkOpts)
3160 wantRegexpCount := 2
3161 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3162 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3163 }
3164 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3165 t.Errorf("got %v, want 1 result", res.Files)
3166 }
3167 if l := len(res.Files[0].ChunkMatches); l != 1 {
3168 t.Errorf("got %v, want 1 chunk matches", l)
3169 }
3170 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3171 t.Errorf("got %v, want 1 chunk ranges", l)
3172 }
3173 })
3174}
3175
3176func TestSearchTypeFileName(t *testing.T) {
3177 b := testIndexBuilder(t, &Repository{
3178 Name: "reponame",
3179 },
3180 Document{Name: "f1", Content: []byte("bla the needle")},
3181 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3182 // -----------------------------------012345678901234567890-123456
3183 )
3184
3185 t.Run("LineMatches", func(t *testing.T) {
3186 wantSingleMatch := func(res *SearchResult, want string) {
3187 t.Helper()
3188 fmatches := res.Files
3189 if len(fmatches) != 1 {
3190 t.Errorf("got %v, want 1 matches", len(fmatches))
3191 return
3192 }
3193 if len(fmatches[0].LineMatches) != 1 {
3194 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3195 return
3196 }
3197 var got string
3198 if fmatches[0].LineMatches[0].FileName {
3199 got = fmatches[0].FileName
3200 } else {
3201 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3202 }
3203
3204 if got != want {
3205 t.Errorf("got %s, want %s", got, want)
3206 }
3207 }
3208
3209 // Only return the later match in the second file
3210 res := searchForTest(t, b, query.NewAnd(
3211 &query.Type{
3212 Type: query.TypeFileName,
3213 Child: &query.Substring{Pattern: "needle"},
3214 },
3215 &query.Substring{Pattern: "file"}))
3216 wantSingleMatch(res, "f2:8")
3217
3218 // Only return a filename result
3219 res = searchForTest(t, b,
3220 &query.Type{
3221 Type: query.TypeFileName,
3222 Child: &query.Substring{Pattern: "file"},
3223 })
3224 wantSingleMatch(res, "f2")
3225 })
3226
3227 t.Run("ChunkMatches", func(t *testing.T) {
3228 wantSingleMatch := func(res *SearchResult, want string) {
3229 t.Helper()
3230 fmatches := res.Files
3231 if len(fmatches) != 1 {
3232 t.Errorf("got %v, want 1 matches", len(fmatches))
3233 return
3234 }
3235 if len(fmatches[0].ChunkMatches) != 1 {
3236 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3237 return
3238 }
3239 var got string
3240 if fmatches[0].ChunkMatches[0].FileName {
3241 got = fmatches[0].FileName
3242 } else {
3243 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3244 }
3245
3246 if got != want {
3247 t.Errorf("got %s, want %s", got, want)
3248 }
3249 }
3250
3251 // Only return the later match in the second file
3252 res := searchForTest(t, b, query.NewAnd(
3253 &query.Type{
3254 Type: query.TypeFileName,
3255 Child: &query.Substring{Pattern: "needle"},
3256 },
3257 &query.Substring{Pattern: "file"}),
3258 chunkOpts,
3259 )
3260 wantSingleMatch(res, "f2:8")
3261
3262 // Only return a filename result
3263 res = searchForTest(t, b,
3264 &query.Type{
3265 Type: query.TypeFileName,
3266 Child: &query.Substring{Pattern: "file"},
3267 },
3268 chunkOpts,
3269 )
3270 wantSingleMatch(res, "f2")
3271 })
3272}
3273
3274func TestSearchTypeLanguage(t *testing.T) {
3275 b := testIndexBuilder(t, &Repository{
3276 Name: "reponame",
3277 },
3278 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3279 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3280 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3281 )
3282
3283 t.Log(b.languageMap)
3284
3285 t.Run("LineMatches", func(t *testing.T) {
3286 wantSingleMatch := func(res *SearchResult, want string) {
3287 t.Helper()
3288 fmatches := res.Files
3289 if len(fmatches) != 1 {
3290 t.Errorf("got %v, want 1 matches", len(fmatches))
3291 return
3292 }
3293 if len(fmatches[0].LineMatches) != 1 {
3294 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3295 return
3296 }
3297 var got string
3298 if fmatches[0].LineMatches[0].FileName {
3299 got = fmatches[0].FileName
3300 } else {
3301 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3302 }
3303
3304 if got != want {
3305 t.Errorf("got %s, want %s", got, want)
3306 }
3307 }
3308
3309 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3310 wantSingleMatch(res, "apex.cls")
3311
3312 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3313 wantSingleMatch(res, "tex.cls")
3314
3315 res = searchForTest(t, b, &query.Language{Language: "C"})
3316 wantSingleMatch(res, "hello.h")
3317
3318 // test fallback language search by pretending it's an older index version
3319 res = searchForTest(t, b, &query.Language{Language: "C++"})
3320 if len(res.Files) != 0 {
3321 t.Errorf("got %d results for C++, want 0", len(res.Files))
3322 }
3323
3324 b.featureVersion = 11 // force fallback
3325 res = searchForTest(t, b, &query.Language{Language: "C++"})
3326 wantSingleMatch(res, "hello.h")
3327 })
3328
3329 t.Run("ChunkMatches", func(t *testing.T) {
3330 wantSingleMatch := func(res *SearchResult, want string) {
3331 t.Helper()
3332 fmatches := res.Files
3333 if len(fmatches) != 1 {
3334 t.Errorf("got %v, want 1 matches", len(fmatches))
3335 return
3336 }
3337 if len(fmatches[0].ChunkMatches) != 1 {
3338 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3339 return
3340 }
3341 var got string
3342 if fmatches[0].ChunkMatches[0].FileName {
3343 got = fmatches[0].FileName
3344 } else {
3345 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3346 }
3347
3348 if got != want {
3349 t.Errorf("got %s, want %s", got, want)
3350 }
3351 }
3352
3353 b.featureVersion = FeatureVersion // reset feature version
3354 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3355 wantSingleMatch(res, "apex.cls")
3356
3357 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3358 wantSingleMatch(res, "tex.cls")
3359
3360 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3361 wantSingleMatch(res, "hello.h")
3362
3363 // test fallback language search by pretending it's an older index version
3364 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3365 if len(res.Files) != 0 {
3366 t.Errorf("got %d results for C++, want 0", len(res.Files))
3367 }
3368
3369 b.featureVersion = 11 // force fallback
3370 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3371 wantSingleMatch(res, "hello.h")
3372 })
3373}
3374
3375func TestStats(t *testing.T) {
3376 ignored := []cmp.Option{
3377 cmpopts.EquateEmpty(),
3378 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3379 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3380 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3381 }
3382
3383 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3384 searcher := searcherForTest(t, b)
3385 indexdata := searcher.(*indexData)
3386 return indexdata.repoListEntry
3387 }
3388
3389 t.Run("one empty repo", func(t *testing.T) {
3390 b := testIndexBuilder(t, nil)
3391 got := repoListEntries(b)
3392 want := []RepoListEntry{
3393 {
3394 Stats: RepoStats{
3395 Repos: 0,
3396 Shards: 1,
3397 Documents: 0,
3398 IndexBytes: 20,
3399 ContentBytes: 0,
3400 NewLinesCount: 0,
3401 DefaultBranchNewLinesCount: 0,
3402 OtherBranchesNewLinesCount: 0,
3403 },
3404 },
3405 }
3406
3407 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3408 t.Fatalf("mismatch (-want +got):\n%s", diff)
3409 }
3410
3411 })
3412
3413 t.Run("one simple shard", func(t *testing.T) {
3414 b := testIndexBuilder(t, nil,
3415 Document{Name: "doc 0", Content: []byte("content 0")},
3416 Document{Name: "doc 1", Content: []byte("content 1")},
3417 )
3418 got := repoListEntries(b)
3419 want := []RepoListEntry{
3420 {
3421 Stats: RepoStats{
3422 Repos: 0,
3423 Shards: 1,
3424 Documents: 2,
3425 IndexBytes: 224,
3426 ContentBytes: 28,
3427 NewLinesCount: 0,
3428 DefaultBranchNewLinesCount: 0,
3429 OtherBranchesNewLinesCount: 0,
3430 },
3431 },
3432 }
3433
3434 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3435 t.Fatalf("mismatch (-want +got):\n%s", diff)
3436 }
3437
3438 })
3439
3440 t.Run("one compound shard", func(t *testing.T) {
3441 b := testIndexBuilderCompound(t,
3442 []*Repository{
3443 {Name: "repo 0"},
3444 {Name: "repo 1"},
3445 },
3446 [][]Document{
3447 {
3448 {Name: "doc 0", Content: []byte("content 0")},
3449 {Name: "doc 1", Content: []byte("content 1")},
3450 },
3451 {
3452 {Name: "doc 2", Content: []byte("content 2")},
3453 {Name: "doc 3", Content: []byte("content 3")},
3454 },
3455 },
3456 )
3457 got := repoListEntries(b)
3458 want := []RepoListEntry{
3459 {
3460 Stats: RepoStats{
3461 Repos: 0,
3462 Shards: 1,
3463 Documents: 2,
3464 IndexBytes: 180,
3465 ContentBytes: 28,
3466 NewLinesCount: 0,
3467 DefaultBranchNewLinesCount: 0,
3468 OtherBranchesNewLinesCount: 0,
3469 },
3470 },
3471 {
3472 Stats: RepoStats{
3473 Repos: 0,
3474 Shards: 1,
3475 Documents: 2,
3476 IndexBytes: 180,
3477 ContentBytes: 28,
3478 NewLinesCount: 0,
3479 DefaultBranchNewLinesCount: 0,
3480 OtherBranchesNewLinesCount: 0,
3481 },
3482 },
3483 }
3484
3485 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3486 t.Fatalf("mismatch (-want +got):\n%s", diff)
3487 }
3488 })
3489
3490 t.Run("compound shard with empty repos", func(t *testing.T) {
3491 b := testIndexBuilderCompound(t,
3492 []*Repository{
3493 {Name: "repo 0"},
3494 {Name: "repo 1"},
3495 {Name: "repo 2"},
3496 {Name: "repo 3"},
3497 {Name: "repo 4"},
3498 },
3499 [][]Document{
3500 {{Name: "doc 0", Content: []byte("content 0")}},
3501 nil,
3502 {{Name: "doc 1", Content: []byte("content 1")}},
3503 nil,
3504 nil,
3505 },
3506 )
3507 got := repoListEntries(b)
3508
3509 entryEmpty := RepoListEntry{Stats: RepoStats{
3510 Shards: 1,
3511 Documents: 0,
3512 ContentBytes: 0,
3513 }}
3514 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3515 Shards: 1,
3516 Documents: 1,
3517 ContentBytes: 14,
3518 }}
3519
3520 want := []RepoListEntry{
3521 entryNonEmpty,
3522 entryEmpty,
3523 entryNonEmpty,
3524 entryEmpty,
3525 entryEmpty,
3526 }
3527
3528 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3529 t.Fatalf("mismatch (-want +got):\n%s", diff)
3530 }
3531
3532 })
3533}
3534
3535// This tests the frequent pattern "\bLITERAL\b".
3536func TestWordSearch(t *testing.T) {
3537 content := []byte("needle the bla")
3538 // ----------------01234567890123
3539
3540 b := testIndexBuilder(t, nil,
3541 Document{
3542 Name: "f1",
3543 Content: content,
3544 })
3545
3546 t.Run("LineMatches", func(t *testing.T) {
3547 sres := searchForTest(t, b,
3548 &query.Regexp{
3549 Regexp: mustParseRE("\\bthe\\b"),
3550 CaseSensitive: true,
3551 Content: true,
3552 })
3553
3554 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3555 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3556 }
3557
3558 if sres.Stats.RegexpsConsidered != 0 {
3559 t.Fatal("expected regexp to be skipped")
3560 }
3561
3562 got := sres.Files[0].LineMatches[0]
3563 want := LineMatch{
3564 LineFragments: []LineFragmentMatch{{
3565 LineOffset: 7,
3566 Offset: 7,
3567 MatchLength: 3,
3568 }},
3569 Line: content,
3570 FileName: false,
3571 LineNumber: 1,
3572 LineStart: 0,
3573 LineEnd: 14,
3574 }
3575
3576 if !reflect.DeepEqual(got, want) {
3577 t.Errorf("got %#v, want %#v", got, want)
3578 }
3579 })
3580
3581 t.Run("ChunkMatches", func(t *testing.T) {
3582 sres := searchForTest(t, b,
3583 &query.Regexp{
3584 Regexp: mustParseRE("\\bthe\\b"),
3585 CaseSensitive: true,
3586 }, chunkOpts)
3587
3588 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3589 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3590 }
3591
3592 if sres.Stats.RegexpsConsidered != 0 {
3593 t.Fatal("expected regexp to be skipped")
3594 }
3595
3596 got := sres.Files[0].ChunkMatches[0]
3597 want := ChunkMatch{
3598 Content: content,
3599 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3600 Ranges: []Range{{
3601 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3602 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3603 }},
3604 }
3605
3606 if diff := cmp.Diff(want, got); diff != "" {
3607 t.Fatal(diff)
3608 }
3609 })
3610}