fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29 "github.com/kylelemons/godebug/pretty"
30
31 "github.com/sourcegraph/zoekt/query"
32)
33
34func clearScores(r *SearchResult) {
35 for i := range r.Files {
36 r.Files[i].Score = 0.0
37 for j := range r.Files[i].LineMatches {
38 r.Files[i].LineMatches[j].Score = 0.0
39 }
40 for j := range r.Files[i].ChunkMatches {
41 r.Files[i].ChunkMatches[j].Score = 0.0
42 }
43 r.Files[i].Checksum = nil
44 r.Files[i].Debug = ""
45 }
46}
47
48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
49 t.Helper()
50
51 b, err := NewIndexBuilder(repo)
52 if err != nil {
53 t.Fatalf("NewIndexBuilder: %v", err)
54 }
55
56 for i, d := range docs {
57 if err := b.Add(d); err != nil {
58 t.Fatalf("Add %d: %v", i, err)
59 }
60 }
61
62 return b
63}
64
65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
66 t.Helper()
67
68 b := newIndexBuilder()
69 b.indexFormatVersion = NextIndexFormatVersion
70
71 if len(repos) != len(docs) {
72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
73 }
74
75 for i, repo := range repos {
76 if err := b.setRepository(repo); err != nil {
77 t.Fatal(err)
78 }
79 for j, d := range docs[i] {
80 if err := b.Add(d); err != nil {
81 t.Fatalf("Add %d %d: %v", i, j, err)
82 }
83 }
84 }
85
86 return b
87}
88
89func TestBoundary(t *testing.T) {
90 b := testIndexBuilder(t, nil,
91 Document{Name: "f1", Content: []byte("x the")},
92 Document{Name: "f1", Content: []byte("reader")})
93 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
94 if len(res.Files) > 0 {
95 t.Fatalf("got %v, want no matches", res.Files)
96 }
97}
98
99func TestDocSectionInvalid(t *testing.T) {
100 b, err := NewIndexBuilder(nil)
101 if err != nil {
102 t.Fatalf("NewIndexBuilder: %v", err)
103 }
104 doc := Document{
105 Name: "f1",
106 Content: []byte("01234567890123"),
107 Symbols: []DocumentSection{{5, 8}, {7, 9}},
108 }
109
110 if err := b.Add(doc); err == nil {
111 t.Errorf("overlapping doc sections should fail")
112 }
113
114 doc = Document{
115 Name: "f1",
116 Content: []byte("01234567890123"),
117 Symbols: []DocumentSection{{0, 20}},
118 }
119
120 if err := b.Add(doc); err == nil {
121 t.Errorf("doc sections beyond EOF should fail")
122 }
123}
124
125func TestBasic(t *testing.T) {
126 b := testIndexBuilder(t, nil,
127 Document{
128 Name: "f2",
129 Content: []byte("to carry water in the no later bla"),
130 // --------------0123456789012345678901234567890123
131 })
132
133 t.Run("LineMatch", func(t *testing.T) {
134 res := searchForTest(t, b, &query.Substring{
135 Pattern: "water",
136 CaseSensitive: true,
137 })
138 fmatches := res.Files
139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
140 t.Fatalf("got %v, want 1 matches", fmatches)
141 }
142
143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
144 want := "f2:9"
145 if got != want {
146 t.Errorf("1: got %s, want %s", got, want)
147 }
148 })
149
150 t.Run("ChunkMatch", func(t *testing.T) {
151 res := searchForTest(t, b, &query.Substring{
152 Pattern: "water",
153 CaseSensitive: true,
154 }, chunkOpts)
155 fmatches := res.Files
156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
157 t.Fatalf("got %v, want 1 matches", fmatches)
158 }
159
160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
161 want := "f2:9"
162 if got != want {
163 t.Errorf("1: got %s, want %s", got, want)
164 }
165 })
166}
167
168func TestEmptyIndex(t *testing.T) {
169 b := testIndexBuilder(t, nil)
170 searcher := searcherForTest(t, b)
171
172 var opts SearchOptions
173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
174 t.Fatalf("Search: %v", err)
175 }
176
177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
178 t.Fatalf("List: %v", err)
179 }
180
181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
182 t.Fatalf("Search: %v", err)
183 }
184}
185
// memSeeker serves reads out of an in-memory byte slice. The tests use it to
// hand a serialized index to NewSearcher without touching the file system.
type memSeeker struct {
	data []byte
}

// Name returns the fixed identifier "memseeker".
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at offset off. The returned
// slice aliases the underlying buffer. An error is returned when the
// requested range does not lie entirely within the data.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in uint64 so off+sz cannot wrap around uint32, and
	// compare against len (not cap) so bytes beyond the logical end of the
	// buffer are never exposed.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read of %d bytes at offset %d exceeds size %d", sz, off, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the number of bytes held.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
202
203func TestNewlines(t *testing.T) {
204 b := testIndexBuilder(t, nil,
205 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
206 // ---------------------------------------------012345-678901-234
207
208 t.Run("LineMatches", func(t *testing.T) {
209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
210
211 matches := sres.Files
212 want := []FileMatch{{
213 FileName: "filename",
214 LineMatches: []LineMatch{{
215 LineFragments: []LineFragmentMatch{{
216 Offset: 8,
217 LineOffset: 2,
218 MatchLength: 3,
219 }},
220 Line: []byte("line2"),
221 LineStart: 6,
222 LineEnd: 11,
223 LineNumber: 2,
224 }},
225 }}
226
227 if !reflect.DeepEqual(matches, want) {
228 t.Errorf("got %v, want %v", matches, want)
229 }
230 })
231
232 t.Run("ChunkMatches", func(t *testing.T) {
233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
234
235 matches := sres.Files
236 want := []FileMatch{{
237 FileName: "filename",
238 ChunkMatches: []ChunkMatch{{
239 Content: []byte("line2"),
240 ContentStart: Location{
241 ByteOffset: 6,
242 LineNumber: 2,
243 Column: 1,
244 },
245 Ranges: []Range{{
246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
248 }},
249 }},
250 }}
251
252 if diff := cmp.Diff(want, matches); diff != "" {
253 t.Fatal(diff)
254 }
255 })
256}
257
258// A result spanning multiple lines should have LineMatches that only cover
259// single lines.
260func TestQueryNewlines(t *testing.T) {
261 text := "line1\nline2\nbla"
262 b := testIndexBuilder(t, nil,
263 Document{Name: "filename", Content: []byte(text)})
264
265 t.Run("LineMatches", func(t *testing.T) {
266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
267 matches := sres.Files
268 if len(matches) != 1 {
269 t.Fatalf("got %d file matches, want exactly one", len(matches))
270 }
271 m := matches[0]
272 if len(m.LineMatches) != 2 {
273 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
274 }
275 })
276
277 t.Run("ChunkMatches", func(t *testing.T) {
278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
279 matches := sres.Files
280 if len(matches) != 1 {
281 t.Fatalf("got %d file matches, want exactly one", len(matches))
282 }
283 m := matches[0]
284 if len(m.ChunkMatches) != 1 {
285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
286 }
287 })
288}
289
290var chunkOpts = SearchOptions{ChunkMatches: true}
291
292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
293 searcher := searcherForTest(t, b)
294 var opts SearchOptions
295 if len(o) > 0 {
296 opts = o[0]
297 }
298 res, err := searcher.Search(context.Background(), q, &opts)
299 if err != nil {
300 t.Fatalf("Search(%s): %v", q, err)
301 }
302 clearScores(res)
303 return res
304}
305
306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
307 var buf bytes.Buffer
308 if err := b.Write(&buf); err != nil {
309 t.Fatal(err)
310 }
311 f := &memSeeker{buf.Bytes()}
312
313 searcher, err := NewSearcher(f)
314 if err != nil {
315 t.Fatalf("NewSearcher: %v", err)
316 }
317
318 return searcher
319}
320
321func TestCaseFold(t *testing.T) {
322 b := testIndexBuilder(t, nil,
323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
324 // -----------------------------------012345678901234
325 )
326 t.Run("LineMatches", func(t *testing.T) {
327 sres := searchForTest(t, b, &query.Substring{
328 Pattern: "bananas",
329 CaseSensitive: true,
330 })
331 matches := sres.Files
332 if len(matches) != 0 {
333 t.Errorf("foldcase: got %#v, want 0 matches", matches)
334 }
335
336 sres = searchForTest(t, b,
337 &query.Substring{
338 Pattern: "BaNaNAS",
339 CaseSensitive: true,
340 })
341 matches = sres.Files
342 if len(matches) != 1 {
343 t.Errorf("no foldcase: got %v, want 1 matches", matches)
344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
345 t.Errorf("foldcase: got %v, want offsets 7", matches)
346 }
347 })
348
349 t.Run("ChunkMatches", func(t *testing.T) {
350 sres := searchForTest(t, b, &query.Substring{
351 Pattern: "bananas",
352 CaseSensitive: true,
353 }, chunkOpts)
354 matches := sres.Files
355 if len(matches) != 0 {
356 t.Errorf("foldcase: got %#v, want 0 matches", matches)
357 }
358
359 sres = searchForTest(t, b,
360 &query.Substring{
361 Pattern: "BaNaNAS",
362 CaseSensitive: true,
363 })
364 matches = sres.Files
365 if len(matches) != 1 {
366 t.Errorf("no foldcase: got %v, want 1 matches", matches)
367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
368 t.Errorf("foldcase: got %v, want offsets 7", matches)
369 }
370 })
371}
372
373func TestAndSearch(t *testing.T) {
374 b := testIndexBuilder(t, nil,
375 Document{Name: "f1", Content: []byte("x banana y")},
376 Document{Name: "f2", Content: []byte("x apple y")},
377 Document{Name: "f3", Content: []byte("x banana apple y")},
378 // ---------------------------------------0123456789012345
379 )
380
381 t.Run("LineMatches", func(t *testing.T) {
382 sres := searchForTest(t, b, query.NewAnd(
383 &query.Substring{
384 Pattern: "banana",
385 },
386 &query.Substring{
387 Pattern: "apple",
388 },
389 ))
390 matches := sres.Files
391 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
392 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
393 }
394
395 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
396 t.Fatalf("got %#v, want offsets 2,9", matches)
397 }
398
399 wantStats := Stats{
400 FilesLoaded: 1,
401 ContentBytesLoaded: 18,
402 IndexBytesLoaded: 8,
403 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
404 MatchCount: 1,
405 FileCount: 1,
406 FilesConsidered: 2,
407 ShardsScanned: 1,
408 }
409 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
410 t.Errorf("got stats diff %s", diff)
411 }
412 })
413
414 t.Run("ChunkMatches", func(t *testing.T) {
415 sres := searchForTest(t, b, query.NewAnd(
416 &query.Substring{
417 Pattern: "banana",
418 },
419 &query.Substring{
420 Pattern: "apple",
421 },
422 ), chunkOpts)
423 matches := sres.Files
424 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
425 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
426 }
427
428 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
429 t.Fatalf("got %#v, want offsets 2,9", matches)
430 }
431
432 wantStats := Stats{
433 FilesLoaded: 1,
434 ContentBytesLoaded: 18,
435 IndexBytesLoaded: 8,
436 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
437 MatchCount: 2,
438 FileCount: 1,
439 FilesConsidered: 2,
440 ShardsScanned: 1,
441 }
442 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" {
443 t.Errorf("got stats diff %s", diff)
444 }
445 })
446}
447
448func TestAndNegateSearch(t *testing.T) {
449 b := testIndexBuilder(t, nil,
450 Document{Name: "f1", Content: []byte("x banana y")},
451 // -----------------------------------0123456789
452 Document{Name: "f4", Content: []byte("x banana apple y")})
453
454 t.Run("LineMatches", func(t *testing.T) {
455 sres := searchForTest(t, b, query.NewAnd(
456 &query.Substring{
457 Pattern: "banana",
458 },
459 &query.Not{Child: &query.Substring{
460 Pattern: "apple",
461 }}))
462
463 matches := sres.Files
464
465 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
466 t.Fatalf("got %v, want 1 match", matches)
467 }
468 if matches[0].FileName != "f1" {
469 t.Fatalf("got match %#v, want FileName: f1", matches[0])
470 }
471 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
472 t.Fatalf("got %v, want offset 2", matches)
473 }
474 })
475
476 t.Run("ChunkMatches", func(t *testing.T) {
477 sres := searchForTest(t, b,
478 query.NewAnd(
479 &query.Substring{
480 Pattern: "banana",
481 },
482 &query.Not{Child: &query.Substring{
483 Pattern: "apple",
484 }},
485 ),
486 chunkOpts,
487 )
488
489 matches := sres.Files
490
491 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
492 t.Fatalf("got %v, want 1 match", matches)
493 }
494 if matches[0].FileName != "f1" {
495 t.Fatalf("got match %#v, want FileName: f1", matches[0])
496 }
497 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
498 t.Fatalf("got %v, want offset 2", matches)
499 }
500 })
501}
502
503func TestNegativeMatchesOnlyShortcut(t *testing.T) {
504 b := testIndexBuilder(t, nil,
505 Document{Name: "f1", Content: []byte("x banana y")},
506 Document{Name: "f2", Content: []byte("x appelmoes y")},
507 Document{Name: "f3", Content: []byte("x appelmoes y")},
508 Document{Name: "f3", Content: []byte("x appelmoes y")})
509
510 t.Run("LineMatches", func(t *testing.T) {
511 sres := searchForTest(t, b, query.NewAnd(
512 &query.Substring{
513 Pattern: "banana",
514 },
515 &query.Not{Child: &query.Substring{
516 Pattern: "appel",
517 }}))
518
519 if sres.Stats.FilesConsidered != 1 {
520 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
521 }
522 })
523
524 t.Run("ChunkMatches", func(t *testing.T) {
525 sres := searchForTest(t, b, query.NewAnd(
526 &query.Substring{
527 Pattern: "banana",
528 },
529 &query.Not{Child: &query.Substring{
530 Pattern: "appel",
531 }}), chunkOpts)
532
533 if sres.Stats.FilesConsidered != 1 {
534 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
535 }
536 })
537}
538
539func TestFileSearch(t *testing.T) {
540 b := testIndexBuilder(t, nil,
541 Document{Name: "banzana", Content: []byte("x orange y")},
542 // -------------0123456
543 Document{Name: "banana", Content: []byte("x apple y")},
544 // -------------012345
545 )
546
547 t.Run("LineMatches", func(t *testing.T) {
548 sres := searchForTest(t, b, &query.Substring{
549 Pattern: "anan",
550 FileName: true,
551 })
552
553 matches := sres.Files
554 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
555 t.Fatalf("got %v, want 1 match", matches)
556 }
557
558 got := matches[0].LineMatches[0]
559 want := LineMatch{
560 Line: []byte("banana"),
561 LineFragments: []LineFragmentMatch{{
562 Offset: 1,
563 LineOffset: 1,
564 MatchLength: 4,
565 }},
566 FileName: true,
567 }
568
569 if !reflect.DeepEqual(got, want) {
570 t.Errorf("got %#v, want %#v", got, want)
571 }
572 })
573
574 t.Run("ChunkMatches", func(t *testing.T) {
575 sres := searchForTest(t, b, &query.Substring{
576 Pattern: "anan",
577 FileName: true,
578 }, chunkOpts)
579
580 matches := sres.Files
581 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
582 t.Fatalf("got %v, want 1 match", matches)
583 }
584
585 got := matches[0].ChunkMatches[0]
586 want := ChunkMatch{
587 Content: []byte("banana"),
588 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
589 Ranges: []Range{{
590 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
591 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
592 }},
593 FileName: true,
594 }
595
596 if diff := cmp.Diff(want, got); diff != "" {
597 t.Fatal(diff)
598 }
599 })
600
601 t.Run("FileNameSet", func(t *testing.T) {
602 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
603
604 matches := sres.Files
605 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
606 t.Fatalf("got %v, want 1 match", matches)
607 }
608
609 got := matches[0].ChunkMatches[0]
610 want := ChunkMatch{
611 Content: []byte("banana"),
612 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
613 Ranges: []Range{{
614 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
615 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
616 }},
617 FileName: true,
618 }
619
620 if diff := cmp.Diff(want, got); diff != "" {
621 t.Fatal(diff)
622 }
623 })
624}
625
626func TestFileCase(t *testing.T) {
627 b := testIndexBuilder(t, nil,
628 Document{Name: "BANANA", Content: []byte("x orange y")})
629
630 t.Run("LineMatches", func(t *testing.T) {
631 sres := searchForTest(t, b, &query.Substring{
632 Pattern: "banana",
633 FileName: true,
634 })
635
636 matches := sres.Files
637 if len(matches) != 1 || matches[0].FileName != "BANANA" {
638 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
639 }
640 })
641
642 t.Run("ChunkMatches", func(t *testing.T) {
643 sres := searchForTest(t, b, &query.Substring{
644 Pattern: "banana",
645 FileName: true,
646 }, chunkOpts)
647
648 matches := sres.Files
649 if len(matches) != 1 || matches[0].FileName != "BANANA" {
650 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
651 }
652 })
653}
654
655func TestFileRegexpSearchBruteForce(t *testing.T) {
656 b := testIndexBuilder(t, nil,
657 Document{Name: "banzana", Content: []byte("x orange y")},
658 Document{Name: "banana", Content: []byte("x apple y")},
659 )
660 t.Run("LineMatches", func(t *testing.T) {
661 sres := searchForTest(t, b, &query.Regexp{
662 Regexp: mustParseRE("[qn][zx]"),
663 FileName: true,
664 })
665
666 matches := sres.Files
667 if len(matches) != 1 || matches[0].FileName != "banzana" {
668 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
669 }
670 })
671 t.Run("LineMatches", func(t *testing.T) {
672 sres := searchForTest(t, b, &query.Regexp{
673 Regexp: mustParseRE("[qn][zx]"),
674 FileName: true,
675 }, chunkOpts)
676
677 matches := sres.Files
678 if len(matches) != 1 || matches[0].FileName != "banzana" {
679 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
680 }
681 })
682}
683
684func TestFileRegexpSearchShortString(t *testing.T) {
685 b := testIndexBuilder(t, nil,
686 Document{Name: "banana.py", Content: []byte("x orange y")})
687
688 t.Run("LineMatches", func(t *testing.T) {
689 sres := searchForTest(t, b, &query.Regexp{
690 Regexp: mustParseRE("ana.py"),
691 FileName: true,
692 })
693
694 matches := sres.Files
695 if len(matches) != 1 || matches[0].FileName != "banana.py" {
696 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
697 }
698 })
699
700 t.Run("ChunkMatches", func(t *testing.T) {
701 sres := searchForTest(t, b, &query.Regexp{
702 Regexp: mustParseRE("ana.py"),
703 FileName: true,
704 }, chunkOpts)
705
706 matches := sres.Files
707 if len(matches) != 1 || matches[0].FileName != "banana.py" {
708 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
709 }
710 })
711}
712
713func TestFileSubstringSearchBruteForce(t *testing.T) {
714 b := testIndexBuilder(t, nil,
715 Document{Name: "BANZANA", Content: []byte("x orange y")},
716 Document{Name: "banana", Content: []byte("x apple y")})
717
718 q := &query.Substring{
719 Pattern: "z",
720 FileName: true,
721 }
722
723 t.Run("LineMatches", func(t *testing.T) {
724 res := searchForTest(t, b, q)
725 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
726 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
727 }
728 })
729
730 t.Run("ChunkMatches", func(t *testing.T) {
731 res := searchForTest(t, b, q, chunkOpts)
732 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
733 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
734 }
735 })
736}
737
738func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
739 b := testIndexBuilder(t, nil,
740 Document{Name: "BANZANA", Content: []byte("x orange y")},
741 Document{Name: "bananaq", Content: []byte("x apple y")})
742
743 q := &query.Substring{
744 Pattern: "q",
745 FileName: true,
746 }
747 t.Run("LineMatches", func(t *testing.T) {
748 res := searchForTest(t, b, q)
749 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
750 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
751 }
752 })
753
754 t.Run("LineMatches", func(t *testing.T) {
755 res := searchForTest(t, b, q, chunkOpts)
756 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
757 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
758 }
759 })
760}
761
762func TestSearchMatchAll(t *testing.T) {
763 b := testIndexBuilder(t, nil,
764 Document{Name: "banzana", Content: []byte("x orange y")},
765 Document{Name: "banana", Content: []byte("x apple y")})
766
767 t.Run("LineMatches", func(t *testing.T) {
768 sres := searchForTest(t, b, &query.Const{Value: true})
769 matches := sres.Files
770 if len(matches) != 2 {
771 t.Fatalf("got %v, want 2 matches", matches)
772 }
773 })
774
775 t.Run("ChunkMatches", func(t *testing.T) {
776 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
777 matches := sres.Files
778 if len(matches) != 2 {
779 t.Fatalf("got %v, want 2 matches", matches)
780 }
781 })
782}
783
784func TestSearchNewline(t *testing.T) {
785 b := testIndexBuilder(t, nil,
786 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
787
788 t.Run("LineMatches", func(t *testing.T) {
789 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
790
791 // Just check that we don't crash.
792
793 matches := sres.Files
794 if len(matches) != 1 {
795 t.Fatalf("got %v, want 1 matches", matches)
796 }
797 })
798
799 t.Run("ChunkMatches", func(t *testing.T) {
800 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
801
802 // Just check that we don't crash.
803
804 matches := sres.Files
805 if len(matches) != 1 {
806 t.Fatalf("got %v, want 1 matches", matches)
807 }
808 })
809}
810
811func TestSearchMatchAllRegexp(t *testing.T) {
812 b := testIndexBuilder(t, nil,
813 Document{Name: "banzana", Content: []byte("abcd")},
814 Document{Name: "banana", Content: []byte("pqrs")})
815
816 t.Run("LineMatches", func(t *testing.T) {
817 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
818
819 matches := sres.Files
820 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
821 t.Fatalf("got %v, want 2 matches", matches)
822 }
823 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
824 t.Fatalf("want 4 chars in every file, got %#v", matches)
825 }
826
827 })
828
829 t.Run("ChunkMatches", func(t *testing.T) {
830 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
831
832 matches := sres.Files
833 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
834 t.Fatalf("got %v, want 2 matches", matches)
835 }
836 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
837 t.Fatalf("want 4 chars in every file, got %#v", matches)
838 }
839
840 })
841}
842
843func TestFileRestriction(t *testing.T) {
844 b := testIndexBuilder(t, nil,
845 Document{Name: "banana1", Content: []byte("x orange y")},
846 Document{Name: "banana2", Content: []byte("x apple y")},
847 Document{Name: "orange", Content: []byte("x apple z")})
848
849 t.Run("LineMatches", func(t *testing.T) {
850 sres := searchForTest(t, b, query.NewAnd(
851 &query.Substring{
852 Pattern: "banana",
853 FileName: true,
854 },
855 &query.Substring{
856 Pattern: "apple",
857 }))
858
859 matches := sres.Files
860 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
861 t.Fatalf("got %v, want 1 match", matches)
862 }
863
864 match := matches[0].LineMatches[0]
865 got := string(match.Line)
866 want := "x apple y"
867 if got != want {
868 t.Errorf("got match %#v, want line %q", match, want)
869 }
870 })
871
872 t.Run("ChunkMatches", func(t *testing.T) {
873 sres := searchForTest(t, b, query.NewAnd(
874 &query.Substring{
875 Pattern: "banana",
876 FileName: true,
877 },
878 &query.Substring{
879 Pattern: "apple",
880 }), chunkOpts)
881
882 matches := sres.Files
883 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
884 t.Fatalf("got %v, want 1 match", matches)
885 }
886
887 match := matches[0].ChunkMatches[0]
888 got := string(match.Content)
889 want := "x apple y"
890 if got != want {
891 t.Errorf("got match %#v, want line %q", match, want)
892 }
893 })
894}
895
896func TestFileNameBoundary(t *testing.T) {
897 b := testIndexBuilder(t, nil,
898 Document{Name: "banana2", Content: []byte("x apple y")},
899 Document{Name: "helpers.go", Content: []byte("x apple y")},
900 Document{Name: "foo", Content: []byte("x apple y")})
901
902 t.Run("LineMatches", func(t *testing.T) {
903 sres := searchForTest(t, b, &query.Substring{
904 Pattern: "helpers.go",
905 FileName: true,
906 })
907
908 matches := sres.Files
909 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
910 t.Fatalf("got %v, want 1 match", matches)
911 }
912 })
913
914 t.Run("ChunkMatches", func(t *testing.T) {
915 sres := searchForTest(t, b, &query.Substring{
916 Pattern: "helpers.go",
917 FileName: true,
918 }, chunkOpts)
919
920 matches := sres.Files
921 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
922 t.Fatalf("got %v, want 1 match", matches)
923 }
924 })
925}
926
927func TestDocumentOrder(t *testing.T) {
928 var docs []Document
929 for i := 0; i < 3; i++ {
930 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
931 }
932
933 b := testIndexBuilder(t, nil, docs...)
934
935 t.Run("LineMatches", func(t *testing.T) {
936 sres := searchForTest(t, b, query.NewAnd(
937 &query.Substring{
938 Pattern: "needle",
939 }))
940
941 want := []string{"f0", "f1", "f2"}
942 var got []string
943 for _, f := range sres.Files {
944 got = append(got, f.FileName)
945 }
946 if !reflect.DeepEqual(got, want) {
947 t.Fatalf("got %v, want %v", got, want)
948 }
949 })
950
951 t.Run("ChunkMatches", func(t *testing.T) {
952 sres := searchForTest(t, b,
953 query.NewAnd(&query.Substring{
954 Pattern: "needle",
955 }),
956 chunkOpts,
957 )
958
959 want := []string{"f0", "f1", "f2"}
960 var got []string
961 for _, f := range sres.Files {
962 got = append(got, f.FileName)
963 }
964 if !reflect.DeepEqual(got, want) {
965 t.Fatalf("got %v, want %v", got, want)
966 }
967 })
968}
969
970func TestBranchMask(t *testing.T) {
971 b := testIndexBuilder(t, &Repository{
972 Branches: []RepositoryBranch{
973 {"master", "v-master"},
974 {"stable", "v-stable"},
975 {"bonzai", "v-bonzai"},
976 },
977 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
978 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
979 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
980 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
981 )
982
983 t.Run("LineMatches", func(t *testing.T) {
984 sres := searchForTest(t, b, query.NewAnd(
985 &query.Substring{
986 Pattern: "needle",
987 },
988 &query.Branch{
989 Pattern: "table",
990 }))
991
992 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
993 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
994 }
995
996 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
997 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
998 }
999 })
1000
1001 t.Run("ChunkMatches", func(t *testing.T) {
1002 sres := searchForTest(t, b, query.NewAnd(
1003 &query.Substring{
1004 Pattern: "needle",
1005 },
1006 &query.Branch{
1007 Pattern: "table",
1008 }),
1009 chunkOpts,
1010 )
1011
1012 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1013 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1014 }
1015
1016 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1017 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1018 }
1019 })
1020}
1021
1022func TestBranchLimit(t *testing.T) {
1023 for limit := 64; limit <= 65; limit++ {
1024 r := &Repository{}
1025 for i := 0; i < limit; i++ {
1026 s := fmt.Sprintf("b%d", i)
1027 r.Branches = append(r.Branches, RepositoryBranch{
1028 s, "v-" + s,
1029 })
1030 }
1031 _, err := NewIndexBuilder(r)
1032 if limit == 64 && err != nil {
1033 t.Fatalf("NewIndexBuilder: %v", err)
1034 } else if limit == 65 && err == nil {
1035 t.Fatalf("NewIndexBuilder succeeded")
1036 }
1037 }
1038}
1039
1040func TestBranchReport(t *testing.T) {
1041 branches := []string{"stable", "master"}
1042 b := testIndexBuilder(t, &Repository{
1043 Branches: []RepositoryBranch{
1044 {"stable", "vs"},
1045 {"master", "vm"},
1046 },
1047 },
1048 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1049
1050 t.Run("LineMatches", func(t *testing.T) {
1051 sres := searchForTest(t, b, &query.Substring{
1052 Pattern: "needle",
1053 })
1054 if len(sres.Files) != 1 {
1055 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1056 }
1057
1058 f := sres.Files[0]
1059 if !reflect.DeepEqual(f.Branches, branches) {
1060 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1061 }
1062 })
1063
1064 t.Run("ChunkMatches", func(t *testing.T) {
1065 sres := searchForTest(t, b, &query.Substring{
1066 Pattern: "needle",
1067 }, chunkOpts)
1068 if len(sres.Files) != 1 {
1069 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1070 }
1071
1072 f := sres.Files[0]
1073 if !reflect.DeepEqual(f.Branches, branches) {
1074 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1075 }
1076 })
1077
1078}
1079
1080func TestBranchVersions(t *testing.T) {
1081 b := testIndexBuilder(t, &Repository{
1082 Branches: []RepositoryBranch{
1083 {"stable", "v-stable"},
1084 {"master", "v-master"},
1085 },
1086 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1087
1088 t.Run("LineMatches", func(t *testing.T) {
1089 sres := searchForTest(t, b, &query.Substring{
1090 Pattern: "needle",
1091 })
1092 if len(sres.Files) != 1 {
1093 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1094 }
1095
1096 f := sres.Files[0]
1097 if f.Version != "v-master" {
1098 t.Fatalf("got file %#v, want version 'v-master'", f)
1099 }
1100 })
1101
1102 t.Run("ChunkMatches", func(t *testing.T) {
1103 sres := searchForTest(t, b, &query.Substring{
1104 Pattern: "needle",
1105 }, chunkOpts)
1106 if len(sres.Files) != 1 {
1107 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1108 }
1109
1110 f := sres.Files[0]
1111 if f.Version != "v-master" {
1112 t.Fatalf("got file %#v, want version 'v-master'", f)
1113 }
1114 })
1115}
1116
// mustParseRE parses s with Perl syntax flags and returns the resulting
// syntax tree. It panics with the parse error if s is not a valid pattern.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}
	panic(parseErr)
}
1125
1126func TestRegexp(t *testing.T) {
1127 content := []byte("needle the bla")
1128 // ----------------01234567890123
1129
1130 b := testIndexBuilder(t, nil,
1131 Document{
1132 Name: "f1",
1133 Content: content,
1134 })
1135
1136 t.Run("LineMatches", func(t *testing.T) {
1137 sres := searchForTest(t, b,
1138 &query.Regexp{
1139 Regexp: mustParseRE("dle.*bla"),
1140 })
1141
1142 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1143 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1144 }
1145
1146 got := sres.Files[0].LineMatches[0]
1147 want := LineMatch{
1148 LineFragments: []LineFragmentMatch{{
1149 LineOffset: 3,
1150 Offset: 3,
1151 MatchLength: 11,
1152 }},
1153 Line: content,
1154 FileName: false,
1155 LineNumber: 1,
1156 LineStart: 0,
1157 LineEnd: 14,
1158 }
1159
1160 if !reflect.DeepEqual(got, want) {
1161 t.Errorf("got %#v, want %#v", got, want)
1162 }
1163 })
1164
1165 t.Run("ChunkMatches", func(t *testing.T) {
1166 sres := searchForTest(t, b,
1167 &query.Regexp{
1168 Regexp: mustParseRE("dle.*bla"),
1169 }, chunkOpts)
1170
1171 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1172 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1173 }
1174
1175 got := sres.Files[0].ChunkMatches[0]
1176 want := ChunkMatch{
1177 Content: content,
1178 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1179 Ranges: []Range{{
1180 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1181 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1182 }},
1183 }
1184
1185 if diff := cmp.Diff(want, got); diff != "" {
1186 t.Fatal(diff)
1187 }
1188 })
1189}
1190
1191func TestRegexpFile(t *testing.T) {
1192 content := []byte("needle the bla")
1193
1194 name := "let's play: find the mussel"
1195 b := testIndexBuilder(t, nil,
1196 Document{Name: name, Content: content},
1197 Document{Name: "play.txt", Content: content})
1198
1199 t.Run("LineMatches", func(t *testing.T) {
1200 sres := searchForTest(t, b,
1201 &query.Regexp{
1202 Regexp: mustParseRE("play.*mussel"),
1203 FileName: true,
1204 })
1205
1206 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1207 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1208 }
1209
1210 if sres.Files[0].FileName != name {
1211 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1212 }
1213 })
1214
1215 t.Run("ChunkMatches", func(t *testing.T) {
1216 sres := searchForTest(t, b,
1217 &query.Regexp{
1218 Regexp: mustParseRE("play.*mussel"),
1219 FileName: true,
1220 }, chunkOpts)
1221
1222 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1223 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1224 }
1225
1226 if sres.Files[0].FileName != name {
1227 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1228 }
1229 })
1230}
1231
1232func TestRegexpOrder(t *testing.T) {
1233 content := []byte("bla the needle")
1234 // ----------------01234567890123
1235
1236 b := testIndexBuilder(t, nil,
1237 Document{Name: "f1", Content: content})
1238
1239 t.Run("LineMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b,
1241 &query.Regexp{
1242 Regexp: mustParseRE("dle.*bla"),
1243 })
1244
1245 if len(sres.Files) != 0 {
1246 t.Fatalf("got %v, want 0 matches", sres.Files)
1247 }
1248 })
1249
1250 t.Run("ChunkMatches", func(t *testing.T) {
1251 sres := searchForTest(t, b,
1252 &query.Regexp{
1253 Regexp: mustParseRE("dle.*bla"),
1254 })
1255
1256 if len(sres.Files) != 0 {
1257 t.Fatalf("got %v, want 0 matches", sres.Files)
1258 }
1259 })
1260}
1261
1262func TestRepoName(t *testing.T) {
1263 content := []byte("bla the needle")
1264 // ----------------01234567890123
1265
1266 b := testIndexBuilder(t, &Repository{Name: "bla"},
1267 Document{Name: "f1", Content: content})
1268
1269 t.Run("LineMatches", func(t *testing.T) {
1270 sres := searchForTest(t, b,
1271 query.NewAnd(
1272 &query.Substring{Pattern: "needle"},
1273 &query.Repo{Regexp: regexp.MustCompile("foo")},
1274 ))
1275
1276 if len(sres.Files) != 0 {
1277 t.Fatalf("got %v, want 0 matches", sres.Files)
1278 }
1279
1280 if sres.Stats.FilesConsidered > 0 {
1281 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1282 }
1283
1284 sres = searchForTest(t, b,
1285 query.NewAnd(
1286 &query.Substring{Pattern: "needle"},
1287 &query.Repo{Regexp: regexp.MustCompile("bla")},
1288 ))
1289 if len(sres.Files) != 1 {
1290 t.Fatalf("got %v, want 1 match", sres.Files)
1291 }
1292 })
1293
1294 t.Run("ChunkMatches", func(t *testing.T) {
1295 sres := searchForTest(t, b,
1296 query.NewAnd(
1297 &query.Substring{Pattern: "needle"},
1298 &query.Repo{Regexp: regexp.MustCompile("foo")},
1299 ),
1300 chunkOpts,
1301 )
1302
1303 if len(sres.Files) != 0 {
1304 t.Fatalf("got %v, want 0 matches", sres.Files)
1305 }
1306
1307 if sres.Stats.FilesConsidered > 0 {
1308 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1309 }
1310
1311 sres = searchForTest(t, b,
1312 query.NewAnd(
1313 &query.Substring{Pattern: "needle"},
1314 &query.Repo{Regexp: regexp.MustCompile("bla")},
1315 ))
1316 if len(sres.Files) != 1 {
1317 t.Fatalf("got %v, want 1 match", sres.Files)
1318 }
1319 })
1320}
1321
1322func TestMergeMatches(t *testing.T) {
1323 content := []byte("blablabla")
1324 b := testIndexBuilder(t, nil,
1325 Document{Name: "f1", Content: content})
1326
1327 t.Run("LineMatches", func(t *testing.T) {
1328 sres := searchForTest(t, b,
1329 &query.Substring{Pattern: "bla"})
1330 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1331 t.Fatalf("got %v, want 1 match", sres.Files)
1332 }
1333 })
1334
1335 t.Run("ChunkMatches", func(t *testing.T) {
1336 sres := searchForTest(t, b,
1337 &query.Substring{Pattern: "bla"},
1338 chunkOpts,
1339 )
1340 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1341 t.Fatalf("got %v, want 1 match", sres.Files)
1342 }
1343 })
1344}
1345
1346func TestRepoURL(t *testing.T) {
1347 content := []byte("blablabla")
1348 b := testIndexBuilder(t, &Repository{
1349 Name: "name",
1350 URL: "URL",
1351 CommitURLTemplate: "commit",
1352 FileURLTemplate: "file-url",
1353 LineFragmentTemplate: "fragment",
1354 }, Document{Name: "f1", Content: content})
1355
1356 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1357
1358 if sres.RepoURLs["name"] != "file-url" {
1359 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1360 }
1361 if sres.LineFragments["name"] != "fragment" {
1362 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1363 }
1364}
1365
1366func TestRegexpCaseSensitive(t *testing.T) {
1367 content := []byte("bla\nfunc unmarshalGitiles\n")
1368 b := testIndexBuilder(t, nil, Document{
1369 Name: "f1",
1370 Content: content,
1371 })
1372
1373 t.Run("LineMatches", func(t *testing.T) {
1374 res := searchForTest(t, b,
1375 &query.Regexp{
1376 Regexp: mustParseRE("func.*Gitiles"),
1377 CaseSensitive: true,
1378 })
1379
1380 if len(res.Files) != 1 {
1381 t.Fatalf("got %v, want one match", res.Files)
1382 }
1383 })
1384
1385 t.Run("ChunkMatches", func(t *testing.T) {
1386 res := searchForTest(t, b,
1387 &query.Regexp{
1388 Regexp: mustParseRE("func.*Gitiles"),
1389 CaseSensitive: true,
1390 },
1391 chunkOpts,
1392 )
1393
1394 if len(res.Files) != 1 {
1395 t.Fatalf("got %v, want one match", res.Files)
1396 }
1397 })
1398}
1399
1400func TestRegexpCaseFolding(t *testing.T) {
1401 content := []byte("bla\nfunc unmarshalGitiles\n")
1402
1403 b := testIndexBuilder(t, nil,
1404 Document{Name: "f1", Content: content})
1405 res := searchForTest(t, b,
1406 &query.Regexp{
1407 Regexp: mustParseRE("func.*GITILES"),
1408 CaseSensitive: false,
1409 })
1410
1411 if len(res.Files) != 1 {
1412 t.Fatalf("got %v, want one match", res.Files)
1413 }
1414}
1415
1416func TestCaseRegexp(t *testing.T) {
1417 content := []byte("BLABLABLA")
1418 b := testIndexBuilder(t, nil,
1419 Document{Name: "f1", Content: content})
1420
1421 t.Run("LineMatches", func(t *testing.T) {
1422 res := searchForTest(t, b,
1423 &query.Regexp{
1424 Regexp: mustParseRE("[xb][xl][xa]"),
1425 CaseSensitive: true,
1426 })
1427
1428 if len(res.Files) > 0 {
1429 t.Fatalf("got %v, want no matches", res.Files)
1430 }
1431 })
1432
1433 t.Run("ChunkMatches", func(t *testing.T) {
1434 res := searchForTest(t, b,
1435 &query.Regexp{
1436 Regexp: mustParseRE("[xb][xl][xa]"),
1437 CaseSensitive: true,
1438 },
1439 chunkOpts,
1440 )
1441
1442 if len(res.Files) > 0 {
1443 t.Fatalf("got %v, want no matches", res.Files)
1444 }
1445 })
1446}
1447
1448func TestNegativeRegexp(t *testing.T) {
1449 content := []byte("BLABLABLA needle bla")
1450 b := testIndexBuilder(t, nil,
1451 Document{Name: "f1", Content: content})
1452
1453 t.Run("LineMatches", func(t *testing.T) {
1454 res := searchForTest(t, b,
1455 query.NewAnd(
1456 &query.Substring{
1457 Pattern: "needle",
1458 },
1459 &query.Not{
1460 Child: &query.Regexp{
1461 Regexp: mustParseRE(".cs"),
1462 },
1463 }))
1464
1465 if len(res.Files) != 1 {
1466 t.Fatalf("got %v, want 1 match", res.Files)
1467 }
1468 })
1469
1470 t.Run("ChunkMatches", func(t *testing.T) {
1471 res := searchForTest(t, b,
1472 query.NewAnd(
1473 &query.Substring{
1474 Pattern: "needle",
1475 },
1476 &query.Not{
1477 Child: &query.Regexp{
1478 Regexp: mustParseRE(".cs"),
1479 },
1480 },
1481 ),
1482 chunkOpts)
1483
1484 if len(res.Files) != 1 {
1485 t.Fatalf("got %v, want 1 match", res.Files)
1486 }
1487 })
1488}
1489
1490func TestSymbolRank(t *testing.T) {
1491 t.Skip()
1492
1493 content := []byte("func bla() blubxxxxx")
1494 // ----------------01234567890123456789
1495 b := testIndexBuilder(t, nil,
1496 Document{
1497 Name: "f1",
1498 Content: content,
1499 }, Document{
1500 Name: "f2",
1501 Content: content,
1502 Symbols: []DocumentSection{{5, 8}},
1503 }, Document{
1504 Name: "f3",
1505 Content: content,
1506 })
1507
1508 t.Run("LineMatches", func(t *testing.T) {
1509 res := searchForTest(t, b,
1510 &query.Substring{
1511 CaseSensitive: false,
1512 Pattern: "bla",
1513 })
1514
1515 if len(res.Files) != 3 {
1516 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1517 }
1518 if res.Files[0].FileName != "f2" {
1519 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1520 }
1521 })
1522
1523 t.Run("ChunkMatches", func(t *testing.T) {
1524 res := searchForTest(t, b,
1525 &query.Substring{
1526 CaseSensitive: false,
1527 Pattern: "bla",
1528 }, chunkOpts)
1529
1530 if len(res.Files) != 3 {
1531 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1532 }
1533 if res.Files[0].FileName != "f2" {
1534 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1535 }
1536 })
1537}
1538
1539func TestSymbolRankRegexpUTF8(t *testing.T) {
1540 t.Skip()
1541
1542 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1543 content := []byte(prefix +
1544 "func bla() blub")
1545 // ------012345678901234
1546 b := testIndexBuilder(t, nil,
1547 Document{
1548 Name: "f1",
1549 Content: content,
1550 }, Document{
1551 Name: "f2",
1552 Content: content,
1553 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1554 }, Document{
1555 Name: "f3",
1556 Content: content,
1557 })
1558
1559 t.Run("LineMatches", func(t *testing.T) {
1560 res := searchForTest(t, b,
1561 &query.Regexp{
1562 Regexp: mustParseRE("b.a"),
1563 })
1564
1565 if len(res.Files) != 3 {
1566 t.Fatalf("got %#v, want 3 files", res.Files)
1567 }
1568 if res.Files[0].FileName != "f2" {
1569 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1570 }
1571 })
1572
1573 t.Run("ChunjkMatches", func(t *testing.T) {
1574 res := searchForTest(t, b,
1575 &query.Regexp{
1576 Regexp: mustParseRE("b.a"),
1577 }, chunkOpts)
1578
1579 if len(res.Files) != 3 {
1580 t.Fatalf("got %#v, want 3 files", res.Files)
1581 }
1582 if res.Files[0].FileName != "f2" {
1583 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1584 }
1585 })
1586}
1587
1588func TestPartialSymbolRank(t *testing.T) {
1589 t.Skip()
1590
1591 content := []byte("func bla() blub")
1592 // ----------------012345678901234
1593
1594 b := testIndexBuilder(t, nil,
1595 Document{
1596 Name: "f1",
1597 Content: content,
1598 Symbols: []DocumentSection{{4, 9}},
1599 }, Document{
1600 Name: "f2",
1601 Content: content,
1602 Symbols: []DocumentSection{{4, 8}},
1603 }, Document{
1604 Name: "f3",
1605 Content: content,
1606 Symbols: []DocumentSection{{4, 9}},
1607 })
1608
1609 t.Run("LineMatches", func(t *testing.T) {
1610 res := searchForTest(t, b,
1611 &query.Substring{
1612 Pattern: "bla",
1613 })
1614
1615 if len(res.Files) != 3 {
1616 t.Fatalf("got %#v, want 3 files", res.Files)
1617 }
1618 if res.Files[0].FileName != "f2" {
1619 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1620 }
1621 })
1622
1623 t.Run("ChunkMatches", func(t *testing.T) {
1624 res := searchForTest(t, b,
1625 &query.Substring{
1626 Pattern: "bla",
1627 }, chunkOpts)
1628
1629 if len(res.Files) != 3 {
1630 t.Fatalf("got %#v, want 3 files", res.Files)
1631 }
1632 if res.Files[0].FileName != "f2" {
1633 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1634 }
1635 })
1636}
1637
1638func TestNegativeRepo(t *testing.T) {
1639 content := []byte("bla the needle")
1640 // ----------------01234567890123
1641 b := testIndexBuilder(t, &Repository{
1642 Name: "bla",
1643 }, Document{Name: "f1", Content: content})
1644
1645 t.Run("LineMatches", func(t *testing.T) {
1646 sres := searchForTest(t, b,
1647 query.NewAnd(
1648 &query.Substring{Pattern: "needle"},
1649 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1650 ))
1651
1652 if len(sres.Files) != 0 {
1653 t.Fatalf("got %v, want 0 matches", sres.Files)
1654 }
1655 })
1656
1657 t.Run("ChunkMatches", func(t *testing.T) {
1658 sres := searchForTest(t, b,
1659 query.NewAnd(
1660 &query.Substring{Pattern: "needle"},
1661 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1662 ), chunkOpts)
1663
1664 if len(sres.Files) != 0 {
1665 t.Fatalf("got %v, want 0 matches", sres.Files)
1666 }
1667 })
1668}
1669
1670func TestListRepos(t *testing.T) {
1671 content := []byte("bla the needle\n")
1672 // ----------------012345678901234-
1673
1674 t.Run("default and minimal fallback", func(t *testing.T) {
1675 repo := &Repository{
1676 Name: "reponame",
1677 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1678 }
1679 b := testIndexBuilder(t, repo,
1680 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1681 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1682 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1683 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1684
1685 searcher := searcherForTest(t, b)
1686
1687 for _, opts := range []*ListOptions{
1688 nil,
1689 {Minimal: false},
1690 {Minimal: true},
1691 } {
1692 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1693 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1694
1695 res, err := searcher.List(context.Background(), q, opts)
1696 if err != nil {
1697 t.Fatalf("List(%v): %v", q, err)
1698 }
1699
1700 want := &RepoList{
1701 Repos: []*RepoListEntry{{
1702 Repository: *repo,
1703 Stats: RepoStats{
1704 Documents: 4,
1705 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1706 Shards: 1,
1707
1708 NewLinesCount: 4,
1709 DefaultBranchNewLinesCount: 2,
1710 OtherBranchesNewLinesCount: 3,
1711 },
1712 }},
1713 Stats: RepoStats{
1714 Documents: 4,
1715 ContentBytes: 68,
1716 Shards: 1,
1717
1718 NewLinesCount: 4,
1719 DefaultBranchNewLinesCount: 2,
1720 OtherBranchesNewLinesCount: 3,
1721 },
1722 }
1723 ignored := []cmp.Option{
1724 cmpopts.EquateEmpty(),
1725 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1726 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1727 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1728 cmpopts.IgnoreFields(Repository{}, "priority"),
1729 }
1730 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1731 t.Fatalf("mismatch (-want +got):\n%s", diff)
1732 }
1733
1734 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1735 res, err = searcher.List(context.Background(), q, nil)
1736 if err != nil {
1737 t.Fatalf("List(%v): %v", q, err)
1738 }
1739 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1740 t.Fatalf("got %v, want 0 matches", res)
1741 }
1742 })
1743 }
1744 })
1745
1746 t.Run("minimal", func(t *testing.T) {
1747 repo := &Repository{
1748 ID: 1234,
1749 Name: "reponame",
1750 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1751 RawConfig: map[string]string{"repoid": "1234"},
1752 }
1753 b := testIndexBuilder(t, repo,
1754 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1755 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1756 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1757 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1758
1759 searcher := searcherForTest(t, b)
1760
1761 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1762 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1763 if err != nil {
1764 t.Fatalf("List(%v): %v", q, err)
1765 }
1766
1767 want := &RepoList{
1768 Minimal: map[uint32]*MinimalRepoListEntry{
1769 repo.ID: {
1770 HasSymbols: repo.HasSymbols,
1771 Branches: repo.Branches,
1772 },
1773 },
1774 Stats: RepoStats{
1775 Shards: 1,
1776 Documents: 4,
1777 IndexBytes: 412,
1778 ContentBytes: 68,
1779 NewLinesCount: 4,
1780 DefaultBranchNewLinesCount: 2,
1781 OtherBranchesNewLinesCount: 3,
1782 },
1783 }
1784
1785 ignored := []cmp.Option{
1786 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1787 }
1788 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1789 t.Fatalf("mismatch (-want +got):\n%s", diff)
1790 }
1791
1792 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1793 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true})
1794 if err != nil {
1795 t.Fatalf("List(%v): %v", q, err)
1796 }
1797 if len(res.Repos) != 0 || len(res.Minimal) != 0 {
1798 t.Fatalf("got %v, want 0 matches", res)
1799 }
1800 })
1801}
1802
1803func TestListReposByContent(t *testing.T) {
1804 content := []byte("bla the needle")
1805
1806 b := testIndexBuilder(t, &Repository{
1807 Name: "reponame",
1808 },
1809 Document{Name: "f1", Content: content},
1810 Document{Name: "f2", Content: content})
1811
1812 searcher := searcherForTest(t, b)
1813 q := &query.Substring{Pattern: "needle"}
1814 res, err := searcher.List(context.Background(), q, nil)
1815 if err != nil {
1816 t.Fatalf("List(%v): %v", q, err)
1817 }
1818 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1819 t.Fatalf("got %v, want 1 matches", res)
1820 }
1821 if got := res.Repos[0].Stats.Shards; got != 1 {
1822 t.Fatalf("got %d, want 1 shard", got)
1823 }
1824 q = &query.Substring{Pattern: "foo"}
1825 res, err = searcher.List(context.Background(), q, nil)
1826 if err != nil {
1827 t.Fatalf("List(%v): %v", q, err)
1828 }
1829 if len(res.Repos) != 0 {
1830 t.Fatalf("got %v, want 0 matches", res)
1831 }
1832}
1833
1834func TestMetadata(t *testing.T) {
1835 content := []byte("bla the needle")
1836
1837 b := testIndexBuilder(t, &Repository{
1838 Name: "reponame",
1839 }, Document{Name: "f1", Content: content},
1840 Document{Name: "f2", Content: content})
1841
1842 var buf bytes.Buffer
1843 if err := b.Write(&buf); err != nil {
1844 t.Fatal(err)
1845 }
1846 f := &memSeeker{buf.Bytes()}
1847
1848 rd, _, err := ReadMetadata(f)
1849 if err != nil {
1850 t.Fatalf("ReadMetadata: %v", err)
1851 }
1852
1853 if got, want := rd[0].Name, "reponame"; got != want {
1854 t.Fatalf("got %q want %q", got, want)
1855 }
1856}
1857
1858func TestOr(t *testing.T) {
1859 b := testIndexBuilder(t, nil,
1860 Document{Name: "f1", Content: []byte("needle")},
1861 Document{Name: "f2", Content: []byte("banana")})
1862 t.Run("LineMatches", func(t *testing.T) {
1863 sres := searchForTest(t, b, query.NewOr(
1864 &query.Substring{Pattern: "needle"},
1865 &query.Substring{Pattern: "banana"}))
1866
1867 if len(sres.Files) != 2 {
1868 t.Fatalf("got %v, want 2 files", sres.Files)
1869 }
1870 })
1871
1872 t.Run("ChunkMatches", func(t *testing.T) {
1873 sres := searchForTest(t, b, query.NewOr(
1874 &query.Substring{Pattern: "needle"},
1875 &query.Substring{Pattern: "banana"}))
1876
1877 if len(sres.Files) != 2 {
1878 t.Fatalf("got %v, want 2 files", sres.Files)
1879 }
1880 })
1881}
1882
1883func TestFrequency(t *testing.T) {
1884 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
1885
1886 b := testIndexBuilder(t, nil,
1887 Document{
1888 Name: "f1",
1889 Content: content,
1890 })
1891
1892 t.Run("LineMatches", func(t *testing.T) {
1893 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
1894 if len(sres.Files) != 0 {
1895 t.Errorf("got %v, wanted 0 matches", sres.Files)
1896 }
1897 })
1898
1899 t.Run("ChunkMatches", func(t *testing.T) {
1900 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
1901 if len(sres.Files) != 0 {
1902 t.Errorf("got %v, wanted 0 matches", sres.Files)
1903 }
1904 })
1905}
1906
1907func TestMatchNewline(t *testing.T) {
1908 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
1909 if err != nil {
1910 t.Fatalf("syntax.Parse: %v", err)
1911 }
1912
1913 content := []byte("pqr\nalex")
1914
1915 b := testIndexBuilder(t, nil,
1916 Document{
1917 Name: "f1",
1918 Content: content,
1919 })
1920
1921 t.Run("LineMatches", func(t *testing.T) {
1922 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
1923 if len(sres.Files) != 1 {
1924 t.Errorf("got %v, wanted 1 matches", sres.Files)
1925 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
1926 t.Errorf("got match line %q, want %q", l, content)
1927 }
1928 })
1929
1930 t.Run("ChunkMatches", func(t *testing.T) {
1931 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
1932 if len(sres.Files) != 1 {
1933 t.Errorf("got %v, wanted 1 matches", sres.Files)
1934 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
1935 t.Errorf("got match line %q, want %q", c, content)
1936 }
1937 })
1938}
1939
1940func TestSubRepo(t *testing.T) {
1941 subRepos := map[string]*Repository{
1942 "sub": {
1943 Name: "sub-name",
1944 LineFragmentTemplate: "sub-line",
1945 },
1946 }
1947
1948 content := []byte("pqr\nalex")
1949
1950 b := testIndexBuilder(t, &Repository{
1951 SubRepoMap: subRepos,
1952 }, Document{
1953 Name: "sub/f1",
1954 Content: content,
1955 SubRepositoryPath: "sub",
1956 })
1957
1958 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
1959 if len(sres.Files) != 1 {
1960 t.Fatalf("got %v, wanted 1 matches", sres.Files)
1961 }
1962
1963 f := sres.Files[0]
1964 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
1965 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
1966 }
1967
1968 if sres.LineFragments["sub-name"] != "sub-line" {
1969 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
1970 }
1971}
1972
1973func TestSearchEither(t *testing.T) {
1974 b := testIndexBuilder(t, nil,
1975 Document{Name: "f1", Content: []byte("bla needle bla")},
1976 Document{Name: "needle-file-branch", Content: []byte("bla content")})
1977
1978 t.Run("LineMatches", func(t *testing.T) {
1979 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
1980 if len(sres.Files) != 2 {
1981 t.Fatalf("got %v, wanted 2 matches", sres.Files)
1982 }
1983
1984 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
1985 if len(sres.Files) != 1 {
1986 t.Fatalf("got %v, wanted 1 match", sres.Files)
1987 }
1988
1989 if got, want := sres.Files[0].FileName, "f1"; got != want {
1990 t.Errorf("got %q, want %q", got, want)
1991 }
1992 })
1993
1994 t.Run("ChunkMatches", func(t *testing.T) {
1995 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
1996 if len(sres.Files) != 2 {
1997 t.Fatalf("got %v, wanted 2 matches", sres.Files)
1998 }
1999
2000 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2001 if len(sres.Files) != 1 {
2002 t.Fatalf("got %v, wanted 1 match", sres.Files)
2003 }
2004
2005 if got, want := sres.Files[0].FileName, "f1"; got != want {
2006 t.Errorf("got %q, want %q", got, want)
2007 }
2008 })
2009}
2010
2011func TestUnicodeExactMatch(t *testing.T) {
2012 needle := "néédlÉ"
2013 content := []byte("blá blá " + needle + " blâ")
2014
2015 b := testIndexBuilder(t, nil,
2016 Document{Name: "f1", Content: content})
2017
2018 t.Run("LineMatches", func(t *testing.T) {
2019 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2020 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2021 }
2022 })
2023
2024 t.Run("ChunkMatches", func(t *testing.T) {
2025 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2026 if len(res.Files) != 1 {
2027 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2028 }
2029 })
2030}
2031
2032func TestUnicodeCoverContent(t *testing.T) {
2033 needle := "néédlÉ"
2034 content := []byte("blá blá " + needle + " blâ")
2035
2036 b := testIndexBuilder(t, nil,
2037 Document{Name: "f1", Content: content})
2038
2039 t.Run("LineMatches", func(t *testing.T) {
2040 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2041 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2042 }
2043
2044 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2045 if len(res.Files) != 1 {
2046 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2047 }
2048
2049 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2050 t.Errorf("got %d want %d", got, want)
2051 }
2052 })
2053
2054 t.Run("ChunkMatches", func(t *testing.T) {
2055 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2056 if len(res.Files) != 0 {
2057 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2058 }
2059
2060 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2061 if len(res.Files) != 1 {
2062 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2063 }
2064
2065 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2066 want := uint32(strings.Index(string(content), needle))
2067 if got != want {
2068 t.Errorf("got %d want %d", got, want)
2069 }
2070 })
2071}
2072
2073func TestUnicodeNonCoverContent(t *testing.T) {
2074 needle := "nééáádlÉ"
2075 content := []byte("blá blá " + needle + " blâ")
2076
2077 b := testIndexBuilder(t, nil,
2078 Document{Name: "f1", Content: content})
2079
2080 t.Run("LineMatches", func(t *testing.T) {
2081 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2082 if len(res.Files) != 1 {
2083 t.Fatalf("got %v, wanted 1 match", res.Files)
2084 }
2085
2086 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2087 t.Errorf("got %d want %d", got, want)
2088 }
2089 })
2090
2091 t.Run("ChunkMatches", func(t *testing.T) {
2092 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2093 if len(res.Files) != 1 {
2094 t.Fatalf("got %v, wanted 1 match", res.Files)
2095 }
2096
2097 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2098 want := uint32(strings.Index(string(content), needle))
2099 if got != want {
2100 t.Errorf("got %d want %d", got, want)
2101 }
2102 })
2103}
2104
// kelvinCodePoint is U+212A (KELVIN SIGN), written here in hexadecimal.
// The tests use it as the upper-case counterpart of 'k'.
const kelvinCodePoint = 0x212A
2106
2107func TestUnicodeVariableLength(t *testing.T) {
2108 lower := 'k'
2109 upper := rune(kelvinCodePoint)
2110
2111 needle := "nee" + string([]rune{lower}) + "eed"
2112 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2113 " ee" + string([]rune{lower}) + "ee" +
2114 " ee" + string([]rune{upper}) + "ee")
2115
2116 t.Run("LineMatches", func(t *testing.T) {
2117 b := testIndexBuilder(t, nil,
2118 Document{Name: "f1", Content: []byte(corpus)})
2119
2120 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2121 if len(res.Files) != 1 {
2122 t.Fatalf("got %v, wanted 1 match", res.Files)
2123 }
2124 })
2125
2126 t.Run("ChunkMatches", func(t *testing.T) {
2127 b := testIndexBuilder(t, nil,
2128 Document{Name: "f1", Content: []byte(corpus)})
2129
2130 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2131 if len(res.Files) != 1 {
2132 t.Fatalf("got %v, wanted 1 match", res.Files)
2133 }
2134 })
2135}
2136
2137func TestUnicodeFileStartOffsets(t *testing.T) {
2138 unicode := "世界"
2139 wat := "waaaaaat"
2140 b := testIndexBuilder(t, nil,
2141 Document{
2142 Name: "f1",
2143 Content: []byte(unicode),
2144 },
2145 Document{
2146 Name: "f2",
2147 Content: []byte(wat),
2148 },
2149 )
2150 q := &query.Substring{Pattern: wat, Content: true}
2151 res := searchForTest(t, b, q)
2152 if len(res.Files) != 1 {
2153 t.Fatalf("got %v, wanted 1 match", res.Files)
2154 }
2155}
2156
2157func TestLongFileUTF8(t *testing.T) {
2158 needle := "neeedle"
2159
2160 // 6 bytes.
2161 unicode := "世界"
2162 content := []byte(strings.Repeat(unicode, 100) + needle)
2163 b := testIndexBuilder(t, nil,
2164 Document{
2165 Name: "f1",
2166 Content: []byte(strings.Repeat("a", 50)),
2167 },
2168 Document{
2169 Name: "f2",
2170 Content: content,
2171 })
2172
2173 t.Run("LineMatches", func(t *testing.T) {
2174 q := &query.Substring{Pattern: needle, Content: true}
2175 res := searchForTest(t, b, q)
2176 if len(res.Files) != 1 {
2177 t.Errorf("got %v, want 1 result", res)
2178 }
2179 })
2180
2181 t.Run("ChunkMatches", func(t *testing.T) {
2182 q := &query.Substring{Pattern: needle, Content: true}
2183 res := searchForTest(t, b, q, chunkOpts)
2184 if len(res.Files) != 1 {
2185 t.Errorf("got %v, want 1 result", res)
2186 }
2187 })
2188}
2189
2190func TestEstimateDocCount(t *testing.T) {
2191 content := []byte("bla needle bla")
2192 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2193 Document{Name: "f1", Content: content},
2194 Document{Name: "f2", Content: content},
2195 )
2196
2197 t.Run("LineMatches", func(t *testing.T) {
2198 if sres := searchForTest(t, b,
2199 query.NewAnd(
2200 &query.Substring{Pattern: "needle"},
2201 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2202 ), SearchOptions{
2203 EstimateDocCount: true,
2204 }); sres.Stats.ShardFilesConsidered != 2 {
2205 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2206 }
2207 if sres := searchForTest(t, b,
2208 query.NewAnd(
2209 &query.Substring{Pattern: "needle"},
2210 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2211 ), SearchOptions{
2212 EstimateDocCount: true,
2213 }); sres.Stats.ShardFilesConsidered != 0 {
2214 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2215 }
2216 })
2217
2218 t.Run("ChunkMatches", func(t *testing.T) {
2219 if sres := searchForTest(t, b,
2220 query.NewAnd(
2221 &query.Substring{Pattern: "needle"},
2222 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2223 ), SearchOptions{
2224 EstimateDocCount: true,
2225 ChunkMatches: true,
2226 }); sres.Stats.ShardFilesConsidered != 2 {
2227 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2228 }
2229 if sres := searchForTest(t, b,
2230 query.NewAnd(
2231 &query.Substring{Pattern: "needle"},
2232 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2233 ), SearchOptions{
2234 EstimateDocCount: true,
2235 ChunkMatches: true,
2236 }); sres.Stats.ShardFilesConsidered != 0 {
2237 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2238 }
2239 })
2240}
2241
2242func TestUTF8CorrectCorpus(t *testing.T) {
2243 needle := "neeedle"
2244
2245 // 6 bytes.
2246 unicode := "世界"
2247 b := testIndexBuilder(t, nil,
2248 Document{
2249 Name: "f1",
2250 Content: []byte(strings.Repeat(unicode, 100)),
2251 },
2252 Document{
2253 Name: "xxxxxneeedle",
2254 Content: []byte("hello"),
2255 })
2256
2257 t.Run("LineMatches", func(t *testing.T) {
2258 q := &query.Substring{Pattern: needle, FileName: true}
2259 res := searchForTest(t, b, q)
2260 if len(res.Files) != 1 {
2261 t.Errorf("got %v, want 1 result", res)
2262 }
2263 })
2264
2265 t.Run("ChunkMatches", func(t *testing.T) {
2266 q := &query.Substring{Pattern: needle, FileName: true}
2267 res := searchForTest(t, b, q, chunkOpts)
2268 if len(res.Files) != 1 {
2269 t.Errorf("got %v, want 1 result", res)
2270 }
2271 })
2272}
2273
2274func TestBuilderStats(t *testing.T) {
2275 b := testIndexBuilder(t, nil,
2276 Document{
2277 Name: "f1",
2278 Content: []byte(strings.Repeat("abcd", 1024)),
2279 })
2280 var buf bytes.Buffer
2281 if err := b.Write(&buf); err != nil {
2282 t.Fatal(err)
2283 }
2284
2285 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2286 t.Errorf("got %d, want %d", got, want)
2287 }
2288}
2289
2290func TestIOStats(t *testing.T) {
2291 b := testIndexBuilder(t, nil,
2292 Document{
2293 Name: "f1",
2294 Content: []byte(strings.Repeat("abcd", 1024)),
2295 })
2296
2297 t.Run("LineMatches", func(t *testing.T) {
2298 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2299 res := searchForTest(t, b, q)
2300
2301 // 4096 (content) + 2 (overhead: newlines or doc sections)
2302 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2303 t.Errorf("got content I/O %d, want %d", got, want)
2304 }
2305
2306 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2307 // delta encoded.
2308 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2309 t.Errorf("got index I/O %d, want %d", got, want)
2310 }
2311 })
2312
2313 t.Run("ChunkMatches", func(t *testing.T) {
2314 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2315 res := searchForTest(t, b, q, chunkOpts)
2316
2317 // 4096 (content) + 2 (overhead: newlines or doc sections)
2318 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2319 t.Errorf("got content I/O %d, want %d", got, want)
2320 }
2321
2322 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2323 // delta encoded.
2324 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2325 t.Errorf("got index I/O %d, want %d", got, want)
2326 }
2327 })
2328}
2329
2330func TestStartLineAnchor(t *testing.T) {
2331 b := testIndexBuilder(t, nil,
2332 Document{
2333 Name: "f1",
2334 Content: []byte(
2335 `hello
2336start of middle of line
2337`),
2338 })
2339
2340 t.Run("LineMatches", func(t *testing.T) {
2341 q, err := query.Parse("^start")
2342 if err != nil {
2343 t.Errorf("parse: %v", err)
2344 }
2345
2346 res := searchForTest(t, b, q)
2347 if len(res.Files) != 1 {
2348 t.Errorf("got %v, want 1 file", res.Files)
2349 }
2350
2351 q, err = query.Parse("^middle")
2352 if err != nil {
2353 t.Errorf("parse: %v", err)
2354 }
2355 res = searchForTest(t, b, q)
2356 if len(res.Files) != 0 {
2357 t.Errorf("got %v, want 0 files", res.Files)
2358 }
2359 })
2360
2361 t.Run("ChunkMatches", func(t *testing.T) {
2362 q, err := query.Parse("^start")
2363 if err != nil {
2364 t.Errorf("parse: %v", err)
2365 }
2366
2367 res := searchForTest(t, b, q, chunkOpts)
2368 if len(res.Files) != 1 {
2369 t.Errorf("got %v, want 1 file", res.Files)
2370 }
2371
2372 q, err = query.Parse("^middle")
2373 if err != nil {
2374 t.Errorf("parse: %v", err)
2375 }
2376 res = searchForTest(t, b, q, chunkOpts)
2377 if len(res.Files) != 0 {
2378 t.Errorf("got %v, want 0 files", res.Files)
2379 }
2380 })
2381}
2382
2383func TestAndOrUnicode(t *testing.T) {
2384 q, err := query.Parse("orange.*apple")
2385 if err != nil {
2386 t.Errorf("parse: %v", err)
2387 }
2388 finalQ := query.NewAnd(q,
2389 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2390 query.NewOr(&query.Branch{Pattern: "master"}))))
2391
2392 b := testIndexBuilder(t, &Repository{
2393 Name: "name",
2394 Branches: []RepositoryBranch{{"master", "master-version"}},
2395 }, Document{
2396 Name: "f2",
2397 Content: []byte("orange\u2318apple"),
2398 // --------------0123456 78901
2399 Branches: []string{"master"},
2400 })
2401
2402 t.Run("LineMatches", func(t *testing.T) {
2403 res := searchForTest(t, b, finalQ)
2404 if len(res.Files) != 1 {
2405 t.Errorf("got %v, want 1 result", res.Files)
2406 }
2407 })
2408
2409 t.Run("ChunkMatches", func(t *testing.T) {
2410 res := searchForTest(t, b, finalQ, chunkOpts)
2411 if len(res.Files) != 1 {
2412 t.Errorf("got %v, want 1 result", res.Files)
2413 }
2414 })
2415}
2416
2417func TestAndShort(t *testing.T) {
2418 content := []byte("bla needle at orange bla")
2419 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2420 Document{Name: "f1", Content: content},
2421 Document{Name: "f2", Content: []byte("xx at xx")},
2422 Document{Name: "f3", Content: []byte("yy orange xx")},
2423 )
2424
2425 q := query.NewAnd(&query.Substring{Pattern: "at"},
2426 &query.Substring{Pattern: "orange"})
2427
2428 t.Run("LineMatches", func(t *testing.T) {
2429 res := searchForTest(t, b, q)
2430 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2431 t.Errorf("got %v, want 1 result", res.Files)
2432 }
2433 })
2434
2435 t.Run("ChunkMatches", func(t *testing.T) {
2436 res := searchForTest(t, b, q, chunkOpts)
2437 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2438 t.Errorf("got %v, want 1 result", res.Files)
2439 }
2440 })
2441}
2442
2443func TestNoCollectRegexpSubstring(t *testing.T) {
2444 content := []byte("bla final bla\nfoo final, foo")
2445 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2446 Document{Name: "f1", Content: content},
2447 )
2448
2449 q := &query.Regexp{
2450 Regexp: mustParseRE("final[,.]"),
2451 }
2452
2453 t.Run("LineMatches", func(t *testing.T) {
2454 res := searchForTest(t, b, q)
2455 if len(res.Files) != 1 {
2456 t.Fatalf("got %v, want 1 result", res.Files)
2457 }
2458 if f := res.Files[0]; len(f.LineMatches) != 1 {
2459 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2460 }
2461 })
2462
2463 t.Run("ChunkMatches", func(t *testing.T) {
2464 res := searchForTest(t, b, q, chunkOpts)
2465 if len(res.Files) != 1 {
2466 t.Fatalf("got %v, want 1 result", res.Files)
2467 }
2468 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2469 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2470 }
2471 })
2472}
2473
2474func printLineMatches(ms []LineMatch) string {
2475 var ss []string
2476 for _, m := range ms {
2477 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2478 }
2479
2480 return strings.Join(ss, ", ")
2481}
2482
2483func TestLang(t *testing.T) {
2484 content := []byte("bla needle bla")
2485 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2486 Document{Name: "f1", Content: content},
2487 Document{Name: "f2", Language: "java", Content: content},
2488 Document{Name: "f3", Language: "cpp", Content: content},
2489 )
2490
2491 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2492 &query.Language{Language: "cpp"})
2493
2494 t.Run("LineMatches", func(t *testing.T) {
2495 res := searchForTest(t, b, q)
2496 if len(res.Files) != 1 {
2497 t.Fatalf("got %v, want 1 result in f3", res.Files)
2498 }
2499 f := res.Files[0]
2500 if f.FileName != "f3" || f.Language != "cpp" {
2501 t.Fatalf("got %v, want 1 match with language cpp", f)
2502 }
2503 })
2504
2505 t.Run("ChunkMatches", func(t *testing.T) {
2506 res := searchForTest(t, b, q, chunkOpts)
2507 if len(res.Files) != 1 {
2508 t.Fatalf("got %v, want 1 result in f3", res.Files)
2509 }
2510 f := res.Files[0]
2511 if f.FileName != "f3" || f.Language != "cpp" {
2512 t.Fatalf("got %v, want 1 match with language cpp", f)
2513 }
2514 })
2515}
2516
2517func TestLangShortcut(t *testing.T) {
2518 content := []byte("bla needle bla")
2519 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2520 Document{Name: "f2", Language: "java", Content: content},
2521 Document{Name: "f3", Language: "cpp", Content: content},
2522 )
2523
2524 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2525 &query.Language{Language: "fortran"})
2526
2527 t.Run("LineMatches", func(t *testing.T) {
2528 res := searchForTest(t, b, q)
2529 if len(res.Files) != 0 {
2530 t.Fatalf("got %v, want 0 results", res.Files)
2531 }
2532 if res.Stats.IndexBytesLoaded > 0 {
2533 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2534 }
2535 })
2536
2537 t.Run("ChunkMatches", func(t *testing.T) {
2538 res := searchForTest(t, b, q, chunkOpts)
2539 if len(res.Files) != 0 {
2540 t.Fatalf("got %v, want 0 results", res.Files)
2541 }
2542 if res.Stats.IndexBytesLoaded > 0 {
2543 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2544 }
2545 })
2546}
2547
2548func TestNoTextMatchAtoms(t *testing.T) {
2549 content := []byte("bla needle bla")
2550 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2551 Document{Name: "f1", Content: content},
2552 Document{Name: "f2", Language: "java", Content: content},
2553 Document{Name: "f3", Language: "cpp", Content: content},
2554 )
2555 q := query.NewAnd(&query.Language{Language: "java"})
2556 t.Run("LineMatches", func(t *testing.T) {
2557 res := searchForTest(t, b, q)
2558 if len(res.Files) != 1 {
2559 t.Fatalf("got %v, want 1 result in f3", res.Files)
2560 }
2561 })
2562
2563 t.Run("ChunkMatches", func(t *testing.T) {
2564 res := searchForTest(t, b, q, chunkOpts)
2565 if len(res.Files) != 1 {
2566 t.Fatalf("got %v, want 1 result in f3", res.Files)
2567 }
2568 })
2569}
2570
2571func TestNoPositiveAtoms(t *testing.T) {
2572 content := []byte("bla needle bla")
2573 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2574 Document{Name: "f1", Content: content},
2575 Document{Name: "f2", Content: content},
2576 )
2577
2578 q := query.NewAnd(
2579 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2580 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2581 t.Run("LineMatches", func(t *testing.T) {
2582 res := searchForTest(t, b, q)
2583 if len(res.Files) != 2 {
2584 t.Fatalf("got %v, want 2 results in f3", res.Files)
2585 }
2586 })
2587 t.Run("ChunkMatches", func(t *testing.T) {
2588 res := searchForTest(t, b, q, chunkOpts)
2589 if len(res.Files) != 2 {
2590 t.Fatalf("got %v, want 2 results in f3", res.Files)
2591 }
2592 })
2593}
2594
2595func TestSymbolBoundaryStart(t *testing.T) {
2596 content := []byte("start\nbla bla\nend")
2597 // ----------------012345-67890123-456
2598
2599 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2600 Document{
2601 Name: "f1",
2602 Content: content,
2603 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2604 },
2605 )
2606 q := &query.Symbol{
2607 Expr: &query.Substring{Pattern: "start"},
2608 }
2609 t.Run("LineMatches", func(t *testing.T) {
2610 res := searchForTest(t, b, q)
2611 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2612 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2613 }
2614 m := res.Files[0].LineMatches[0].LineFragments[0]
2615 if m.Offset != 0 {
2616 t.Fatalf("got offset %d want 0", m.Offset)
2617 }
2618 })
2619
2620 t.Run("ChunkMatches", func(t *testing.T) {
2621 res := searchForTest(t, b, q, chunkOpts)
2622 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2623 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2624 }
2625 m := res.Files[0].ChunkMatches[0].Ranges[0]
2626 if m.Start.ByteOffset != 0 {
2627 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2628 }
2629 })
2630}
2631
2632func TestSymbolBoundaryEnd(t *testing.T) {
2633 content := []byte("start\nbla bla\nend")
2634 // ----------------012345-67890123-456
2635
2636 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2637 Document{
2638 Name: "f1",
2639 Content: content,
2640 Symbols: []DocumentSection{{14, 17}},
2641 },
2642 )
2643 q := &query.Symbol{
2644 Expr: &query.Substring{Pattern: "end"},
2645 }
2646 t.Run("LineMatches", func(t *testing.T) {
2647 res := searchForTest(t, b, q)
2648 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2649 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2650 }
2651 m := res.Files[0].LineMatches[0].LineFragments[0]
2652 if m.Offset != 14 {
2653 t.Fatalf("got offset %d want 0", m.Offset)
2654 }
2655 })
2656
2657 t.Run("ChunkMatches", func(t *testing.T) {
2658 res := searchForTest(t, b, q, chunkOpts)
2659 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2660 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2661 }
2662 m := res.Files[0].ChunkMatches[0].Ranges[0]
2663 if m.Start.ByteOffset != 14 {
2664 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2665 }
2666 })
2667}
2668
2669func TestSymbolSubstring(t *testing.T) {
2670 content := []byte("bla\nsymblabla\nbla")
2671 // ----------------0123-4567890123-456
2672
2673 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2674 Document{
2675 Name: "f1",
2676 Content: content,
2677 Symbols: []DocumentSection{{4, 12}},
2678 },
2679 )
2680 q := &query.Symbol{
2681 Expr: &query.Substring{Pattern: "bla"},
2682 }
2683 t.Run("LineMatches", func(t *testing.T) {
2684 res := searchForTest(t, b, q)
2685 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2686 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2687 }
2688 m := res.Files[0].LineMatches[0].LineFragments[0]
2689 if m.Offset != 7 || m.MatchLength != 3 {
2690 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2691 }
2692 })
2693
2694 t.Run("ChunkMatches", func(t *testing.T) {
2695 res := searchForTest(t, b, q, chunkOpts)
2696 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2697 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2698 }
2699 m := res.Files[0].ChunkMatches[0].Ranges[0]
2700 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2701 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2702 }
2703 })
2704}
2705
2706func TestSymbolSubstringExact(t *testing.T) {
2707 content := []byte("bla\nsym\nbla\nsym\nasymb")
2708 // ----------------0123-4567-890123456-78901
2709
2710 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2711 Document{
2712 Name: "f1",
2713 Content: content,
2714 Symbols: []DocumentSection{{4, 7}},
2715 },
2716 )
2717 q := &query.Symbol{
2718 Expr: &query.Substring{Pattern: "sym"},
2719 }
2720 t.Run("LineMatches", func(t *testing.T) {
2721 res := searchForTest(t, b, q)
2722 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2723 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2724 }
2725 m := res.Files[0].LineMatches[0].LineFragments[0]
2726 if m.Offset != 4 {
2727 t.Fatalf("got offset %d, want 7", m.Offset)
2728 }
2729 })
2730
2731 t.Run("ChunkMatches", func(t *testing.T) {
2732 res := searchForTest(t, b, q, chunkOpts)
2733 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2734 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2735 }
2736 m := res.Files[0].ChunkMatches[0].Ranges[0]
2737 if m.Start.ByteOffset != 4 {
2738 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2739 }
2740 })
2741}
2742
2743func TestSymbolRegexpExact(t *testing.T) {
2744 content := []byte("blah\nbla\nbl")
2745 // ----------------01234-5678-90
2746
2747 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2748 Document{
2749 Name: "f1",
2750 Content: content,
2751 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2752 },
2753 )
2754 q := &query.Symbol{
2755 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2756 }
2757 t.Run("LineMatches", func(t *testing.T) {
2758 res := searchForTest(t, b, q)
2759 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2760 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2761 }
2762 m := res.Files[0].LineMatches[0].LineFragments[0]
2763 if m.Offset != 5 {
2764 t.Fatalf("got offset %d, want 5", m.Offset)
2765 }
2766 })
2767
2768 t.Run("ChunkMatches", func(t *testing.T) {
2769 res := searchForTest(t, b, q, chunkOpts)
2770 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2771 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2772 }
2773 m := res.Files[0].ChunkMatches[0].Ranges[0]
2774 if m.Start.ByteOffset != 5 {
2775 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2776 }
2777 })
2778}
2779
2780func TestSymbolRegexpPartial(t *testing.T) {
2781 content := []byte("abcdef")
2782 // ----------------012345
2783
2784 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2785 Document{
2786 Name: "f1",
2787 Content: content,
2788 Symbols: []DocumentSection{{0, 6}},
2789 },
2790 )
2791 q := &query.Symbol{
2792 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2793 }
2794 t.Run("LineMatches", func(t *testing.T) {
2795 res := searchForTest(t, b, q)
2796 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2797 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2798 }
2799 m := res.Files[0].LineMatches[0].LineFragments[0]
2800 if m.Offset != 1 {
2801 t.Fatalf("got offset %d, want 1", m.Offset)
2802 }
2803 if m.MatchLength != 3 {
2804 t.Fatalf("got match length %d, want 3", m.MatchLength)
2805 }
2806 })
2807
2808 t.Run("ChunkMatches", func(t *testing.T) {
2809 res := searchForTest(t, b, q, chunkOpts)
2810 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2811 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2812 }
2813 m := res.Files[0].ChunkMatches[0].Ranges[0]
2814 if m.Start.ByteOffset != 1 {
2815 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2816 }
2817 if m.End.ByteOffset != 4 {
2818 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2819 }
2820 })
2821}
2822
2823func TestSymbolRegexpAll(t *testing.T) {
2824 docs := []Document{
2825 {
2826 Name: "f1",
2827 Content: []byte("Hello Zoekt"),
2828 // --------------01234567890
2829 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2830 },
2831 {
2832 Name: "f2",
2833 Content: []byte("Second Zoekt Third"),
2834 // --------------012345678901234567
2835 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2836 },
2837 }
2838
2839 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2840 q := &query.Symbol{
2841 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2842 }
2843 t.Run("LineMatches", func(t *testing.T) {
2844 res := searchForTest(t, b, q)
2845 if len(res.Files) != len(docs) {
2846 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2847 }
2848 for i, want := range docs {
2849 got := res.Files[i].LineMatches[0].LineFragments
2850 if len(got) != len(want.Symbols) {
2851 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2852 }
2853
2854 for j, sec := range want.Symbols {
2855 if sec.Start != got[j].Offset {
2856 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
2857 }
2858 }
2859 }
2860 })
2861
2862 t.Run("ChunkMatches", func(t *testing.T) {
2863 res := searchForTest(t, b, q, chunkOpts)
2864 if len(res.Files) != len(docs) {
2865 t.Fatalf("got %v, want %d file", res.Files, len(docs))
2866 }
2867 for i, want := range docs {
2868 got := res.Files[i].ChunkMatches[0].Ranges
2869 if len(got) != len(want.Symbols) {
2870 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
2871 }
2872
2873 for j, sec := range want.Symbols {
2874 if sec.Start != uint32(got[j].Start.ByteOffset) {
2875 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
2876 }
2877 }
2878 }
2879 })
2880}
2881
2882func TestHitIterTerminate(t *testing.T) {
2883 // contrived input: trigram frequencies forces selecting abc +
2884 // def for the distance iteration. There is no match, so this
2885 // will advance the compressedPostingIterator to beyond the
2886 // end.
2887 content := []byte("abc bcdbcd cdecde abcabc def efg")
2888 b := testIndexBuilder(t, nil,
2889 Document{
2890 Name: "f1",
2891 Content: content,
2892 },
2893 )
2894
2895 t.Run("LineMatches", func(t *testing.T) {
2896 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
2897 })
2898
2899 t.Run("ChunkMatches", func(t *testing.T) {
2900 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
2901 })
2902}
2903
2904func TestDistanceHitIterBailLast(t *testing.T) {
2905 content := []byte("AST AST AST UASH")
2906 b := testIndexBuilder(t, nil,
2907 Document{
2908 Name: "f1",
2909 Content: content,
2910 },
2911 )
2912 t.Run("LineMatches", func(t *testing.T) {
2913 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
2914 if len(res.Files) != 0 {
2915 t.Fatalf("got %v, want no results", res.Files)
2916 }
2917 })
2918
2919 t.Run("LineMatches", func(t *testing.T) {
2920 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
2921 if len(res.Files) != 0 {
2922 t.Fatalf("got %v, want no results", res.Files)
2923 }
2924 })
2925}
2926
2927func TestDocumentSectionRuneBoundary(t *testing.T) {
2928 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2929 b, err := NewIndexBuilder(nil)
2930 if err != nil {
2931 t.Fatalf("NewIndexBuilder: %v", err)
2932 }
2933
2934 for i, sec := range []DocumentSection{
2935 {2, 6},
2936 {3, 7},
2937 } {
2938 if err := b.Add(Document{
2939 Name: "f1",
2940 Content: []byte(content),
2941 Symbols: []DocumentSection{sec},
2942 }); err == nil {
2943 t.Errorf("%d: Add succeeded", i)
2944 }
2945 }
2946}
2947
2948func TestUnicodeQuery(t *testing.T) {
2949 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
2950 b := testIndexBuilder(t, nil,
2951 Document{
2952 Name: "f1",
2953 Content: []byte(content),
2954 },
2955 )
2956
2957 q := &query.Substring{Pattern: content}
2958
2959 t.Run("LineMatches", func(t *testing.T) {
2960 res := searchForTest(t, b, q)
2961 if len(res.Files) != 1 {
2962 t.Fatalf("want 1 match, got %v", res.Files)
2963 }
2964
2965 f := res.Files[0]
2966 if len(f.LineMatches) != 1 {
2967 t.Fatalf("want 1 line, got %v", f.LineMatches)
2968 }
2969 l := f.LineMatches[0]
2970
2971 if len(l.LineFragments) != 1 {
2972 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
2973 }
2974 fr := l.LineFragments[0]
2975 if fr.MatchLength != len(content) {
2976 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
2977 }
2978 })
2979
2980 t.Run("ChunkMatches", func(t *testing.T) {
2981 res := searchForTest(t, b, q, chunkOpts)
2982 if len(res.Files) != 1 {
2983 t.Fatalf("want 1 match, got %v", res.Files)
2984 }
2985
2986 f := res.Files[0]
2987 if len(f.ChunkMatches) != 1 {
2988 t.Fatalf("want 1 line, got %v", f.LineMatches)
2989 }
2990 cm := f.ChunkMatches[0]
2991
2992 if len(cm.Ranges) != 1 {
2993 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
2994 }
2995 rr := cm.Ranges[0]
2996 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
2997 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
2998 }
2999 })
3000}
3001
3002func TestSkipInvalidContent(t *testing.T) {
3003 for _, content := range []string{
3004 // Binary
3005 "abc def \x00 abc",
3006 } {
3007
3008 b, err := NewIndexBuilder(nil)
3009 if err != nil {
3010 t.Fatalf("NewIndexBuilder: %v", err)
3011 }
3012
3013 if err := b.Add(Document{
3014 Name: "f1",
3015 Content: []byte(content),
3016 }); err != nil {
3017 t.Fatal(err)
3018 }
3019
3020 t.Run("LineMatches", func(t *testing.T) {
3021 q := &query.Substring{Pattern: "abc def"}
3022 res := searchForTest(t, b, q)
3023 if len(res.Files) != 0 {
3024 t.Fatalf("got %v, want no results", res.Files)
3025 }
3026
3027 q = &query.Substring{Pattern: "NOT-INDEXED"}
3028 res = searchForTest(t, b, q)
3029 if len(res.Files) != 1 {
3030 t.Fatalf("got %v, want 1 result", res.Files)
3031 }
3032 })
3033
3034 t.Run("ChunkMatches", func(t *testing.T) {
3035 q := &query.Substring{Pattern: "abc def"}
3036 res := searchForTest(t, b, q, chunkOpts)
3037 if len(res.Files) != 0 {
3038 t.Fatalf("got %v, want no results", res.Files)
3039 }
3040
3041 q = &query.Substring{Pattern: "NOT-INDEXED"}
3042 res = searchForTest(t, b, q, chunkOpts)
3043 if len(res.Files) != 1 {
3044 t.Fatalf("got %v, want 1 result", res.Files)
3045 }
3046 })
3047 }
3048}
3049
3050func TestCheckText(t *testing.T) {
3051 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3052 if err := CheckText([]byte(text), 20000); err != nil {
3053 t.Errorf("CheckText(%q): %v", text, err)
3054 }
3055 }
3056 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3057 if err := CheckText([]byte(text), 15); err == nil {
3058 t.Errorf("CheckText(%q) succeeded", text)
3059 }
3060 }
3061}
3062
3063func TestLineAnd(t *testing.T) {
3064 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3065 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3066 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3067 Document{Name: "f3", Content: []byte("banana grape")},
3068 )
3069 pattern := "(apple)(?-s:.)*?(banana)"
3070 r, _ := syntax.Parse(pattern, syntax.Perl)
3071
3072 q := query.Regexp{
3073 Regexp: r,
3074 Content: true,
3075 }
3076 t.Run("LineMatches", func(t *testing.T) {
3077 res := searchForTest(t, b, &q)
3078 wantRegexpCount := 1
3079 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3080 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3081 }
3082 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3083 t.Errorf("got %v, want 1 result", res.Files)
3084 }
3085 })
3086
3087 t.Run("ChunkMatches", func(t *testing.T) {
3088 res := searchForTest(t, b, &q, chunkOpts)
3089 wantRegexpCount := 1
3090 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3091 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3092 }
3093 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3094 t.Errorf("got %v, want 1 result", res.Files)
3095 }
3096 })
3097}
3098
3099func TestLineAndFileName(t *testing.T) {
3100 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3101 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3102 Document{Name: "f2", Content: []byte("apple banana\norange")},
3103 Document{Name: "apple banana", Content: []byte("banana grape")},
3104 )
3105 pattern := "(apple)(?-s:.)*?(banana)"
3106 r, _ := syntax.Parse(pattern, syntax.Perl)
3107
3108 q := query.Regexp{
3109 Regexp: r,
3110 FileName: true,
3111 }
3112 t.Run("LineMatches", func(t *testing.T) {
3113 res := searchForTest(t, b, &q)
3114 wantRegexpCount := 1
3115 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3116 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3117 }
3118 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3119 t.Errorf("got %v, want 1 result", res.Files)
3120 }
3121 })
3122
3123 t.Run("ChunkMatches", func(t *testing.T) {
3124 res := searchForTest(t, b, &q, chunkOpts)
3125 wantRegexpCount := 1
3126 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3127 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3128 }
3129 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3130 t.Errorf("got %v, want 1 result", res.Files)
3131 }
3132 })
3133}
3134
3135func TestMultiLineRegex(t *testing.T) {
3136 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3137 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3138 Document{Name: "f2", Content: []byte("apple orange")},
3139 Document{Name: "f3", Content: []byte("grape apple")},
3140 )
3141 pattern := "(apple).*?[[:space:]].*?(grape)"
3142 r, _ := syntax.Parse(pattern, syntax.Perl)
3143
3144 q := query.Regexp{
3145 Regexp: r,
3146 }
3147 t.Run("LineMatches", func(t *testing.T) {
3148 res := searchForTest(t, b, &q)
3149 wantRegexpCount := 2
3150 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3151 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3152 }
3153 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3154 t.Errorf("got %v, want 1 result", res.Files)
3155 }
3156 if l := len(res.Files[0].LineMatches); l != 2 {
3157 t.Errorf("got %v, want 2 line matches", l)
3158 }
3159 })
3160
3161 t.Run("ChunkMatches", func(t *testing.T) {
3162 res := searchForTest(t, b, &q, chunkOpts)
3163 wantRegexpCount := 2
3164 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3165 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3166 }
3167 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3168 t.Errorf("got %v, want 1 result", res.Files)
3169 }
3170 if l := len(res.Files[0].ChunkMatches); l != 1 {
3171 t.Errorf("got %v, want 1 chunk matches", l)
3172 }
3173 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3174 t.Errorf("got %v, want 1 chunk ranges", l)
3175 }
3176 })
3177}
3178
3179func TestSearchTypeFileName(t *testing.T) {
3180 b := testIndexBuilder(t, &Repository{
3181 Name: "reponame",
3182 },
3183 Document{Name: "f1", Content: []byte("bla the needle")},
3184 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3185 // -----------------------------------012345678901234567890-123456
3186 )
3187
3188 t.Run("LineMatches", func(t *testing.T) {
3189 wantSingleMatch := func(res *SearchResult, want string) {
3190 t.Helper()
3191 fmatches := res.Files
3192 if len(fmatches) != 1 {
3193 t.Errorf("got %v, want 1 matches", len(fmatches))
3194 return
3195 }
3196 if len(fmatches[0].LineMatches) != 1 {
3197 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3198 return
3199 }
3200 var got string
3201 if fmatches[0].LineMatches[0].FileName {
3202 got = fmatches[0].FileName
3203 } else {
3204 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3205 }
3206
3207 if got != want {
3208 t.Errorf("got %s, want %s", got, want)
3209 }
3210 }
3211
3212 // Only return the later match in the second file
3213 res := searchForTest(t, b, query.NewAnd(
3214 &query.Type{
3215 Type: query.TypeFileName,
3216 Child: &query.Substring{Pattern: "needle"},
3217 },
3218 &query.Substring{Pattern: "file"}))
3219 wantSingleMatch(res, "f2:8")
3220
3221 // Only return a filename result
3222 res = searchForTest(t, b,
3223 &query.Type{
3224 Type: query.TypeFileName,
3225 Child: &query.Substring{Pattern: "file"},
3226 })
3227 wantSingleMatch(res, "f2")
3228 })
3229
3230 t.Run("ChunkMatches", func(t *testing.T) {
3231 wantSingleMatch := func(res *SearchResult, want string) {
3232 t.Helper()
3233 fmatches := res.Files
3234 if len(fmatches) != 1 {
3235 t.Errorf("got %v, want 1 matches", len(fmatches))
3236 return
3237 }
3238 if len(fmatches[0].ChunkMatches) != 1 {
3239 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3240 return
3241 }
3242 var got string
3243 if fmatches[0].ChunkMatches[0].FileName {
3244 got = fmatches[0].FileName
3245 } else {
3246 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3247 }
3248
3249 if got != want {
3250 t.Errorf("got %s, want %s", got, want)
3251 }
3252 }
3253
3254 // Only return the later match in the second file
3255 res := searchForTest(t, b, query.NewAnd(
3256 &query.Type{
3257 Type: query.TypeFileName,
3258 Child: &query.Substring{Pattern: "needle"},
3259 },
3260 &query.Substring{Pattern: "file"}),
3261 chunkOpts,
3262 )
3263 wantSingleMatch(res, "f2:8")
3264
3265 // Only return a filename result
3266 res = searchForTest(t, b,
3267 &query.Type{
3268 Type: query.TypeFileName,
3269 Child: &query.Substring{Pattern: "file"},
3270 },
3271 chunkOpts,
3272 )
3273 wantSingleMatch(res, "f2")
3274 })
3275}
3276
3277func TestSearchTypeLanguage(t *testing.T) {
3278 b := testIndexBuilder(t, &Repository{
3279 Name: "reponame",
3280 },
3281 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3282 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3283 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3284 )
3285
3286 t.Log(b.languageMap)
3287
3288 t.Run("LineMatches", func(t *testing.T) {
3289 wantSingleMatch := func(res *SearchResult, want string) {
3290 t.Helper()
3291 fmatches := res.Files
3292 if len(fmatches) != 1 {
3293 t.Errorf("got %v, want 1 matches", len(fmatches))
3294 return
3295 }
3296 if len(fmatches[0].LineMatches) != 1 {
3297 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3298 return
3299 }
3300 var got string
3301 if fmatches[0].LineMatches[0].FileName {
3302 got = fmatches[0].FileName
3303 } else {
3304 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3305 }
3306
3307 if got != want {
3308 t.Errorf("got %s, want %s", got, want)
3309 }
3310 }
3311
3312 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3313 wantSingleMatch(res, "apex.cls")
3314
3315 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3316 wantSingleMatch(res, "tex.cls")
3317
3318 res = searchForTest(t, b, &query.Language{Language: "C"})
3319 wantSingleMatch(res, "hello.h")
3320
3321 // test fallback language search by pretending it's an older index version
3322 res = searchForTest(t, b, &query.Language{Language: "C++"})
3323 if len(res.Files) != 0 {
3324 t.Errorf("got %d results for C++, want 0", len(res.Files))
3325 }
3326
3327 b.featureVersion = 11 // force fallback
3328 res = searchForTest(t, b, &query.Language{Language: "C++"})
3329 wantSingleMatch(res, "hello.h")
3330 })
3331
3332 t.Run("ChunkMatches", func(t *testing.T) {
3333 wantSingleMatch := func(res *SearchResult, want string) {
3334 t.Helper()
3335 fmatches := res.Files
3336 if len(fmatches) != 1 {
3337 t.Errorf("got %v, want 1 matches", len(fmatches))
3338 return
3339 }
3340 if len(fmatches[0].ChunkMatches) != 1 {
3341 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3342 return
3343 }
3344 var got string
3345 if fmatches[0].ChunkMatches[0].FileName {
3346 got = fmatches[0].FileName
3347 } else {
3348 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3349 }
3350
3351 if got != want {
3352 t.Errorf("got %s, want %s", got, want)
3353 }
3354 }
3355
3356 b.featureVersion = FeatureVersion // reset feature version
3357 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3358 wantSingleMatch(res, "apex.cls")
3359
3360 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3361 wantSingleMatch(res, "tex.cls")
3362
3363 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3364 wantSingleMatch(res, "hello.h")
3365
3366 // test fallback language search by pretending it's an older index version
3367 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3368 if len(res.Files) != 0 {
3369 t.Errorf("got %d results for C++, want 0", len(res.Files))
3370 }
3371
3372 b.featureVersion = 11 // force fallback
3373 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3374 wantSingleMatch(res, "hello.h")
3375 })
3376}
3377
3378func TestStats(t *testing.T) {
3379 ignored := []cmp.Option{
3380 cmpopts.EquateEmpty(),
3381 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3382 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3383 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3384 }
3385
3386 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3387 searcher := searcherForTest(t, b)
3388 indexdata := searcher.(*indexData)
3389 return indexdata.repoListEntry
3390 }
3391
3392 t.Run("one empty repo", func(t *testing.T) {
3393 b := testIndexBuilder(t, nil)
3394 got := repoListEntries(b)
3395 want := []RepoListEntry{
3396 {
3397 Stats: RepoStats{
3398 Repos: 0,
3399 Shards: 1,
3400 Documents: 0,
3401 IndexBytes: 20,
3402 ContentBytes: 0,
3403 NewLinesCount: 0,
3404 DefaultBranchNewLinesCount: 0,
3405 OtherBranchesNewLinesCount: 0,
3406 },
3407 },
3408 }
3409
3410 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3411 t.Fatalf("mismatch (-want +got):\n%s", diff)
3412 }
3413
3414 })
3415
3416 t.Run("one simple shard", func(t *testing.T) {
3417 b := testIndexBuilder(t, nil,
3418 Document{Name: "doc 0", Content: []byte("content 0")},
3419 Document{Name: "doc 1", Content: []byte("content 1")},
3420 )
3421 got := repoListEntries(b)
3422 want := []RepoListEntry{
3423 {
3424 Stats: RepoStats{
3425 Repos: 0,
3426 Shards: 1,
3427 Documents: 2,
3428 IndexBytes: 224,
3429 ContentBytes: 28,
3430 NewLinesCount: 0,
3431 DefaultBranchNewLinesCount: 0,
3432 OtherBranchesNewLinesCount: 0,
3433 },
3434 },
3435 }
3436
3437 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3438 t.Fatalf("mismatch (-want +got):\n%s", diff)
3439 }
3440
3441 })
3442
3443 t.Run("one compound shard", func(t *testing.T) {
3444 b := testIndexBuilderCompound(t,
3445 []*Repository{
3446 {Name: "repo 0"},
3447 {Name: "repo 1"},
3448 },
3449 [][]Document{
3450 {
3451 {Name: "doc 0", Content: []byte("content 0")},
3452 {Name: "doc 1", Content: []byte("content 1")},
3453 },
3454 {
3455 {Name: "doc 2", Content: []byte("content 2")},
3456 {Name: "doc 3", Content: []byte("content 3")},
3457 },
3458 },
3459 )
3460 got := repoListEntries(b)
3461 want := []RepoListEntry{
3462 {
3463 Stats: RepoStats{
3464 Repos: 0,
3465 Shards: 1,
3466 Documents: 2,
3467 IndexBytes: 180,
3468 ContentBytes: 28,
3469 NewLinesCount: 0,
3470 DefaultBranchNewLinesCount: 0,
3471 OtherBranchesNewLinesCount: 0,
3472 },
3473 },
3474 {
3475 Stats: RepoStats{
3476 Repos: 0,
3477 Shards: 1,
3478 Documents: 2,
3479 IndexBytes: 180,
3480 ContentBytes: 28,
3481 NewLinesCount: 0,
3482 DefaultBranchNewLinesCount: 0,
3483 OtherBranchesNewLinesCount: 0,
3484 },
3485 },
3486 }
3487
3488 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3489 t.Fatalf("mismatch (-want +got):\n%s", diff)
3490 }
3491 })
3492
3493 t.Run("compound shard with empty repos", func(t *testing.T) {
3494 b := testIndexBuilderCompound(t,
3495 []*Repository{
3496 {Name: "repo 0"},
3497 {Name: "repo 1"},
3498 {Name: "repo 2"},
3499 {Name: "repo 3"},
3500 {Name: "repo 4"},
3501 },
3502 [][]Document{
3503 {{Name: "doc 0", Content: []byte("content 0")}},
3504 nil,
3505 {{Name: "doc 1", Content: []byte("content 1")}},
3506 nil,
3507 nil,
3508 },
3509 )
3510 got := repoListEntries(b)
3511
3512 entryEmpty := RepoListEntry{Stats: RepoStats{
3513 Shards: 1,
3514 Documents: 0,
3515 ContentBytes: 0,
3516 }}
3517 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3518 Shards: 1,
3519 Documents: 1,
3520 ContentBytes: 14,
3521 }}
3522
3523 want := []RepoListEntry{
3524 entryNonEmpty,
3525 entryEmpty,
3526 entryNonEmpty,
3527 entryEmpty,
3528 entryEmpty,
3529 }
3530
3531 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3532 t.Fatalf("mismatch (-want +got):\n%s", diff)
3533 }
3534
3535 })
3536}
3537
3538// This tests the frequent pattern "\bLITERAL\b".
3539func TestWordSearch(t *testing.T) {
3540 content := []byte("needle the bla")
3541 // ----------------01234567890123
3542
3543 b := testIndexBuilder(t, nil,
3544 Document{
3545 Name: "f1",
3546 Content: content,
3547 })
3548
3549 t.Run("LineMatches", func(t *testing.T) {
3550 sres := searchForTest(t, b,
3551 &query.Regexp{
3552 Regexp: mustParseRE("\\bthe\\b"),
3553 CaseSensitive: true,
3554 Content: true,
3555 })
3556
3557 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3558 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3559 }
3560
3561 if sres.Stats.RegexpsConsidered != 0 {
3562 t.Fatal("expected regexp to be skipped")
3563 }
3564
3565 got := sres.Files[0].LineMatches[0]
3566 want := LineMatch{
3567 LineFragments: []LineFragmentMatch{{
3568 LineOffset: 7,
3569 Offset: 7,
3570 MatchLength: 3,
3571 }},
3572 Line: content,
3573 FileName: false,
3574 LineNumber: 1,
3575 LineStart: 0,
3576 LineEnd: 14,
3577 }
3578
3579 if !reflect.DeepEqual(got, want) {
3580 t.Errorf("got %#v, want %#v", got, want)
3581 }
3582 })
3583
3584 t.Run("ChunkMatches", func(t *testing.T) {
3585 sres := searchForTest(t, b,
3586 &query.Regexp{
3587 Regexp: mustParseRE("\\bthe\\b"),
3588 CaseSensitive: true,
3589 }, chunkOpts)
3590
3591 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3592 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3593 }
3594
3595 if sres.Stats.RegexpsConsidered != 0 {
3596 t.Fatal("expected regexp to be skipped")
3597 }
3598
3599 got := sres.Files[0].ChunkMatches[0]
3600 want := ChunkMatch{
3601 Content: content,
3602 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3603 Ranges: []Range{{
3604 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3605 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3606 }},
3607 }
3608
3609 if diff := cmp.Diff(want, got); diff != "" {
3610 t.Fatal(diff)
3611 }
3612 })
3613}