fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 r.Files[i].ChunkMatches[j].BestLineMatch = 0
42 }
43 r.Files[i].Checksum = nil
44 r.Files[i].Debug = ""
45 }
46}
47
48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
49 t.Helper()
50
51 b, err := NewIndexBuilder(repo)
52 if err != nil {
53 t.Fatalf("NewIndexBuilder: %v", err)
54 }
55
56 for i, d := range docs {
57 if err := b.Add(d); err != nil {
58 t.Fatalf("Add %d: %v", i, err)
59 }
60 }
61
62 return b
63}
64
65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
66 t.Helper()
67
68 b := newIndexBuilder()
69 b.indexFormatVersion = NextIndexFormatVersion
70
71 if len(repos) != len(docs) {
72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
73 }
74
75 for i, repo := range repos {
76 if err := b.setRepository(repo); err != nil {
77 t.Fatal(err)
78 }
79 for j, d := range docs[i] {
80 if err := b.Add(d); err != nil {
81 t.Fatalf("Add %d %d: %v", i, j, err)
82 }
83 }
84 }
85
86 return b
87}
88
89func TestBoundary(t *testing.T) {
90 b := testIndexBuilder(t, nil,
91 Document{Name: "f1", Content: []byte("x the")},
92 Document{Name: "f1", Content: []byte("reader")})
93 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
94 if len(res.Files) > 0 {
95 t.Fatalf("got %v, want no matches", res.Files)
96 }
97}
98
99func TestDocSectionInvalid(t *testing.T) {
100 b, err := NewIndexBuilder(nil)
101 if err != nil {
102 t.Fatalf("NewIndexBuilder: %v", err)
103 }
104 doc := Document{
105 Name: "f1",
106 Content: []byte("01234567890123"),
107 Symbols: []DocumentSection{{5, 8}, {7, 9}},
108 }
109
110 if err := b.Add(doc); err == nil {
111 t.Errorf("overlapping doc sections should fail")
112 }
113
114 doc = Document{
115 Name: "f1",
116 Content: []byte("01234567890123"),
117 Symbols: []DocumentSection{{0, 20}},
118 }
119
120 if err := b.Add(doc); err == nil {
121 t.Errorf("doc sections beyond EOF should fail")
122 }
123}
124
125func TestBasic(t *testing.T) {
126 b := testIndexBuilder(t, nil,
127 Document{
128 Name: "f2",
129 Content: []byte("to carry water in the no later bla"),
130 // --------------0123456789012345678901234567890123
131 })
132
133 t.Run("LineMatch", func(t *testing.T) {
134 res := searchForTest(t, b, &query.Substring{
135 Pattern: "water",
136 CaseSensitive: true,
137 })
138 fmatches := res.Files
139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
140 t.Fatalf("got %v, want 1 matches", fmatches)
141 }
142
143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
144 want := "f2:9"
145 if got != want {
146 t.Errorf("1: got %s, want %s", got, want)
147 }
148 })
149
150 t.Run("ChunkMatch", func(t *testing.T) {
151 res := searchForTest(t, b, &query.Substring{
152 Pattern: "water",
153 CaseSensitive: true,
154 }, chunkOpts)
155 fmatches := res.Files
156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
157 t.Fatalf("got %v, want 1 matches", fmatches)
158 }
159
160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
161 want := "f2:9"
162 if got != want {
163 t.Errorf("1: got %s, want %s", got, want)
164 }
165 })
166}
167
168func TestEmptyIndex(t *testing.T) {
169 b := testIndexBuilder(t, nil)
170 searcher := searcherForTest(t, b)
171
172 var opts SearchOptions
173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
174 t.Fatalf("Search: %v", err)
175 }
176
177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
178 t.Fatalf("List: %v", err)
179 }
180
181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
182 t.Fatalf("Search: %v", err)
183 }
184}
185
// memSeeker serves an index held entirely in memory. searcherForTest wraps the
// bytes written by an IndexBuilder in a memSeeker and hands it to NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed placeholder name.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the backing data that start at offset off. The
// returned slice aliases the backing data rather than copying it.
//
// A range that does not lie entirely within the data produces an error
// instead of a slice-bounds panic (or a silent read past len into spare
// capacity).
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in 64 bits so that off+sz cannot wrap around uint32.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memSeeker: read [%d:%d] out of bounds for %d bytes", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the number of bytes held.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
202
203func TestNewlines(t *testing.T) {
204 b := testIndexBuilder(t, nil,
205 // -----------------------------------------012345-678901-234
206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
207
208 t.Run("LineMatches", func(t *testing.T) {
209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
210
211 matches := sres.Files
212 want := []FileMatch{{
213 FileName: "filename",
214 LineMatches: []LineMatch{{
215 LineFragments: []LineFragmentMatch{{
216 Offset: 8,
217 LineOffset: 2,
218 MatchLength: 3,
219 }},
220 Line: []byte("line2\n"),
221 LineStart: 6,
222 LineEnd: 12,
223 LineNumber: 2,
224 }},
225 }}
226
227 if diff := cmp.Diff(matches, want); diff != "" {
228 t.Fatal(diff)
229 }
230 })
231
232 t.Run("ChunkMatches", func(t *testing.T) {
233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
234
235 matches := sres.Files
236 want := []FileMatch{{
237 FileName: "filename",
238 ChunkMatches: []ChunkMatch{{
239 Content: []byte("line2\n"),
240 ContentStart: Location{
241 ByteOffset: 6,
242 LineNumber: 2,
243 Column: 1,
244 },
245 Ranges: []Range{{
246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
248 }},
249 }},
250 }}
251
252 if diff := cmp.Diff(want, matches); diff != "" {
253 t.Fatal(diff)
254 }
255 })
256}
257
258// A result spanning multiple lines should have LineMatches that only cover
259// single lines.
260func TestQueryNewlines(t *testing.T) {
261 text := "line1\nline2\nbla"
262 b := testIndexBuilder(t, nil,
263 Document{Name: "filename", Content: []byte(text)})
264
265 t.Run("LineMatches", func(t *testing.T) {
266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
267 matches := sres.Files
268 if len(matches) != 1 {
269 t.Fatalf("got %d file matches, want exactly one", len(matches))
270 }
271 m := matches[0]
272 if len(m.LineMatches) != 2 {
273 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
274 }
275 })
276
277 t.Run("ChunkMatches", func(t *testing.T) {
278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
279 matches := sres.Files
280 if len(matches) != 1 {
281 t.Fatalf("got %d file matches, want exactly one", len(matches))
282 }
283 m := matches[0]
284 if len(m.ChunkMatches) != 1 {
285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
286 }
287 })
288}
289
290var chunkOpts = SearchOptions{ChunkMatches: true}
291
292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
293 searcher := searcherForTest(t, b)
294 var opts SearchOptions
295 if len(o) > 0 {
296 opts = o[0]
297 }
298 res, err := searcher.Search(context.Background(), q, &opts)
299 if err != nil {
300 t.Fatalf("Search(%s): %v", q, err)
301 }
302 clearScores(res)
303 return res
304}
305
306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
307 var buf bytes.Buffer
308 if err := b.Write(&buf); err != nil {
309 t.Fatal(err)
310 }
311 f := &memSeeker{buf.Bytes()}
312
313 searcher, err := NewSearcher(f)
314 if err != nil {
315 t.Fatalf("NewSearcher: %v", err)
316 }
317
318 return searcher
319}
320
321func TestCaseFold(t *testing.T) {
322 b := testIndexBuilder(t, nil,
323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
324 // -----------------------------------012345678901234
325 )
326 t.Run("LineMatches", func(t *testing.T) {
327 sres := searchForTest(t, b, &query.Substring{
328 Pattern: "bananas",
329 CaseSensitive: true,
330 })
331 matches := sres.Files
332 if len(matches) != 0 {
333 t.Errorf("foldcase: got %#v, want 0 matches", matches)
334 }
335
336 sres = searchForTest(t, b,
337 &query.Substring{
338 Pattern: "BaNaNAS",
339 CaseSensitive: true,
340 })
341 matches = sres.Files
342 if len(matches) != 1 {
343 t.Errorf("no foldcase: got %v, want 1 matches", matches)
344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
345 t.Errorf("foldcase: got %v, want offsets 7", matches)
346 }
347 })
348
349 t.Run("ChunkMatches", func(t *testing.T) {
350 sres := searchForTest(t, b, &query.Substring{
351 Pattern: "bananas",
352 CaseSensitive: true,
353 }, chunkOpts)
354 matches := sres.Files
355 if len(matches) != 0 {
356 t.Errorf("foldcase: got %#v, want 0 matches", matches)
357 }
358
359 sres = searchForTest(t, b,
360 &query.Substring{
361 Pattern: "BaNaNAS",
362 CaseSensitive: true,
363 })
364 matches = sres.Files
365 if len(matches) != 1 {
366 t.Errorf("no foldcase: got %v, want 1 matches", matches)
367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
368 t.Errorf("foldcase: got %v, want offsets 7", matches)
369 }
370 })
371}
372
373// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
374// chars. Those are then set as symbols.
375func wordsAsSymbols(doc Document) Document {
376 re := regexp.MustCompile(`\b\w{2,}\b`)
377 var symbols []DocumentSection
378 for _, match := range re.FindAllIndex(doc.Content, -1) {
379 symbols = append(symbols, DocumentSection{
380 Start: uint32(match[0]),
381 End: uint32(match[1]),
382 })
383 }
384 doc.Symbols = symbols
385 return doc
386}
387
388func TestSearchStats(t *testing.T) {
389 ctx := context.Background()
390 searcher := searcherForTest(t, testIndexBuilder(t, nil,
391 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
392 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
393 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
394 // --------------------------------------------------0123456789012345
395 ))
396
397 andQuery := query.NewAnd(
398 &query.Substring{
399 Pattern: "banana",
400 },
401 &query.Substring{
402 Pattern: "apple",
403 },
404 )
405
406 t.Run("LineMatches", func(t *testing.T) {
407 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
408 if err != nil {
409 t.Fatal(err)
410 }
411 matches := sres.Files
412 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
413 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
414 }
415
416 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
417 t.Fatalf("got %#v, want offsets 2,9", matches)
418 }
419 })
420 t.Run("ChunkMatches", func(t *testing.T) {
421 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
422 if err != nil {
423 t.Fatal(err)
424 }
425 matches := sres.Files
426 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
427 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
428 }
429
430 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
431 t.Fatalf("got %#v, want offsets 2,9", matches)
432 }
433 })
434 t.Run("Stats", func(t *testing.T) {
435 cases := []struct {
436 Name string
437 Q query.Q
438 Want Stats
439 }{{
440 Name: "and-query",
441 Q: andQuery,
442 Want: Stats{
443 FilesLoaded: 1,
444 ContentBytesLoaded: 22,
445 IndexBytesLoaded: 10,
446 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
447 NgramLookups: 104,
448 MatchCount: 2,
449 FileCount: 1,
450 FilesConsidered: 2,
451 ShardsScanned: 1,
452 },
453 }, {
454 Name: "one-trigram",
455 Q: &query.Substring{
456 Pattern: "a y",
457 Content: true,
458 CaseSensitive: true,
459 },
460 Want: Stats{
461 ContentBytesLoaded: 14,
462 IndexBytesLoaded: 1,
463 FileCount: 1,
464 FilesConsidered: 1,
465 FilesLoaded: 1,
466 ShardsScanned: 1,
467 MatchCount: 1,
468 NgramMatches: 1,
469 NgramLookups: 2, // once to lookup frequency then again to access posting list.
470 },
471 }, {
472 Name: "one-trigram-case-insensitive",
473 Q: &query.Substring{
474 Pattern: "a y",
475 Content: true,
476 },
477 Want: Stats{
478 ContentBytesLoaded: 14,
479 IndexBytesLoaded: 1,
480 FileCount: 1,
481 FilesConsidered: 1,
482 FilesLoaded: 1,
483 ShardsScanned: 1,
484 MatchCount: 1,
485 NgramMatches: 1,
486 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
487 },
488 }, {
489 Name: "one-trigram-pruned",
490 Q: &query.Substring{
491 Pattern: "foo",
492 Content: true,
493 CaseSensitive: true,
494 },
495 Want: Stats{
496 ShardsSkippedFilter: 1,
497 NgramLookups: 1, // only had to lookup once
498 },
499 }, {
500 Name: "one-trigram-branch-pruned",
501 Q: query.NewAnd(
502 &query.Substring{
503 Pattern: "foo",
504 Content: true,
505 CaseSensitive: true,
506 },
507 &query.Substring{
508 Pattern: "a y",
509 Content: true,
510 CaseSensitive: true,
511 },
512 ),
513 Want: Stats{
514 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
515 ShardsSkippedFilter: 1,
516 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
517 },
518 }, {
519 Name: "symbol-substr-nomatch",
520 Q: &query.Symbol{Expr: &query.Substring{
521 Pattern: "banana apple",
522 Content: true,
523 CaseSensitive: true,
524 }},
525 Want: Stats{
526 IndexBytesLoaded: 3,
527 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
528 MatchCount: 0, // even though there is a match it doesn't align with a symbol
529 ShardsScanned: 1,
530 NgramMatches: 1,
531 NgramLookups: 12,
532 },
533 }, {
534 Name: "symbol-substr",
535 Q: &query.Symbol{Expr: &query.Substring{
536 Pattern: "apple",
537 Content: true,
538 CaseSensitive: true,
539 }},
540 Want: Stats{
541 ContentBytesLoaded: 35,
542 IndexBytesLoaded: 4,
543 FileCount: 2,
544 FilesConsidered: 2, // must be 2 to ensure we used the index
545 FilesLoaded: 2,
546 MatchCount: 2, // apple symbols is in two files
547 ShardsScanned: 1,
548 NgramMatches: 2,
549 NgramLookups: 5,
550 },
551 }, {
552 Name: "symbol-regexp-nomatch",
553 Q: &query.Symbol{Expr: &query.Regexp{
554 Regexp: mustParseRE("^apple.banana$"),
555 Content: true,
556 CaseSensitive: true,
557 }},
558 Want: Stats{
559 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
560 IndexBytesLoaded: 10,
561 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
562 FilesLoaded: 2,
563 MatchCount: 0, // even though there is a match it doesn't align with a symbol
564 ShardsScanned: 1,
565 NgramMatches: 3,
566 NgramLookups: 11,
567 },
568 }, {
569 Name: "symbol-regexp",
570 Q: &query.Symbol{Expr: &query.Regexp{
571 Regexp: mustParseRE("^app.e$"),
572 Content: true,
573 CaseSensitive: true,
574 }},
575 Want: Stats{
576 ContentBytesLoaded: 35,
577 IndexBytesLoaded: 2,
578 FileCount: 2,
579 FilesConsidered: 2, // must be 2 to ensure we used the index
580 FilesLoaded: 2,
581 MatchCount: 2, // apple symbols is in two files
582 ShardsScanned: 1,
583 NgramMatches: 2,
584 NgramLookups: 2,
585 },
586 }}
587
588 for _, tc := range cases {
589 t.Run(tc.Name, func(t *testing.T) {
590 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
591 if err != nil {
592 t.Fatal(err)
593 }
594 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
595 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
596 }
597 })
598 }
599 })
600}
601
602func TestAndNegateSearch(t *testing.T) {
603 b := testIndexBuilder(t, nil,
604 Document{Name: "f1", Content: []byte("x banana y")},
605 // -----------------------------------0123456789
606 Document{Name: "f4", Content: []byte("x banana apple y")})
607
608 t.Run("LineMatches", func(t *testing.T) {
609 sres := searchForTest(t, b, query.NewAnd(
610 &query.Substring{
611 Pattern: "banana",
612 },
613 &query.Not{Child: &query.Substring{
614 Pattern: "apple",
615 }}))
616
617 matches := sres.Files
618
619 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
620 t.Fatalf("got %v, want 1 match", matches)
621 }
622 if matches[0].FileName != "f1" {
623 t.Fatalf("got match %#v, want FileName: f1", matches[0])
624 }
625 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
626 t.Fatalf("got %v, want offset 2", matches)
627 }
628 })
629
630 t.Run("ChunkMatches", func(t *testing.T) {
631 sres := searchForTest(t, b,
632 query.NewAnd(
633 &query.Substring{
634 Pattern: "banana",
635 },
636 &query.Not{Child: &query.Substring{
637 Pattern: "apple",
638 }},
639 ),
640 chunkOpts,
641 )
642
643 matches := sres.Files
644
645 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
646 t.Fatalf("got %v, want 1 match", matches)
647 }
648 if matches[0].FileName != "f1" {
649 t.Fatalf("got match %#v, want FileName: f1", matches[0])
650 }
651 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
652 t.Fatalf("got %v, want offset 2", matches)
653 }
654 })
655}
656
657func TestNegativeMatchesOnlyShortcut(t *testing.T) {
658 b := testIndexBuilder(t, nil,
659 Document{Name: "f1", Content: []byte("x banana y")},
660 Document{Name: "f2", Content: []byte("x appelmoes y")},
661 Document{Name: "f3", Content: []byte("x appelmoes y")},
662 Document{Name: "f3", Content: []byte("x appelmoes y")})
663
664 t.Run("LineMatches", func(t *testing.T) {
665 sres := searchForTest(t, b, query.NewAnd(
666 &query.Substring{
667 Pattern: "banana",
668 },
669 &query.Not{Child: &query.Substring{
670 Pattern: "appel",
671 }}))
672
673 if sres.Stats.FilesConsidered != 1 {
674 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
675 }
676 })
677
678 t.Run("ChunkMatches", func(t *testing.T) {
679 sres := searchForTest(t, b, query.NewAnd(
680 &query.Substring{
681 Pattern: "banana",
682 },
683 &query.Not{Child: &query.Substring{
684 Pattern: "appel",
685 }}), chunkOpts)
686
687 if sres.Stats.FilesConsidered != 1 {
688 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
689 }
690 })
691}
692
693func TestFileSearch(t *testing.T) {
694 b := testIndexBuilder(t, nil,
695 Document{Name: "banzana", Content: []byte("x orange y")},
696 // -------------0123456
697 Document{Name: "banana", Content: []byte("x apple y")},
698 // -------------012345
699 )
700
701 t.Run("LineMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Substring{
703 Pattern: "anan",
704 FileName: true,
705 })
706
707 matches := sres.Files
708 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
709 t.Fatalf("got %v, want 1 match", matches)
710 }
711
712 got := matches[0].LineMatches[0]
713 want := LineMatch{
714 Line: []byte("banana"),
715 LineFragments: []LineFragmentMatch{{
716 Offset: 1,
717 LineOffset: 1,
718 MatchLength: 4,
719 }},
720 FileName: true,
721 }
722
723 if !reflect.DeepEqual(got, want) {
724 t.Errorf("got %#v, want %#v", got, want)
725 }
726 })
727
728 t.Run("ChunkMatches", func(t *testing.T) {
729 sres := searchForTest(t, b, &query.Substring{
730 Pattern: "anan",
731 FileName: true,
732 }, chunkOpts)
733
734 matches := sres.Files
735 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
736 t.Fatalf("got %v, want 1 match", matches)
737 }
738
739 got := matches[0].ChunkMatches[0]
740 want := ChunkMatch{
741 Content: []byte("banana"),
742 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
743 Ranges: []Range{{
744 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
745 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
746 }},
747 FileName: true,
748 }
749
750 if diff := cmp.Diff(want, got); diff != "" {
751 t.Fatal(diff)
752 }
753 })
754
755 t.Run("FileNameSet", func(t *testing.T) {
756 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
757
758 matches := sres.Files
759 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
760 t.Fatalf("got %v, want 1 match", matches)
761 }
762
763 got := matches[0].ChunkMatches[0]
764 want := ChunkMatch{
765 Content: []byte("banana"),
766 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
767 Ranges: []Range{{
768 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
769 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
770 }},
771 FileName: true,
772 }
773
774 if diff := cmp.Diff(want, got); diff != "" {
775 t.Fatal(diff)
776 }
777 })
778}
779
780func TestFileCase(t *testing.T) {
781 b := testIndexBuilder(t, nil,
782 Document{Name: "BANANA", Content: []byte("x orange y")})
783
784 t.Run("LineMatches", func(t *testing.T) {
785 sres := searchForTest(t, b, &query.Substring{
786 Pattern: "banana",
787 FileName: true,
788 })
789
790 matches := sres.Files
791 if len(matches) != 1 || matches[0].FileName != "BANANA" {
792 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
793 }
794 })
795
796 t.Run("ChunkMatches", func(t *testing.T) {
797 sres := searchForTest(t, b, &query.Substring{
798 Pattern: "banana",
799 FileName: true,
800 }, chunkOpts)
801
802 matches := sres.Files
803 if len(matches) != 1 || matches[0].FileName != "BANANA" {
804 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
805 }
806 })
807}
808
809func TestFileRegexpSearchBruteForce(t *testing.T) {
810 b := testIndexBuilder(t, nil,
811 Document{Name: "banzana", Content: []byte("x orange y")},
812 Document{Name: "banana", Content: []byte("x apple y")},
813 )
814 t.Run("LineMatches", func(t *testing.T) {
815 sres := searchForTest(t, b, &query.Regexp{
816 Regexp: mustParseRE("[qn][zx]"),
817 FileName: true,
818 })
819
820 matches := sres.Files
821 if len(matches) != 1 || matches[0].FileName != "banzana" {
822 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
823 }
824 })
825 t.Run("LineMatches", func(t *testing.T) {
826 sres := searchForTest(t, b, &query.Regexp{
827 Regexp: mustParseRE("[qn][zx]"),
828 FileName: true,
829 }, chunkOpts)
830
831 matches := sres.Files
832 if len(matches) != 1 || matches[0].FileName != "banzana" {
833 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
834 }
835 })
836}
837
838func TestFileRegexpSearchShortString(t *testing.T) {
839 b := testIndexBuilder(t, nil,
840 Document{Name: "banana.py", Content: []byte("x orange y")})
841
842 t.Run("LineMatches", func(t *testing.T) {
843 sres := searchForTest(t, b, &query.Regexp{
844 Regexp: mustParseRE("ana.py"),
845 FileName: true,
846 })
847
848 matches := sres.Files
849 if len(matches) != 1 || matches[0].FileName != "banana.py" {
850 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
851 }
852 })
853
854 t.Run("ChunkMatches", func(t *testing.T) {
855 sres := searchForTest(t, b, &query.Regexp{
856 Regexp: mustParseRE("ana.py"),
857 FileName: true,
858 }, chunkOpts)
859
860 matches := sres.Files
861 if len(matches) != 1 || matches[0].FileName != "banana.py" {
862 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
863 }
864 })
865}
866
867func TestFileSubstringSearchBruteForce(t *testing.T) {
868 b := testIndexBuilder(t, nil,
869 Document{Name: "BANZANA", Content: []byte("x orange y")},
870 Document{Name: "banana", Content: []byte("x apple y")})
871
872 q := &query.Substring{
873 Pattern: "z",
874 FileName: true,
875 }
876
877 t.Run("LineMatches", func(t *testing.T) {
878 res := searchForTest(t, b, q)
879 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
880 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
881 }
882 })
883
884 t.Run("ChunkMatches", func(t *testing.T) {
885 res := searchForTest(t, b, q, chunkOpts)
886 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
887 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
888 }
889 })
890}
891
892func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
893 b := testIndexBuilder(t, nil,
894 Document{Name: "BANZANA", Content: []byte("x orange y")},
895 Document{Name: "bananaq", Content: []byte("x apple y")})
896
897 q := &query.Substring{
898 Pattern: "q",
899 FileName: true,
900 }
901 t.Run("LineMatches", func(t *testing.T) {
902 res := searchForTest(t, b, q)
903 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
904 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
905 }
906 })
907
908 t.Run("LineMatches", func(t *testing.T) {
909 res := searchForTest(t, b, q, chunkOpts)
910 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
911 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
912 }
913 })
914}
915
916func TestSearchMatchAll(t *testing.T) {
917 b := testIndexBuilder(t, nil,
918 Document{Name: "banzana", Content: []byte("x orange y")},
919 Document{Name: "banana", Content: []byte("x apple y")})
920
921 t.Run("LineMatches", func(t *testing.T) {
922 sres := searchForTest(t, b, &query.Const{Value: true})
923 matches := sres.Files
924 if len(matches) != 2 {
925 t.Fatalf("got %v, want 2 matches", matches)
926 }
927 })
928
929 t.Run("ChunkMatches", func(t *testing.T) {
930 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
931 matches := sres.Files
932 if len(matches) != 2 {
933 t.Fatalf("got %v, want 2 matches", matches)
934 }
935 })
936}
937
938func TestSearchNewline(t *testing.T) {
939 b := testIndexBuilder(t, nil,
940 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
941
942 t.Run("LineMatches", func(t *testing.T) {
943 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
944
945 // Just check that we don't crash.
946
947 matches := sres.Files
948 if len(matches) != 1 {
949 t.Fatalf("got %v, want 1 matches", matches)
950 }
951 })
952
953 t.Run("ChunkMatches", func(t *testing.T) {
954 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
955
956 // Just check that we don't crash.
957
958 matches := sres.Files
959 if len(matches) != 1 {
960 t.Fatalf("got %v, want 1 matches", matches)
961 }
962 })
963}
964
965func TestSearchMatchAllRegexp(t *testing.T) {
966 b := testIndexBuilder(t, nil,
967 Document{Name: "banzana", Content: []byte("abcd")},
968 Document{Name: "banana", Content: []byte("pqrs")})
969
970 t.Run("LineMatches", func(t *testing.T) {
971 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
972
973 matches := sres.Files
974 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
975 t.Fatalf("got %v, want 2 matches", matches)
976 }
977 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
978 t.Fatalf("want 4 chars in every file, got %#v", matches)
979 }
980 })
981
982 t.Run("ChunkMatches", func(t *testing.T) {
983 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
984
985 matches := sres.Files
986 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
987 t.Fatalf("got %v, want 2 matches", matches)
988 }
989 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
990 t.Fatalf("want 4 chars in every file, got %#v", matches)
991 }
992 })
993}
994
995func TestFileRestriction(t *testing.T) {
996 b := testIndexBuilder(t, nil,
997 Document{Name: "banana1", Content: []byte("x orange y")},
998 Document{Name: "banana2", Content: []byte("x apple y")},
999 Document{Name: "orange", Content: []byte("x apple z")})
1000
1001 t.Run("LineMatches", func(t *testing.T) {
1002 sres := searchForTest(t, b, query.NewAnd(
1003 &query.Substring{
1004 Pattern: "banana",
1005 FileName: true,
1006 },
1007 &query.Substring{
1008 Pattern: "apple",
1009 }))
1010
1011 matches := sres.Files
1012 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1013 t.Fatalf("got %v, want 1 match", matches)
1014 }
1015
1016 match := matches[0].LineMatches[0]
1017 got := string(match.Line)
1018 want := "x apple y"
1019 if got != want {
1020 t.Errorf("got match %#v, want line %q", match, want)
1021 }
1022 })
1023
1024 t.Run("ChunkMatches", func(t *testing.T) {
1025 sres := searchForTest(t, b, query.NewAnd(
1026 &query.Substring{
1027 Pattern: "banana",
1028 FileName: true,
1029 },
1030 &query.Substring{
1031 Pattern: "apple",
1032 }), chunkOpts)
1033
1034 matches := sres.Files
1035 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1036 t.Fatalf("got %v, want 1 match", matches)
1037 }
1038
1039 match := matches[0].ChunkMatches[0]
1040 got := string(match.Content)
1041 want := "x apple y"
1042 if got != want {
1043 t.Errorf("got match %#v, want line %q", match, want)
1044 }
1045 })
1046}
1047
1048func TestFileNameBoundary(t *testing.T) {
1049 b := testIndexBuilder(t, nil,
1050 Document{Name: "banana2", Content: []byte("x apple y")},
1051 Document{Name: "helpers.go", Content: []byte("x apple y")},
1052 Document{Name: "foo", Content: []byte("x apple y")})
1053
1054 t.Run("LineMatches", func(t *testing.T) {
1055 sres := searchForTest(t, b, &query.Substring{
1056 Pattern: "helpers.go",
1057 FileName: true,
1058 })
1059
1060 matches := sres.Files
1061 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1062 t.Fatalf("got %v, want 1 match", matches)
1063 }
1064 })
1065
1066 t.Run("ChunkMatches", func(t *testing.T) {
1067 sres := searchForTest(t, b, &query.Substring{
1068 Pattern: "helpers.go",
1069 FileName: true,
1070 }, chunkOpts)
1071
1072 matches := sres.Files
1073 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1074 t.Fatalf("got %v, want 1 match", matches)
1075 }
1076 })
1077}
1078
1079func TestDocumentOrder(t *testing.T) {
1080 var docs []Document
1081 for i := 0; i < 3; i++ {
1082 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1083 }
1084
1085 b := testIndexBuilder(t, nil, docs...)
1086
1087 t.Run("LineMatches", func(t *testing.T) {
1088 sres := searchForTest(t, b, query.NewAnd(
1089 &query.Substring{
1090 Pattern: "needle",
1091 }))
1092
1093 want := []string{"f0", "f1", "f2"}
1094 var got []string
1095 for _, f := range sres.Files {
1096 got = append(got, f.FileName)
1097 }
1098 if !reflect.DeepEqual(got, want) {
1099 t.Fatalf("got %v, want %v", got, want)
1100 }
1101 })
1102
1103 t.Run("ChunkMatches", func(t *testing.T) {
1104 sres := searchForTest(t, b,
1105 query.NewAnd(&query.Substring{
1106 Pattern: "needle",
1107 }),
1108 chunkOpts,
1109 )
1110
1111 want := []string{"f0", "f1", "f2"}
1112 var got []string
1113 for _, f := range sres.Files {
1114 got = append(got, f.FileName)
1115 }
1116 if !reflect.DeepEqual(got, want) {
1117 t.Fatalf("got %v, want %v", got, want)
1118 }
1119 })
1120}
1121
1122func TestBranchMask(t *testing.T) {
1123 b := testIndexBuilder(t, &Repository{
1124 Branches: []RepositoryBranch{
1125 {"master", "v-master"},
1126 {"stable", "v-stable"},
1127 {"bonzai", "v-bonzai"},
1128 },
1129 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1130 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1131 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1132 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1133 )
1134
1135 t.Run("LineMatches", func(t *testing.T) {
1136 sres := searchForTest(t, b, query.NewAnd(
1137 &query.Substring{
1138 Pattern: "needle",
1139 },
1140 &query.Branch{
1141 Pattern: "table",
1142 }))
1143
1144 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1145 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1146 }
1147
1148 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1149 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1150 }
1151 })
1152
1153 t.Run("ChunkMatches", func(t *testing.T) {
1154 sres := searchForTest(t, b, query.NewAnd(
1155 &query.Substring{
1156 Pattern: "needle",
1157 },
1158 &query.Branch{
1159 Pattern: "table",
1160 }),
1161 chunkOpts,
1162 )
1163
1164 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1165 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1166 }
1167
1168 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1169 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1170 }
1171 })
1172}
1173
1174func TestBranchLimit(t *testing.T) {
1175 for limit := 64; limit <= 65; limit++ {
1176 r := &Repository{}
1177 for i := 0; i < limit; i++ {
1178 s := fmt.Sprintf("b%d", i)
1179 r.Branches = append(r.Branches, RepositoryBranch{
1180 s, "v-" + s,
1181 })
1182 }
1183 _, err := NewIndexBuilder(r)
1184 if limit == 64 && err != nil {
1185 t.Fatalf("NewIndexBuilder: %v", err)
1186 } else if limit == 65 && err == nil {
1187 t.Fatalf("NewIndexBuilder succeeded")
1188 }
1189 }
1190}
1191
1192func TestBranchReport(t *testing.T) {
1193 branches := []string{"stable", "master"}
1194 b := testIndexBuilder(t, &Repository{
1195 Branches: []RepositoryBranch{
1196 {"stable", "vs"},
1197 {"master", "vm"},
1198 },
1199 },
1200 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1201
1202 t.Run("LineMatches", func(t *testing.T) {
1203 sres := searchForTest(t, b, &query.Substring{
1204 Pattern: "needle",
1205 })
1206 if len(sres.Files) != 1 {
1207 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1208 }
1209
1210 f := sres.Files[0]
1211 if !reflect.DeepEqual(f.Branches, branches) {
1212 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1213 }
1214 })
1215
1216 t.Run("ChunkMatches", func(t *testing.T) {
1217 sres := searchForTest(t, b, &query.Substring{
1218 Pattern: "needle",
1219 }, chunkOpts)
1220 if len(sres.Files) != 1 {
1221 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1222 }
1223
1224 f := sres.Files[0]
1225 if !reflect.DeepEqual(f.Branches, branches) {
1226 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1227 }
1228 })
1229}
1230
1231func TestBranchVersions(t *testing.T) {
1232 b := testIndexBuilder(t, &Repository{
1233 Branches: []RepositoryBranch{
1234 {"stable", "v-stable"},
1235 {"master", "v-master"},
1236 },
1237 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1238
1239 t.Run("LineMatches", func(t *testing.T) {
1240 sres := searchForTest(t, b, &query.Substring{
1241 Pattern: "needle",
1242 })
1243 if len(sres.Files) != 1 {
1244 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1245 }
1246
1247 f := sres.Files[0]
1248 if f.Version != "v-master" {
1249 t.Fatalf("got file %#v, want version 'v-master'", f)
1250 }
1251 })
1252
1253 t.Run("ChunkMatches", func(t *testing.T) {
1254 sres := searchForTest(t, b, &query.Substring{
1255 Pattern: "needle",
1256 }, chunkOpts)
1257 if len(sres.Files) != 1 {
1258 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1259 }
1260
1261 f := sres.Files[0]
1262 if f.Version != "v-master" {
1263 t.Fatalf("got file %#v, want version 'v-master'", f)
1264 }
1265 })
1266}
1267
// mustParseRE parses s using the syntax.Perl flags and returns the parsed
// expression. It panics with the parse error if s is not a valid pattern.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}

	panic(err)
}
1276
1277func TestRegexp(t *testing.T) {
1278 content := []byte("needle the bla")
1279 // ----------------01234567890123
1280
1281 b := testIndexBuilder(t, nil,
1282 Document{
1283 Name: "f1",
1284 Content: content,
1285 })
1286
1287 t.Run("LineMatches", func(t *testing.T) {
1288 sres := searchForTest(t, b,
1289 &query.Regexp{
1290 Regexp: mustParseRE("dle.*bla"),
1291 })
1292
1293 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1295 }
1296
1297 got := sres.Files[0].LineMatches[0]
1298 want := LineMatch{
1299 LineFragments: []LineFragmentMatch{{
1300 LineOffset: 3,
1301 Offset: 3,
1302 MatchLength: 11,
1303 }},
1304 Line: content,
1305 FileName: false,
1306 LineNumber: 1,
1307 LineStart: 0,
1308 LineEnd: 14,
1309 }
1310
1311 if !reflect.DeepEqual(got, want) {
1312 t.Errorf("got %#v, want %#v", got, want)
1313 }
1314 })
1315
1316 t.Run("ChunkMatches", func(t *testing.T) {
1317 sres := searchForTest(t, b,
1318 &query.Regexp{
1319 Regexp: mustParseRE("dle.*bla"),
1320 }, chunkOpts)
1321
1322 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1323 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1324 }
1325
1326 got := sres.Files[0].ChunkMatches[0]
1327 want := ChunkMatch{
1328 Content: content,
1329 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1330 Ranges: []Range{{
1331 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1332 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1333 }},
1334 }
1335
1336 if diff := cmp.Diff(want, got); diff != "" {
1337 t.Fatal(diff)
1338 }
1339 })
1340}
1341
1342func TestRegexpFile(t *testing.T) {
1343 content := []byte("needle the bla")
1344
1345 name := "let's play: find the mussel"
1346 b := testIndexBuilder(t, nil,
1347 Document{Name: name, Content: content},
1348 Document{Name: "play.txt", Content: content})
1349
1350 t.Run("LineMatches", func(t *testing.T) {
1351 sres := searchForTest(t, b,
1352 &query.Regexp{
1353 Regexp: mustParseRE("play.*mussel"),
1354 FileName: true,
1355 })
1356
1357 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1358 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1359 }
1360
1361 if sres.Files[0].FileName != name {
1362 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1363 }
1364 })
1365
1366 t.Run("ChunkMatches", func(t *testing.T) {
1367 sres := searchForTest(t, b,
1368 &query.Regexp{
1369 Regexp: mustParseRE("play.*mussel"),
1370 FileName: true,
1371 }, chunkOpts)
1372
1373 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1374 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1375 }
1376
1377 if sres.Files[0].FileName != name {
1378 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1379 }
1380 })
1381}
1382
1383func TestRegexpOrder(t *testing.T) {
1384 content := []byte("bla the needle")
1385 // ----------------01234567890123
1386
1387 b := testIndexBuilder(t, nil,
1388 Document{Name: "f1", Content: content})
1389
1390 t.Run("LineMatches", func(t *testing.T) {
1391 sres := searchForTest(t, b,
1392 &query.Regexp{
1393 Regexp: mustParseRE("dle.*bla"),
1394 })
1395
1396 if len(sres.Files) != 0 {
1397 t.Fatalf("got %v, want 0 matches", sres.Files)
1398 }
1399 })
1400
1401 t.Run("ChunkMatches", func(t *testing.T) {
1402 sres := searchForTest(t, b,
1403 &query.Regexp{
1404 Regexp: mustParseRE("dle.*bla"),
1405 })
1406
1407 if len(sres.Files) != 0 {
1408 t.Fatalf("got %v, want 0 matches", sres.Files)
1409 }
1410 })
1411}
1412
1413func TestRepoName(t *testing.T) {
1414 content := []byte("bla the needle")
1415 // ----------------01234567890123
1416
1417 b := testIndexBuilder(t, &Repository{Name: "bla"},
1418 Document{Name: "f1", Content: content})
1419
1420 t.Run("LineMatches", func(t *testing.T) {
1421 sres := searchForTest(t, b,
1422 query.NewAnd(
1423 &query.Substring{Pattern: "needle"},
1424 &query.Repo{Regexp: regexp.MustCompile("foo")},
1425 ))
1426
1427 if len(sres.Files) != 0 {
1428 t.Fatalf("got %v, want 0 matches", sres.Files)
1429 }
1430
1431 if sres.Stats.FilesConsidered > 0 {
1432 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1433 }
1434
1435 sres = searchForTest(t, b,
1436 query.NewAnd(
1437 &query.Substring{Pattern: "needle"},
1438 &query.Repo{Regexp: regexp.MustCompile("bla")},
1439 ))
1440 if len(sres.Files) != 1 {
1441 t.Fatalf("got %v, want 1 match", sres.Files)
1442 }
1443 })
1444
1445 t.Run("ChunkMatches", func(t *testing.T) {
1446 sres := searchForTest(t, b,
1447 query.NewAnd(
1448 &query.Substring{Pattern: "needle"},
1449 &query.Repo{Regexp: regexp.MustCompile("foo")},
1450 ),
1451 chunkOpts,
1452 )
1453
1454 if len(sres.Files) != 0 {
1455 t.Fatalf("got %v, want 0 matches", sres.Files)
1456 }
1457
1458 if sres.Stats.FilesConsidered > 0 {
1459 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1460 }
1461
1462 sres = searchForTest(t, b,
1463 query.NewAnd(
1464 &query.Substring{Pattern: "needle"},
1465 &query.Repo{Regexp: regexp.MustCompile("bla")},
1466 ))
1467 if len(sres.Files) != 1 {
1468 t.Fatalf("got %v, want 1 match", sres.Files)
1469 }
1470 })
1471}
1472
1473func TestMergeMatches(t *testing.T) {
1474 content := []byte("blablabla")
1475 b := testIndexBuilder(t, nil,
1476 Document{Name: "f1", Content: content})
1477
1478 t.Run("LineMatches", func(t *testing.T) {
1479 sres := searchForTest(t, b,
1480 &query.Substring{Pattern: "bla"})
1481 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1482 t.Fatalf("got %v, want 1 match", sres.Files)
1483 }
1484 })
1485
1486 t.Run("ChunkMatches", func(t *testing.T) {
1487 sres := searchForTest(t, b,
1488 &query.Substring{Pattern: "bla"},
1489 chunkOpts,
1490 )
1491 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1492 t.Fatalf("got %v, want 1 match", sres.Files)
1493 }
1494 })
1495}
1496
1497func TestRepoURL(t *testing.T) {
1498 content := []byte("blablabla")
1499 b := testIndexBuilder(t, &Repository{
1500 Name: "name",
1501 URL: "URL",
1502 CommitURLTemplate: "commit",
1503 FileURLTemplate: "file-url",
1504 LineFragmentTemplate: "fragment",
1505 }, Document{Name: "f1", Content: content})
1506
1507 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1508
1509 if sres.RepoURLs["name"] != "file-url" {
1510 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1511 }
1512 if sres.LineFragments["name"] != "fragment" {
1513 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1514 }
1515}
1516
1517func TestRegexpCaseSensitive(t *testing.T) {
1518 content := []byte("bla\nfunc unmarshalGitiles\n")
1519 b := testIndexBuilder(t, nil, Document{
1520 Name: "f1",
1521 Content: content,
1522 })
1523
1524 t.Run("LineMatches", func(t *testing.T) {
1525 res := searchForTest(t, b,
1526 &query.Regexp{
1527 Regexp: mustParseRE("func.*Gitiles"),
1528 CaseSensitive: true,
1529 })
1530
1531 if len(res.Files) != 1 {
1532 t.Fatalf("got %v, want one match", res.Files)
1533 }
1534 })
1535
1536 t.Run("ChunkMatches", func(t *testing.T) {
1537 res := searchForTest(t, b,
1538 &query.Regexp{
1539 Regexp: mustParseRE("func.*Gitiles"),
1540 CaseSensitive: true,
1541 },
1542 chunkOpts,
1543 )
1544
1545 if len(res.Files) != 1 {
1546 t.Fatalf("got %v, want one match", res.Files)
1547 }
1548 })
1549}
1550
1551func TestRegexpCaseFolding(t *testing.T) {
1552 content := []byte("bla\nfunc unmarshalGitiles\n")
1553
1554 b := testIndexBuilder(t, nil,
1555 Document{Name: "f1", Content: content})
1556 res := searchForTest(t, b,
1557 &query.Regexp{
1558 Regexp: mustParseRE("func.*GITILES"),
1559 CaseSensitive: false,
1560 })
1561
1562 if len(res.Files) != 1 {
1563 t.Fatalf("got %v, want one match", res.Files)
1564 }
1565}
1566
1567func TestCaseRegexp(t *testing.T) {
1568 content := []byte("BLABLABLA")
1569 b := testIndexBuilder(t, nil,
1570 Document{Name: "f1", Content: content})
1571
1572 t.Run("LineMatches", func(t *testing.T) {
1573 res := searchForTest(t, b,
1574 &query.Regexp{
1575 Regexp: mustParseRE("[xb][xl][xa]"),
1576 CaseSensitive: true,
1577 })
1578
1579 if len(res.Files) > 0 {
1580 t.Fatalf("got %v, want no matches", res.Files)
1581 }
1582 })
1583
1584 t.Run("ChunkMatches", func(t *testing.T) {
1585 res := searchForTest(t, b,
1586 &query.Regexp{
1587 Regexp: mustParseRE("[xb][xl][xa]"),
1588 CaseSensitive: true,
1589 },
1590 chunkOpts,
1591 )
1592
1593 if len(res.Files) > 0 {
1594 t.Fatalf("got %v, want no matches", res.Files)
1595 }
1596 })
1597}
1598
1599func TestNegativeRegexp(t *testing.T) {
1600 content := []byte("BLABLABLA needle bla")
1601 b := testIndexBuilder(t, nil,
1602 Document{Name: "f1", Content: content})
1603
1604 t.Run("LineMatches", func(t *testing.T) {
1605 res := searchForTest(t, b,
1606 query.NewAnd(
1607 &query.Substring{
1608 Pattern: "needle",
1609 },
1610 &query.Not{
1611 Child: &query.Regexp{
1612 Regexp: mustParseRE(".cs"),
1613 },
1614 }))
1615
1616 if len(res.Files) != 1 {
1617 t.Fatalf("got %v, want 1 match", res.Files)
1618 }
1619 })
1620
1621 t.Run("ChunkMatches", func(t *testing.T) {
1622 res := searchForTest(t, b,
1623 query.NewAnd(
1624 &query.Substring{
1625 Pattern: "needle",
1626 },
1627 &query.Not{
1628 Child: &query.Regexp{
1629 Regexp: mustParseRE(".cs"),
1630 },
1631 },
1632 ),
1633 chunkOpts)
1634
1635 if len(res.Files) != 1 {
1636 t.Fatalf("got %v, want 1 match", res.Files)
1637 }
1638 })
1639}
1640
1641func TestSymbolRank(t *testing.T) {
1642 t.Skip()
1643
1644 content := []byte("func bla() blubxxxxx")
1645 // ----------------01234567890123456789
1646 b := testIndexBuilder(t, nil,
1647 Document{
1648 Name: "f1",
1649 Content: content,
1650 }, Document{
1651 Name: "f2",
1652 Content: content,
1653 Symbols: []DocumentSection{{5, 8}},
1654 }, Document{
1655 Name: "f3",
1656 Content: content,
1657 })
1658
1659 t.Run("LineMatches", func(t *testing.T) {
1660 res := searchForTest(t, b,
1661 &query.Substring{
1662 CaseSensitive: false,
1663 Pattern: "bla",
1664 })
1665
1666 if len(res.Files) != 3 {
1667 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1668 }
1669 if res.Files[0].FileName != "f2" {
1670 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1671 }
1672 })
1673
1674 t.Run("ChunkMatches", func(t *testing.T) {
1675 res := searchForTest(t, b,
1676 &query.Substring{
1677 CaseSensitive: false,
1678 Pattern: "bla",
1679 }, chunkOpts)
1680
1681 if len(res.Files) != 3 {
1682 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1683 }
1684 if res.Files[0].FileName != "f2" {
1685 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1686 }
1687 })
1688}
1689
1690func TestSymbolRankRegexpUTF8(t *testing.T) {
1691 t.Skip()
1692
1693 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1694 content := []byte(prefix +
1695 "func bla() blub")
1696 // ------012345678901234
1697 b := testIndexBuilder(t, nil,
1698 Document{
1699 Name: "f1",
1700 Content: content,
1701 }, Document{
1702 Name: "f2",
1703 Content: content,
1704 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1705 }, Document{
1706 Name: "f3",
1707 Content: content,
1708 })
1709
1710 t.Run("LineMatches", func(t *testing.T) {
1711 res := searchForTest(t, b,
1712 &query.Regexp{
1713 Regexp: mustParseRE("b.a"),
1714 })
1715
1716 if len(res.Files) != 3 {
1717 t.Fatalf("got %#v, want 3 files", res.Files)
1718 }
1719 if res.Files[0].FileName != "f2" {
1720 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1721 }
1722 })
1723
1724 t.Run("ChunjkMatches", func(t *testing.T) {
1725 res := searchForTest(t, b,
1726 &query.Regexp{
1727 Regexp: mustParseRE("b.a"),
1728 }, chunkOpts)
1729
1730 if len(res.Files) != 3 {
1731 t.Fatalf("got %#v, want 3 files", res.Files)
1732 }
1733 if res.Files[0].FileName != "f2" {
1734 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1735 }
1736 })
1737}
1738
1739func TestPartialSymbolRank(t *testing.T) {
1740 t.Skip()
1741
1742 content := []byte("func bla() blub")
1743 // ----------------012345678901234
1744
1745 b := testIndexBuilder(t, nil,
1746 Document{
1747 Name: "f1",
1748 Content: content,
1749 Symbols: []DocumentSection{{4, 9}},
1750 }, Document{
1751 Name: "f2",
1752 Content: content,
1753 Symbols: []DocumentSection{{4, 8}},
1754 }, Document{
1755 Name: "f3",
1756 Content: content,
1757 Symbols: []DocumentSection{{4, 9}},
1758 })
1759
1760 t.Run("LineMatches", func(t *testing.T) {
1761 res := searchForTest(t, b,
1762 &query.Substring{
1763 Pattern: "bla",
1764 })
1765
1766 if len(res.Files) != 3 {
1767 t.Fatalf("got %#v, want 3 files", res.Files)
1768 }
1769 if res.Files[0].FileName != "f2" {
1770 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1771 }
1772 })
1773
1774 t.Run("ChunkMatches", func(t *testing.T) {
1775 res := searchForTest(t, b,
1776 &query.Substring{
1777 Pattern: "bla",
1778 }, chunkOpts)
1779
1780 if len(res.Files) != 3 {
1781 t.Fatalf("got %#v, want 3 files", res.Files)
1782 }
1783 if res.Files[0].FileName != "f2" {
1784 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1785 }
1786 })
1787}
1788
1789func TestNegativeRepo(t *testing.T) {
1790 content := []byte("bla the needle")
1791 // ----------------01234567890123
1792 b := testIndexBuilder(t, &Repository{
1793 Name: "bla",
1794 }, Document{Name: "f1", Content: content})
1795
1796 t.Run("LineMatches", func(t *testing.T) {
1797 sres := searchForTest(t, b,
1798 query.NewAnd(
1799 &query.Substring{Pattern: "needle"},
1800 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1801 ))
1802
1803 if len(sres.Files) != 0 {
1804 t.Fatalf("got %v, want 0 matches", sres.Files)
1805 }
1806 })
1807
1808 t.Run("ChunkMatches", func(t *testing.T) {
1809 sres := searchForTest(t, b,
1810 query.NewAnd(
1811 &query.Substring{Pattern: "needle"},
1812 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1813 ), chunkOpts)
1814
1815 if len(sres.Files) != 0 {
1816 t.Fatalf("got %v, want 0 matches", sres.Files)
1817 }
1818 })
1819}
1820
1821func TestListRepos(t *testing.T) {
1822 content := []byte("bla the needle\n")
1823 // ----------------012345678901234-
1824
1825 t.Run("default and minimal fallback", func(t *testing.T) {
1826 repo := &Repository{
1827 Name: "reponame",
1828 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1829 }
1830 b := testIndexBuilder(t, repo,
1831 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1832 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1833 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1834 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1835
1836 searcher := searcherForTest(t, b)
1837
1838 for _, opts := range []*ListOptions{
1839 nil,
1840 {},
1841 {Field: RepoListFieldRepos},
1842 {Field: RepoListFieldReposMap},
1843 } {
1844 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1845 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1846
1847 res, err := searcher.List(context.Background(), q, opts)
1848 if err != nil {
1849 t.Fatalf("List(%v): %v", q, err)
1850 }
1851
1852 want := &RepoList{
1853 Repos: []*RepoListEntry{{
1854 Repository: *repo,
1855 Stats: RepoStats{
1856 Documents: 4,
1857 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1858 Shards: 1,
1859
1860 NewLinesCount: 4,
1861 DefaultBranchNewLinesCount: 2,
1862 OtherBranchesNewLinesCount: 3,
1863 },
1864 }},
1865 Stats: RepoStats{
1866 Repos: 1,
1867 Documents: 4,
1868 ContentBytes: 68,
1869 Shards: 1,
1870
1871 NewLinesCount: 4,
1872 DefaultBranchNewLinesCount: 2,
1873 OtherBranchesNewLinesCount: 3,
1874 },
1875 }
1876 ignored := []cmp.Option{
1877 cmpopts.EquateEmpty(),
1878 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1879 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1880 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1881 cmpopts.IgnoreFields(Repository{}, "priority"),
1882 }
1883 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1884 t.Fatalf("mismatch (-want +got):\n%s", diff)
1885 }
1886
1887 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1888 res, err = searcher.List(context.Background(), q, nil)
1889 if err != nil {
1890 t.Fatalf("List(%v): %v", q, err)
1891 }
1892 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1893 t.Fatalf("got %v, want 0 matches", res)
1894 }
1895 })
1896 }
1897 })
1898
1899 t.Run("minimal", func(t *testing.T) {
1900 repo := &Repository{
1901 ID: 1234,
1902 Name: "reponame",
1903 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1904 RawConfig: map[string]string{"repoid": "1234"},
1905 }
1906 b := testIndexBuilder(t, repo,
1907 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1908 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1909 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1910 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1911
1912 searcher := searcherForTest(t, b)
1913
1914 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1915 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1916 if err != nil {
1917 t.Fatalf("List(%v): %v", q, err)
1918 }
1919
1920 want := &RepoList{
1921 ReposMap: ReposMap{
1922 repo.ID: {
1923 HasSymbols: repo.HasSymbols,
1924 Branches: repo.Branches,
1925 },
1926 },
1927 Stats: RepoStats{
1928 Repos: 1,
1929 Shards: 1,
1930 Documents: 4,
1931 IndexBytes: 412,
1932 ContentBytes: 68,
1933 NewLinesCount: 4,
1934 DefaultBranchNewLinesCount: 2,
1935 OtherBranchesNewLinesCount: 3,
1936 },
1937 }
1938
1939 ignored := []cmp.Option{
1940 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1941 }
1942 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1943 t.Fatalf("mismatch (-want +got):\n%s", diff)
1944 }
1945
1946 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1947 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1948 if err != nil {
1949 t.Fatalf("List(%v): %v", q, err)
1950 }
1951 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1952 t.Fatalf("got %v, want 0 matches", res)
1953 }
1954 })
1955}
1956
1957func TestListReposByContent(t *testing.T) {
1958 content := []byte("bla the needle")
1959
1960 b := testIndexBuilder(t, &Repository{
1961 Name: "reponame",
1962 },
1963 Document{Name: "f1", Content: content},
1964 Document{Name: "f2", Content: content})
1965
1966 searcher := searcherForTest(t, b)
1967 q := &query.Substring{Pattern: "needle"}
1968 res, err := searcher.List(context.Background(), q, nil)
1969 if err != nil {
1970 t.Fatalf("List(%v): %v", q, err)
1971 }
1972 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1973 t.Fatalf("got %v, want 1 matches", res)
1974 }
1975 if got := res.Repos[0].Stats.Shards; got != 1 {
1976 t.Fatalf("got %d, want 1 shard", got)
1977 }
1978 q = &query.Substring{Pattern: "foo"}
1979 res, err = searcher.List(context.Background(), q, nil)
1980 if err != nil {
1981 t.Fatalf("List(%v): %v", q, err)
1982 }
1983 if len(res.Repos) != 0 {
1984 t.Fatalf("got %v, want 0 matches", res)
1985 }
1986}
1987
1988func TestMetadata(t *testing.T) {
1989 content := []byte("bla the needle")
1990
1991 b := testIndexBuilder(t, &Repository{
1992 Name: "reponame",
1993 }, Document{Name: "f1", Content: content},
1994 Document{Name: "f2", Content: content})
1995
1996 var buf bytes.Buffer
1997 if err := b.Write(&buf); err != nil {
1998 t.Fatal(err)
1999 }
2000 f := &memSeeker{buf.Bytes()}
2001
2002 rd, _, err := ReadMetadata(f)
2003 if err != nil {
2004 t.Fatalf("ReadMetadata: %v", err)
2005 }
2006
2007 if got, want := rd[0].Name, "reponame"; got != want {
2008 t.Fatalf("got %q want %q", got, want)
2009 }
2010}
2011
2012func TestOr(t *testing.T) {
2013 b := testIndexBuilder(t, nil,
2014 Document{Name: "f1", Content: []byte("needle")},
2015 Document{Name: "f2", Content: []byte("banana")})
2016 t.Run("LineMatches", func(t *testing.T) {
2017 sres := searchForTest(t, b, query.NewOr(
2018 &query.Substring{Pattern: "needle"},
2019 &query.Substring{Pattern: "banana"}))
2020
2021 if len(sres.Files) != 2 {
2022 t.Fatalf("got %v, want 2 files", sres.Files)
2023 }
2024 })
2025
2026 t.Run("ChunkMatches", func(t *testing.T) {
2027 sres := searchForTest(t, b, query.NewOr(
2028 &query.Substring{Pattern: "needle"},
2029 &query.Substring{Pattern: "banana"}))
2030
2031 if len(sres.Files) != 2 {
2032 t.Fatalf("got %v, want 2 files", sres.Files)
2033 }
2034 })
2035}
2036
2037func TestFrequency(t *testing.T) {
2038 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2039
2040 b := testIndexBuilder(t, nil,
2041 Document{
2042 Name: "f1",
2043 Content: content,
2044 })
2045
2046 t.Run("LineMatches", func(t *testing.T) {
2047 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2048 if len(sres.Files) != 0 {
2049 t.Errorf("got %v, wanted 0 matches", sres.Files)
2050 }
2051 })
2052
2053 t.Run("ChunkMatches", func(t *testing.T) {
2054 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2055 if len(sres.Files) != 0 {
2056 t.Errorf("got %v, wanted 0 matches", sres.Files)
2057 }
2058 })
2059}
2060
2061func TestMatchNewline(t *testing.T) {
2062 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2063 if err != nil {
2064 t.Fatalf("syntax.Parse: %v", err)
2065 }
2066
2067 content := []byte("pqr\nalex")
2068
2069 b := testIndexBuilder(t, nil,
2070 Document{
2071 Name: "f1",
2072 Content: content,
2073 })
2074
2075 t.Run("LineMatches", func(t *testing.T) {
2076 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2077 if len(sres.Files) != 1 {
2078 t.Errorf("got %v, wanted 1 matches", sres.Files)
2079 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2080 t.Errorf("got match line %q, want %q", l, content)
2081 }
2082 })
2083
2084 t.Run("ChunkMatches", func(t *testing.T) {
2085 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2086 if len(sres.Files) != 1 {
2087 t.Errorf("got %v, wanted 1 matches", sres.Files)
2088 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2089 t.Errorf("got match line %q, want %q", c, content)
2090 }
2091 })
2092}
2093
2094func TestSubRepo(t *testing.T) {
2095 subRepos := map[string]*Repository{
2096 "sub": {
2097 Name: "sub-name",
2098 LineFragmentTemplate: "sub-line",
2099 },
2100 }
2101
2102 content := []byte("pqr\nalex")
2103
2104 b := testIndexBuilder(t, &Repository{
2105 SubRepoMap: subRepos,
2106 }, Document{
2107 Name: "sub/f1",
2108 Content: content,
2109 SubRepositoryPath: "sub",
2110 })
2111
2112 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2113 if len(sres.Files) != 1 {
2114 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2115 }
2116
2117 f := sres.Files[0]
2118 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2119 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2120 }
2121
2122 if sres.LineFragments["sub-name"] != "sub-line" {
2123 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2124 }
2125}
2126
2127func TestSearchEither(t *testing.T) {
2128 b := testIndexBuilder(t, nil,
2129 Document{Name: "f1", Content: []byte("bla needle bla")},
2130 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2131
2132 t.Run("LineMatches", func(t *testing.T) {
2133 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2134 if len(sres.Files) != 2 {
2135 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2136 }
2137
2138 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2139 if len(sres.Files) != 1 {
2140 t.Fatalf("got %v, wanted 1 match", sres.Files)
2141 }
2142
2143 if got, want := sres.Files[0].FileName, "f1"; got != want {
2144 t.Errorf("got %q, want %q", got, want)
2145 }
2146 })
2147
2148 t.Run("ChunkMatches", func(t *testing.T) {
2149 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2150 if len(sres.Files) != 2 {
2151 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2152 }
2153
2154 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2155 if len(sres.Files) != 1 {
2156 t.Fatalf("got %v, wanted 1 match", sres.Files)
2157 }
2158
2159 if got, want := sres.Files[0].FileName, "f1"; got != want {
2160 t.Errorf("got %q, want %q", got, want)
2161 }
2162 })
2163}
2164
2165func TestUnicodeExactMatch(t *testing.T) {
2166 needle := "néédlÉ"
2167 content := []byte("blá blá " + needle + " blâ")
2168
2169 b := testIndexBuilder(t, nil,
2170 Document{Name: "f1", Content: content})
2171
2172 t.Run("LineMatches", func(t *testing.T) {
2173 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2174 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2175 }
2176 })
2177
2178 t.Run("ChunkMatches", func(t *testing.T) {
2179 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2180 if len(res.Files) != 1 {
2181 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2182 }
2183 })
2184}
2185
2186func TestUnicodeCoverContent(t *testing.T) {
2187 needle := "néédlÉ"
2188 content := []byte("blá blá " + needle + " blâ")
2189
2190 b := testIndexBuilder(t, nil,
2191 Document{Name: "f1", Content: content})
2192
2193 t.Run("LineMatches", func(t *testing.T) {
2194 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2195 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2196 }
2197
2198 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2199 if len(res.Files) != 1 {
2200 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2201 }
2202
2203 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2204 t.Errorf("got %d want %d", got, want)
2205 }
2206 })
2207
2208 t.Run("ChunkMatches", func(t *testing.T) {
2209 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2210 if len(res.Files) != 0 {
2211 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2212 }
2213
2214 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2215 if len(res.Files) != 1 {
2216 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2217 }
2218
2219 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2220 want := uint32(strings.Index(string(content), needle))
2221 if got != want {
2222 t.Errorf("got %d want %d", got, want)
2223 }
2224 })
2225}
2226
2227func TestUnicodeNonCoverContent(t *testing.T) {
2228 needle := "nééáádlÉ"
2229 content := []byte("blá blá " + needle + " blâ")
2230
2231 b := testIndexBuilder(t, nil,
2232 Document{Name: "f1", Content: content})
2233
2234 t.Run("LineMatches", func(t *testing.T) {
2235 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2236 if len(res.Files) != 1 {
2237 t.Fatalf("got %v, wanted 1 match", res.Files)
2238 }
2239
2240 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2241 t.Errorf("got %d want %d", got, want)
2242 }
2243 })
2244
2245 t.Run("ChunkMatches", func(t *testing.T) {
2246 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2247 if len(res.Files) != 1 {
2248 t.Fatalf("got %v, wanted 1 match", res.Files)
2249 }
2250
2251 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2252 want := uint32(strings.Index(string(content), needle))
2253 if got != want {
2254 t.Errorf("got %d want %d", got, want)
2255 }
2256 })
2257}
2258
// kelvinCodePoint is the code point U+212A (decimal 8490), the Unicode KELVIN
// SIGN. The tests in this file use it as an upper-case counterpart of the
// ASCII letter 'k' that needs more than one byte in UTF-8.
const kelvinCodePoint = 0x212A
2260
2261func TestUnicodeVariableLength(t *testing.T) {
2262 lower := 'k'
2263 upper := rune(kelvinCodePoint)
2264
2265 needle := "nee" + string([]rune{lower}) + "eed"
2266 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2267 " ee" + string([]rune{lower}) + "ee" +
2268 " ee" + string([]rune{upper}) + "ee")
2269
2270 t.Run("LineMatches", func(t *testing.T) {
2271 b := testIndexBuilder(t, nil,
2272 Document{Name: "f1", Content: []byte(corpus)})
2273
2274 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2275 if len(res.Files) != 1 {
2276 t.Fatalf("got %v, wanted 1 match", res.Files)
2277 }
2278 })
2279
2280 t.Run("ChunkMatches", func(t *testing.T) {
2281 b := testIndexBuilder(t, nil,
2282 Document{Name: "f1", Content: []byte(corpus)})
2283
2284 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2285 if len(res.Files) != 1 {
2286 t.Fatalf("got %v, wanted 1 match", res.Files)
2287 }
2288 })
2289}
2290
2291func TestUnicodeFileStartOffsets(t *testing.T) {
2292 unicode := "世界"
2293 wat := "waaaaaat"
2294 b := testIndexBuilder(t, nil,
2295 Document{
2296 Name: "f1",
2297 Content: []byte(unicode),
2298 },
2299 Document{
2300 Name: "f2",
2301 Content: []byte(wat),
2302 },
2303 )
2304 q := &query.Substring{Pattern: wat, Content: true}
2305 res := searchForTest(t, b, q)
2306 if len(res.Files) != 1 {
2307 t.Fatalf("got %v, wanted 1 match", res.Files)
2308 }
2309}
2310
2311func TestLongFileUTF8(t *testing.T) {
2312 needle := "neeedle"
2313
2314 // 6 bytes.
2315 unicode := "世界"
2316 content := []byte(strings.Repeat(unicode, 100) + needle)
2317 b := testIndexBuilder(t, nil,
2318 Document{
2319 Name: "f1",
2320 Content: []byte(strings.Repeat("a", 50)),
2321 },
2322 Document{
2323 Name: "f2",
2324 Content: content,
2325 })
2326
2327 t.Run("LineMatches", func(t *testing.T) {
2328 q := &query.Substring{Pattern: needle, Content: true}
2329 res := searchForTest(t, b, q)
2330 if len(res.Files) != 1 {
2331 t.Errorf("got %v, want 1 result", res)
2332 }
2333 })
2334
2335 t.Run("ChunkMatches", func(t *testing.T) {
2336 q := &query.Substring{Pattern: needle, Content: true}
2337 res := searchForTest(t, b, q, chunkOpts)
2338 if len(res.Files) != 1 {
2339 t.Errorf("got %v, want 1 result", res)
2340 }
2341 })
2342}
2343
2344func TestEstimateDocCount(t *testing.T) {
2345 content := []byte("bla needle bla")
2346 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2347 Document{Name: "f1", Content: content},
2348 Document{Name: "f2", Content: content},
2349 )
2350
2351 t.Run("LineMatches", func(t *testing.T) {
2352 if sres := searchForTest(t, b,
2353 query.NewAnd(
2354 &query.Substring{Pattern: "needle"},
2355 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2356 ), SearchOptions{
2357 EstimateDocCount: true,
2358 }); sres.Stats.ShardFilesConsidered != 2 {
2359 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2360 }
2361 if sres := searchForTest(t, b,
2362 query.NewAnd(
2363 &query.Substring{Pattern: "needle"},
2364 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2365 ), SearchOptions{
2366 EstimateDocCount: true,
2367 }); sres.Stats.ShardFilesConsidered != 0 {
2368 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2369 }
2370 })
2371
2372 t.Run("ChunkMatches", func(t *testing.T) {
2373 if sres := searchForTest(t, b,
2374 query.NewAnd(
2375 &query.Substring{Pattern: "needle"},
2376 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2377 ), SearchOptions{
2378 EstimateDocCount: true,
2379 ChunkMatches: true,
2380 }); sres.Stats.ShardFilesConsidered != 2 {
2381 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2382 }
2383 if sres := searchForTest(t, b,
2384 query.NewAnd(
2385 &query.Substring{Pattern: "needle"},
2386 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2387 ), SearchOptions{
2388 EstimateDocCount: true,
2389 ChunkMatches: true,
2390 }); sres.Stats.ShardFilesConsidered != 0 {
2391 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2392 }
2393 })
2394}
2395
2396func TestUTF8CorrectCorpus(t *testing.T) {
2397 needle := "neeedle"
2398
2399 // 6 bytes.
2400 unicode := "世界"
2401 b := testIndexBuilder(t, nil,
2402 Document{
2403 Name: "f1",
2404 Content: []byte(strings.Repeat(unicode, 100)),
2405 },
2406 Document{
2407 Name: "xxxxxneeedle",
2408 Content: []byte("hello"),
2409 })
2410
2411 t.Run("LineMatches", func(t *testing.T) {
2412 q := &query.Substring{Pattern: needle, FileName: true}
2413 res := searchForTest(t, b, q)
2414 if len(res.Files) != 1 {
2415 t.Errorf("got %v, want 1 result", res)
2416 }
2417 })
2418
2419 t.Run("ChunkMatches", func(t *testing.T) {
2420 q := &query.Substring{Pattern: needle, FileName: true}
2421 res := searchForTest(t, b, q, chunkOpts)
2422 if len(res.Files) != 1 {
2423 t.Errorf("got %v, want 1 result", res)
2424 }
2425 })
2426}
2427
2428func TestBuilderStats(t *testing.T) {
2429 b := testIndexBuilder(t, nil,
2430 Document{
2431 Name: "f1",
2432 Content: []byte(strings.Repeat("abcd", 1024)),
2433 })
2434 var buf bytes.Buffer
2435 if err := b.Write(&buf); err != nil {
2436 t.Fatal(err)
2437 }
2438
2439 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2440 t.Errorf("got %d, want %d", got, want)
2441 }
2442}
2443
2444func TestIOStats(t *testing.T) {
2445 b := testIndexBuilder(t, nil,
2446 Document{
2447 Name: "f1",
2448 Content: []byte(strings.Repeat("abcd", 1024)),
2449 })
2450
2451 t.Run("LineMatches", func(t *testing.T) {
2452 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2453 res := searchForTest(t, b, q)
2454
2455 // 4096 (content) + 2 (overhead: newlines or doc sections)
2456 if got, want := res.Stats.ContentBytesLoaded, int64(4100); got != want {
2457 t.Errorf("got content I/O %d, want %d", got, want)
2458 }
2459
2460 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2461 // delta encoded.
2462 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2463 t.Errorf("got index I/O %d, want %d", got, want)
2464 }
2465 })
2466
2467 t.Run("ChunkMatches", func(t *testing.T) {
2468 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2469 res := searchForTest(t, b, q, chunkOpts)
2470
2471 // 4096 (content) + 2 (overhead: newlines or doc sections)
2472 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2473 t.Errorf("got content I/O %d, want %d", got, want)
2474 }
2475
2476 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2477 // delta encoded.
2478 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2479 t.Errorf("got index I/O %d, want %d", got, want)
2480 }
2481 })
2482}
2483
2484func TestStartLineAnchor(t *testing.T) {
2485 b := testIndexBuilder(t, nil,
2486 Document{
2487 Name: "f1",
2488 Content: []byte(
2489 `hello
2490start of middle of line
2491`),
2492 })
2493
2494 t.Run("LineMatches", func(t *testing.T) {
2495 q, err := query.Parse("^start")
2496 if err != nil {
2497 t.Errorf("parse: %v", err)
2498 }
2499
2500 res := searchForTest(t, b, q)
2501 if len(res.Files) != 1 {
2502 t.Errorf("got %v, want 1 file", res.Files)
2503 }
2504
2505 q, err = query.Parse("^middle")
2506 if err != nil {
2507 t.Errorf("parse: %v", err)
2508 }
2509 res = searchForTest(t, b, q)
2510 if len(res.Files) != 0 {
2511 t.Errorf("got %v, want 0 files", res.Files)
2512 }
2513 })
2514
2515 t.Run("ChunkMatches", func(t *testing.T) {
2516 q, err := query.Parse("^start")
2517 if err != nil {
2518 t.Errorf("parse: %v", err)
2519 }
2520
2521 res := searchForTest(t, b, q, chunkOpts)
2522 if len(res.Files) != 1 {
2523 t.Errorf("got %v, want 1 file", res.Files)
2524 }
2525
2526 q, err = query.Parse("^middle")
2527 if err != nil {
2528 t.Errorf("parse: %v", err)
2529 }
2530 res = searchForTest(t, b, q, chunkOpts)
2531 if len(res.Files) != 0 {
2532 t.Errorf("got %v, want 0 files", res.Files)
2533 }
2534 })
2535}
2536
2537func TestAndOrUnicode(t *testing.T) {
2538 q, err := query.Parse("orange.*apple")
2539 if err != nil {
2540 t.Errorf("parse: %v", err)
2541 }
2542 finalQ := query.NewAnd(q,
2543 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2544 query.NewOr(&query.Branch{Pattern: "master"}))))
2545
2546 b := testIndexBuilder(t, &Repository{
2547 Name: "name",
2548 Branches: []RepositoryBranch{{"master", "master-version"}},
2549 }, Document{
2550 Name: "f2",
2551 Content: []byte("orange\u2318apple"),
2552 // --------------0123456 78901
2553 Branches: []string{"master"},
2554 })
2555
2556 t.Run("LineMatches", func(t *testing.T) {
2557 res := searchForTest(t, b, finalQ)
2558 if len(res.Files) != 1 {
2559 t.Errorf("got %v, want 1 result", res.Files)
2560 }
2561 })
2562
2563 t.Run("ChunkMatches", func(t *testing.T) {
2564 res := searchForTest(t, b, finalQ, chunkOpts)
2565 if len(res.Files) != 1 {
2566 t.Errorf("got %v, want 1 result", res.Files)
2567 }
2568 })
2569}
2570
2571func TestAndShort(t *testing.T) {
2572 content := []byte("bla needle at orange bla")
2573 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2574 Document{Name: "f1", Content: content},
2575 Document{Name: "f2", Content: []byte("xx at xx")},
2576 Document{Name: "f3", Content: []byte("yy orange xx")},
2577 )
2578
2579 q := query.NewAnd(&query.Substring{Pattern: "at"},
2580 &query.Substring{Pattern: "orange"})
2581
2582 t.Run("LineMatches", func(t *testing.T) {
2583 res := searchForTest(t, b, q)
2584 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2585 t.Errorf("got %v, want 1 result", res.Files)
2586 }
2587 })
2588
2589 t.Run("ChunkMatches", func(t *testing.T) {
2590 res := searchForTest(t, b, q, chunkOpts)
2591 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2592 t.Errorf("got %v, want 1 result", res.Files)
2593 }
2594 })
2595}
2596
2597func TestNoCollectRegexpSubstring(t *testing.T) {
2598 content := []byte("bla final bla\nfoo final, foo")
2599 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2600 Document{Name: "f1", Content: content},
2601 )
2602
2603 q := &query.Regexp{
2604 Regexp: mustParseRE("final[,.]"),
2605 }
2606
2607 t.Run("LineMatches", func(t *testing.T) {
2608 res := searchForTest(t, b, q)
2609 if len(res.Files) != 1 {
2610 t.Fatalf("got %v, want 1 result", res.Files)
2611 }
2612 if f := res.Files[0]; len(f.LineMatches) != 1 {
2613 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2614 }
2615 })
2616
2617 t.Run("ChunkMatches", func(t *testing.T) {
2618 res := searchForTest(t, b, q, chunkOpts)
2619 if len(res.Files) != 1 {
2620 t.Fatalf("got %v, want 1 result", res.Files)
2621 }
2622 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2623 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2624 }
2625 })
2626}
2627
2628func printLineMatches(ms []LineMatch) string {
2629 var ss []string
2630 for _, m := range ms {
2631 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2632 }
2633
2634 return strings.Join(ss, ", ")
2635}
2636
2637func TestLang(t *testing.T) {
2638 content := []byte("bla needle bla")
2639 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2640 Document{Name: "f1", Content: content},
2641 Document{Name: "f2", Language: "java", Content: content},
2642 Document{Name: "f3", Language: "cpp", Content: content},
2643 )
2644
2645 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2646 &query.Language{Language: "cpp"})
2647
2648 t.Run("LineMatches", func(t *testing.T) {
2649 res := searchForTest(t, b, q)
2650 if len(res.Files) != 1 {
2651 t.Fatalf("got %v, want 1 result in f3", res.Files)
2652 }
2653 f := res.Files[0]
2654 if f.FileName != "f3" || f.Language != "cpp" {
2655 t.Fatalf("got %v, want 1 match with language cpp", f)
2656 }
2657 })
2658
2659 t.Run("ChunkMatches", func(t *testing.T) {
2660 res := searchForTest(t, b, q, chunkOpts)
2661 if len(res.Files) != 1 {
2662 t.Fatalf("got %v, want 1 result in f3", res.Files)
2663 }
2664 f := res.Files[0]
2665 if f.FileName != "f3" || f.Language != "cpp" {
2666 t.Fatalf("got %v, want 1 match with language cpp", f)
2667 }
2668 })
2669}
2670
2671func TestLangShortcut(t *testing.T) {
2672 content := []byte("bla needle bla")
2673 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2674 Document{Name: "f2", Language: "java", Content: content},
2675 Document{Name: "f3", Language: "cpp", Content: content},
2676 )
2677
2678 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2679 &query.Language{Language: "fortran"})
2680
2681 t.Run("LineMatches", func(t *testing.T) {
2682 res := searchForTest(t, b, q)
2683 if len(res.Files) != 0 {
2684 t.Fatalf("got %v, want 0 results", res.Files)
2685 }
2686 if res.Stats.IndexBytesLoaded > 0 {
2687 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2688 }
2689 })
2690
2691 t.Run("ChunkMatches", func(t *testing.T) {
2692 res := searchForTest(t, b, q, chunkOpts)
2693 if len(res.Files) != 0 {
2694 t.Fatalf("got %v, want 0 results", res.Files)
2695 }
2696 if res.Stats.IndexBytesLoaded > 0 {
2697 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2698 }
2699 })
2700}
2701
2702func TestNoTextMatchAtoms(t *testing.T) {
2703 content := []byte("bla needle bla")
2704 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2705 Document{Name: "f1", Content: content},
2706 Document{Name: "f2", Language: "java", Content: content},
2707 Document{Name: "f3", Language: "cpp", Content: content},
2708 )
2709 q := query.NewAnd(&query.Language{Language: "java"})
2710 t.Run("LineMatches", func(t *testing.T) {
2711 res := searchForTest(t, b, q)
2712 if len(res.Files) != 1 {
2713 t.Fatalf("got %v, want 1 result in f3", res.Files)
2714 }
2715 })
2716
2717 t.Run("ChunkMatches", func(t *testing.T) {
2718 res := searchForTest(t, b, q, chunkOpts)
2719 if len(res.Files) != 1 {
2720 t.Fatalf("got %v, want 1 result in f3", res.Files)
2721 }
2722 })
2723}
2724
2725func TestNoPositiveAtoms(t *testing.T) {
2726 content := []byte("bla needle bla")
2727 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2728 Document{Name: "f1", Content: content},
2729 Document{Name: "f2", Content: content},
2730 )
2731
2732 q := query.NewAnd(
2733 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2734 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2735 t.Run("LineMatches", func(t *testing.T) {
2736 res := searchForTest(t, b, q)
2737 if len(res.Files) != 2 {
2738 t.Fatalf("got %v, want 2 results in f3", res.Files)
2739 }
2740 })
2741 t.Run("ChunkMatches", func(t *testing.T) {
2742 res := searchForTest(t, b, q, chunkOpts)
2743 if len(res.Files) != 2 {
2744 t.Fatalf("got %v, want 2 results in f3", res.Files)
2745 }
2746 })
2747}
2748
2749func TestSymbolBoundaryStart(t *testing.T) {
2750 content := []byte("start\nbla bla\nend")
2751 // ----------------012345-67890123-456
2752
2753 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2754 Document{
2755 Name: "f1",
2756 Content: content,
2757 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2758 },
2759 )
2760 q := &query.Symbol{
2761 Expr: &query.Substring{Pattern: "start"},
2762 }
2763 t.Run("LineMatches", func(t *testing.T) {
2764 res := searchForTest(t, b, q)
2765 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2766 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2767 }
2768 m := res.Files[0].LineMatches[0].LineFragments[0]
2769 if m.Offset != 0 {
2770 t.Fatalf("got offset %d want 0", m.Offset)
2771 }
2772 })
2773
2774 t.Run("ChunkMatches", func(t *testing.T) {
2775 res := searchForTest(t, b, q, chunkOpts)
2776 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2777 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2778 }
2779 m := res.Files[0].ChunkMatches[0].Ranges[0]
2780 if m.Start.ByteOffset != 0 {
2781 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2782 }
2783 })
2784}
2785
2786func TestSymbolBoundaryEnd(t *testing.T) {
2787 content := []byte("start\nbla bla\nend")
2788 // ----------------012345-67890123-456
2789
2790 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2791 Document{
2792 Name: "f1",
2793 Content: content,
2794 Symbols: []DocumentSection{{14, 17}},
2795 },
2796 )
2797 q := &query.Symbol{
2798 Expr: &query.Substring{Pattern: "end"},
2799 }
2800 t.Run("LineMatches", func(t *testing.T) {
2801 res := searchForTest(t, b, q)
2802 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2803 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2804 }
2805 m := res.Files[0].LineMatches[0].LineFragments[0]
2806 if m.Offset != 14 {
2807 t.Fatalf("got offset %d want 0", m.Offset)
2808 }
2809 })
2810
2811 t.Run("ChunkMatches", func(t *testing.T) {
2812 res := searchForTest(t, b, q, chunkOpts)
2813 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2814 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2815 }
2816 m := res.Files[0].ChunkMatches[0].Ranges[0]
2817 if m.Start.ByteOffset != 14 {
2818 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2819 }
2820 })
2821}
2822
2823func TestSymbolSubstring(t *testing.T) {
2824 content := []byte("bla\nsymblabla\nbla")
2825 // ----------------0123-4567890123-456
2826
2827 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2828 Document{
2829 Name: "f1",
2830 Content: content,
2831 Symbols: []DocumentSection{{4, 12}},
2832 },
2833 )
2834 q := &query.Symbol{
2835 Expr: &query.Substring{Pattern: "bla"},
2836 }
2837 t.Run("LineMatches", func(t *testing.T) {
2838 res := searchForTest(t, b, q)
2839 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2840 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2841 }
2842 m := res.Files[0].LineMatches[0].LineFragments[0]
2843 if m.Offset != 7 || m.MatchLength != 3 {
2844 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2845 }
2846 })
2847
2848 t.Run("ChunkMatches", func(t *testing.T) {
2849 res := searchForTest(t, b, q, chunkOpts)
2850 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2851 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2852 }
2853 m := res.Files[0].ChunkMatches[0].Ranges[0]
2854 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2855 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2856 }
2857 })
2858}
2859
2860func TestSymbolSubstringExact(t *testing.T) {
2861 content := []byte("bla\nsym\nbla\nsym\nasymb")
2862 // ----------------0123-4567-890123456-78901
2863
2864 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2865 Document{
2866 Name: "f1",
2867 Content: content,
2868 Symbols: []DocumentSection{{4, 7}},
2869 },
2870 )
2871 q := &query.Symbol{
2872 Expr: &query.Substring{Pattern: "sym"},
2873 }
2874 t.Run("LineMatches", func(t *testing.T) {
2875 res := searchForTest(t, b, q)
2876 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2877 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2878 }
2879 m := res.Files[0].LineMatches[0].LineFragments[0]
2880 if m.Offset != 4 {
2881 t.Fatalf("got offset %d, want 7", m.Offset)
2882 }
2883 })
2884
2885 t.Run("ChunkMatches", func(t *testing.T) {
2886 res := searchForTest(t, b, q, chunkOpts)
2887 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2888 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2889 }
2890 m := res.Files[0].ChunkMatches[0].Ranges[0]
2891 if m.Start.ByteOffset != 4 {
2892 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2893 }
2894 })
2895}
2896
2897func TestSymbolRegexpExact(t *testing.T) {
2898 content := []byte("blah\nbla\nbl")
2899 // ----------------01234-5678-90
2900
2901 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2902 Document{
2903 Name: "f1",
2904 Content: content,
2905 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2906 },
2907 )
2908 q := &query.Symbol{
2909 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2910 }
2911 t.Run("LineMatches", func(t *testing.T) {
2912 res := searchForTest(t, b, q)
2913 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2914 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2915 }
2916 m := res.Files[0].LineMatches[0].LineFragments[0]
2917 if m.Offset != 5 {
2918 t.Fatalf("got offset %d, want 5", m.Offset)
2919 }
2920 })
2921
2922 t.Run("ChunkMatches", func(t *testing.T) {
2923 res := searchForTest(t, b, q, chunkOpts)
2924 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2925 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2926 }
2927 m := res.Files[0].ChunkMatches[0].Ranges[0]
2928 if m.Start.ByteOffset != 5 {
2929 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2930 }
2931 })
2932}
2933
2934func TestSymbolRegexpPartial(t *testing.T) {
2935 content := []byte("abcdef")
2936 // ----------------012345
2937
2938 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2939 Document{
2940 Name: "f1",
2941 Content: content,
2942 Symbols: []DocumentSection{{0, 6}},
2943 },
2944 )
2945 q := &query.Symbol{
2946 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2947 }
2948 t.Run("LineMatches", func(t *testing.T) {
2949 res := searchForTest(t, b, q)
2950 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2951 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2952 }
2953 m := res.Files[0].LineMatches[0].LineFragments[0]
2954 if m.Offset != 1 {
2955 t.Fatalf("got offset %d, want 1", m.Offset)
2956 }
2957 if m.MatchLength != 3 {
2958 t.Fatalf("got match length %d, want 3", m.MatchLength)
2959 }
2960 })
2961
2962 t.Run("ChunkMatches", func(t *testing.T) {
2963 res := searchForTest(t, b, q, chunkOpts)
2964 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2965 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2966 }
2967 m := res.Files[0].ChunkMatches[0].Ranges[0]
2968 if m.Start.ByteOffset != 1 {
2969 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2970 }
2971 if m.End.ByteOffset != 4 {
2972 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2973 }
2974 })
2975}
2976
2977func TestSymbolRegexpAll(t *testing.T) {
2978 docs := []Document{
2979 {
2980 Name: "f1",
2981 Content: []byte("Hello Zoekt"),
2982 // --------------01234567890
2983 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2984 },
2985 {
2986 Name: "f2",
2987 Content: []byte("Second Zoekt Third"),
2988 // --------------012345678901234567
2989 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2990 },
2991 }
2992
2993 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2994 q := &query.Symbol{
2995 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2996 }
2997 t.Run("LineMatches", func(t *testing.T) {
2998 res := searchForTest(t, b, q)
2999 if len(res.Files) != len(docs) {
3000 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3001 }
3002 for i, want := range docs {
3003 got := res.Files[i].LineMatches[0].LineFragments
3004 if len(got) != len(want.Symbols) {
3005 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3006 }
3007
3008 for j, sec := range want.Symbols {
3009 if sec.Start != got[j].Offset {
3010 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3011 }
3012 }
3013 }
3014 })
3015
3016 t.Run("ChunkMatches", func(t *testing.T) {
3017 res := searchForTest(t, b, q, chunkOpts)
3018 if len(res.Files) != len(docs) {
3019 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3020 }
3021 for i, want := range docs {
3022 got := res.Files[i].ChunkMatches[0].Ranges
3023 if len(got) != len(want.Symbols) {
3024 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3025 }
3026
3027 for j, sec := range want.Symbols {
3028 if sec.Start != uint32(got[j].Start.ByteOffset) {
3029 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3030 }
3031 }
3032 }
3033 })
3034}
3035
3036func TestHitIterTerminate(t *testing.T) {
3037 // contrived input: trigram frequencies forces selecting abc +
3038 // def for the distance iteration. There is no match, so this
3039 // will advance the compressedPostingIterator to beyond the
3040 // end.
3041 content := []byte("abc bcdbcd cdecde abcabc def efg")
3042 b := testIndexBuilder(t, nil,
3043 Document{
3044 Name: "f1",
3045 Content: content,
3046 },
3047 )
3048
3049 t.Run("LineMatches", func(t *testing.T) {
3050 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3051 })
3052
3053 t.Run("ChunkMatches", func(t *testing.T) {
3054 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3055 })
3056}
3057
3058func TestDistanceHitIterBailLast(t *testing.T) {
3059 content := []byte("AST AST AST UASH")
3060 b := testIndexBuilder(t, nil,
3061 Document{
3062 Name: "f1",
3063 Content: content,
3064 },
3065 )
3066 t.Run("LineMatches", func(t *testing.T) {
3067 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3068 if len(res.Files) != 0 {
3069 t.Fatalf("got %v, want no results", res.Files)
3070 }
3071 })
3072
3073 t.Run("LineMatches", func(t *testing.T) {
3074 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3075 if len(res.Files) != 0 {
3076 t.Fatalf("got %v, want no results", res.Files)
3077 }
3078 })
3079}
3080
3081func TestDocumentSectionRuneBoundary(t *testing.T) {
3082 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3083 b, err := NewIndexBuilder(nil)
3084 if err != nil {
3085 t.Fatalf("NewIndexBuilder: %v", err)
3086 }
3087
3088 for i, sec := range []DocumentSection{
3089 {2, 6},
3090 {3, 7},
3091 } {
3092 if err := b.Add(Document{
3093 Name: "f1",
3094 Content: []byte(content),
3095 Symbols: []DocumentSection{sec},
3096 }); err == nil {
3097 t.Errorf("%d: Add succeeded", i)
3098 }
3099 }
3100}
3101
3102func TestUnicodeQuery(t *testing.T) {
3103 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3104 b := testIndexBuilder(t, nil,
3105 Document{
3106 Name: "f1",
3107 Content: []byte(content),
3108 },
3109 )
3110
3111 q := &query.Substring{Pattern: content}
3112
3113 t.Run("LineMatches", func(t *testing.T) {
3114 res := searchForTest(t, b, q)
3115 if len(res.Files) != 1 {
3116 t.Fatalf("want 1 match, got %v", res.Files)
3117 }
3118
3119 f := res.Files[0]
3120 if len(f.LineMatches) != 1 {
3121 t.Fatalf("want 1 line, got %v", f.LineMatches)
3122 }
3123 l := f.LineMatches[0]
3124
3125 if len(l.LineFragments) != 1 {
3126 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3127 }
3128 fr := l.LineFragments[0]
3129 if fr.MatchLength != len(content) {
3130 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3131 }
3132 })
3133
3134 t.Run("ChunkMatches", func(t *testing.T) {
3135 res := searchForTest(t, b, q, chunkOpts)
3136 if len(res.Files) != 1 {
3137 t.Fatalf("want 1 match, got %v", res.Files)
3138 }
3139
3140 f := res.Files[0]
3141 if len(f.ChunkMatches) != 1 {
3142 t.Fatalf("want 1 line, got %v", f.LineMatches)
3143 }
3144 cm := f.ChunkMatches[0]
3145
3146 if len(cm.Ranges) != 1 {
3147 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3148 }
3149 rr := cm.Ranges[0]
3150 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3151 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3152 }
3153 })
3154}
3155
3156func TestSkipInvalidContent(t *testing.T) {
3157 for _, content := range []string{
3158 // Binary
3159 "abc def \x00 abc",
3160 } {
3161
3162 b, err := NewIndexBuilder(nil)
3163 if err != nil {
3164 t.Fatalf("NewIndexBuilder: %v", err)
3165 }
3166
3167 if err := b.Add(Document{
3168 Name: "f1",
3169 Content: []byte(content),
3170 }); err != nil {
3171 t.Fatal(err)
3172 }
3173
3174 t.Run("LineMatches", func(t *testing.T) {
3175 q := &query.Substring{Pattern: "abc def"}
3176 res := searchForTest(t, b, q)
3177 if len(res.Files) != 0 {
3178 t.Fatalf("got %v, want no results", res.Files)
3179 }
3180
3181 q = &query.Substring{Pattern: "NOT-INDEXED"}
3182 res = searchForTest(t, b, q)
3183 if len(res.Files) != 1 {
3184 t.Fatalf("got %v, want 1 result", res.Files)
3185 }
3186 })
3187
3188 t.Run("ChunkMatches", func(t *testing.T) {
3189 q := &query.Substring{Pattern: "abc def"}
3190 res := searchForTest(t, b, q, chunkOpts)
3191 if len(res.Files) != 0 {
3192 t.Fatalf("got %v, want no results", res.Files)
3193 }
3194
3195 q = &query.Substring{Pattern: "NOT-INDEXED"}
3196 res = searchForTest(t, b, q, chunkOpts)
3197 if len(res.Files) != 1 {
3198 t.Fatalf("got %v, want 1 result", res.Files)
3199 }
3200 })
3201 }
3202}
3203
3204func TestDocChecker(t *testing.T) {
3205 docChecker := DocChecker{}
3206
3207 // Test valid and invalid text
3208 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3209 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3210 t.Errorf("Check(%q): %v", text, err)
3211 }
3212 }
3213 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3214 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3215 t.Errorf("Check(%q) succeeded", text)
3216 }
3217 }
3218
3219 // Test valid and invalid text with an allowed large file
3220 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3221 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3222 t.Errorf("Check(%q): %v", text, err)
3223 }
3224 }
3225 for _, text := range []string{"zero\x00byte", "xx"} {
3226 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3227 t.Errorf("Check(%q) succeeded", text)
3228 }
3229 }
3230}
3231
3232func TestLineAnd(t *testing.T) {
3233 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3234 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3235 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3236 Document{Name: "f3", Content: []byte("banana grape")},
3237 )
3238 pattern := "(apple)(?-s:.)*?(banana)"
3239 r, _ := syntax.Parse(pattern, syntax.Perl)
3240
3241 q := query.Regexp{
3242 Regexp: r,
3243 Content: true,
3244 }
3245 t.Run("LineMatches", func(t *testing.T) {
3246 res := searchForTest(t, b, &q)
3247 wantRegexpCount := 1
3248 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3249 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3250 }
3251 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3252 t.Errorf("got %v, want 1 result", res.Files)
3253 }
3254 })
3255
3256 t.Run("ChunkMatches", func(t *testing.T) {
3257 res := searchForTest(t, b, &q, chunkOpts)
3258 wantRegexpCount := 1
3259 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3260 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3261 }
3262 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3263 t.Errorf("got %v, want 1 result", res.Files)
3264 }
3265 })
3266}
3267
3268func TestLineAndFileName(t *testing.T) {
3269 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3270 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3271 Document{Name: "f2", Content: []byte("apple banana\norange")},
3272 Document{Name: "apple banana", Content: []byte("banana grape")},
3273 )
3274 pattern := "(apple)(?-s:.)*?(banana)"
3275 r, _ := syntax.Parse(pattern, syntax.Perl)
3276
3277 q := query.Regexp{
3278 Regexp: r,
3279 FileName: true,
3280 }
3281 t.Run("LineMatches", func(t *testing.T) {
3282 res := searchForTest(t, b, &q)
3283 wantRegexpCount := 1
3284 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3285 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3286 }
3287 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3288 t.Errorf("got %v, want 1 result", res.Files)
3289 }
3290 })
3291
3292 t.Run("ChunkMatches", func(t *testing.T) {
3293 res := searchForTest(t, b, &q, chunkOpts)
3294 wantRegexpCount := 1
3295 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3296 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3297 }
3298 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3299 t.Errorf("got %v, want 1 result", res.Files)
3300 }
3301 })
3302}
3303
3304func TestMultiLineRegex(t *testing.T) {
3305 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3306 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3307 Document{Name: "f2", Content: []byte("apple orange")},
3308 Document{Name: "f3", Content: []byte("grape apple")},
3309 )
3310 pattern := "(apple).*?[[:space:]].*?(grape)"
3311 r, _ := syntax.Parse(pattern, syntax.Perl)
3312
3313 q := query.Regexp{
3314 Regexp: r,
3315 }
3316 t.Run("LineMatches", func(t *testing.T) {
3317 res := searchForTest(t, b, &q)
3318 wantRegexpCount := 2
3319 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3320 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3321 }
3322 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3323 t.Errorf("got %v, want 1 result", res.Files)
3324 }
3325 if l := len(res.Files[0].LineMatches); l != 2 {
3326 t.Errorf("got %v, want 2 line matches", l)
3327 }
3328 })
3329
3330 t.Run("ChunkMatches", func(t *testing.T) {
3331 res := searchForTest(t, b, &q, chunkOpts)
3332 wantRegexpCount := 2
3333 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3334 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3335 }
3336 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3337 t.Errorf("got %v, want 1 result", res.Files)
3338 }
3339 if l := len(res.Files[0].ChunkMatches); l != 1 {
3340 t.Errorf("got %v, want 1 chunk matches", l)
3341 }
3342 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3343 t.Errorf("got %v, want 1 chunk ranges", l)
3344 }
3345 })
3346}
3347
3348func TestSearchTypeFileName(t *testing.T) {
3349 b := testIndexBuilder(t, &Repository{
3350 Name: "reponame",
3351 },
3352 Document{Name: "f1", Content: []byte("bla the needle")},
3353 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3354 // -----------------------------------012345678901234567890-123456
3355 )
3356
3357 t.Run("LineMatches", func(t *testing.T) {
3358 wantSingleMatch := func(res *SearchResult, want string) {
3359 t.Helper()
3360 fmatches := res.Files
3361 if len(fmatches) != 1 {
3362 t.Errorf("got %v, want 1 matches", len(fmatches))
3363 return
3364 }
3365 if len(fmatches[0].LineMatches) != 1 {
3366 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3367 return
3368 }
3369 var got string
3370 if fmatches[0].LineMatches[0].FileName {
3371 got = fmatches[0].FileName
3372 } else {
3373 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3374 }
3375
3376 if got != want {
3377 t.Errorf("got %s, want %s", got, want)
3378 }
3379 }
3380
3381 // Only return the later match in the second file
3382 res := searchForTest(t, b, query.NewAnd(
3383 &query.Type{
3384 Type: query.TypeFileName,
3385 Child: &query.Substring{Pattern: "needle"},
3386 },
3387 &query.Substring{Pattern: "file"}))
3388 wantSingleMatch(res, "f2:8")
3389
3390 // Only return a filename result
3391 res = searchForTest(t, b,
3392 &query.Type{
3393 Type: query.TypeFileName,
3394 Child: &query.Substring{Pattern: "file"},
3395 })
3396 wantSingleMatch(res, "f2")
3397 })
3398
3399 t.Run("ChunkMatches", func(t *testing.T) {
3400 wantSingleMatch := func(res *SearchResult, want string) {
3401 t.Helper()
3402 fmatches := res.Files
3403 if len(fmatches) != 1 {
3404 t.Errorf("got %v, want 1 matches", len(fmatches))
3405 return
3406 }
3407 if len(fmatches[0].ChunkMatches) != 1 {
3408 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3409 return
3410 }
3411 var got string
3412 if fmatches[0].ChunkMatches[0].FileName {
3413 got = fmatches[0].FileName
3414 } else {
3415 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3416 }
3417
3418 if got != want {
3419 t.Errorf("got %s, want %s", got, want)
3420 }
3421 }
3422
3423 // Only return the later match in the second file
3424 res := searchForTest(t, b, query.NewAnd(
3425 &query.Type{
3426 Type: query.TypeFileName,
3427 Child: &query.Substring{Pattern: "needle"},
3428 },
3429 &query.Substring{Pattern: "file"}),
3430 chunkOpts,
3431 )
3432 wantSingleMatch(res, "f2:8")
3433
3434 // Only return a filename result
3435 res = searchForTest(t, b,
3436 &query.Type{
3437 Type: query.TypeFileName,
3438 Child: &query.Substring{Pattern: "file"},
3439 },
3440 chunkOpts,
3441 )
3442 wantSingleMatch(res, "f2")
3443 })
3444}
3445
3446func TestSearchTypeLanguage(t *testing.T) {
3447 b := testIndexBuilder(t, &Repository{
3448 Name: "reponame",
3449 },
3450 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3451 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3452 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3453 Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
3454 )
3455
3456 t.Log(b.languageMap)
3457
3458 t.Run("LineMatches", func(t *testing.T) {
3459 wantSingleMatch := func(res *SearchResult, want string) {
3460 t.Helper()
3461 fmatches := res.Files
3462 if len(fmatches) != 1 {
3463 t.Errorf("got %v, want 1 matches", len(fmatches))
3464 return
3465 }
3466 if len(fmatches[0].LineMatches) != 1 {
3467 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3468 return
3469 }
3470 var got string
3471 if fmatches[0].LineMatches[0].FileName {
3472 got = fmatches[0].FileName
3473 } else {
3474 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3475 }
3476
3477 if got != want {
3478 t.Errorf("got %s, want %s", got, want)
3479 }
3480 }
3481
3482 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3483 wantSingleMatch(res, "apex.cls")
3484
3485 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3486 wantSingleMatch(res, "tex.cls")
3487
3488 res = searchForTest(t, b, &query.Language{Language: "C"})
3489 wantSingleMatch(res, "hello.h")
3490
3491 res = searchForTest(t, b, &query.Language{Language: "Magik"})
3492 wantSingleMatch(res, "be.magik")
3493
3494 // test fallback language search by pretending it's an older index version
3495 res = searchForTest(t, b, &query.Language{Language: "C++"})
3496 if len(res.Files) != 0 {
3497 t.Errorf("got %d results for C++, want 0", len(res.Files))
3498 }
3499
3500 b.featureVersion = 11 // force fallback
3501 res = searchForTest(t, b, &query.Language{Language: "C++"})
3502 wantSingleMatch(res, "hello.h")
3503 })
3504
3505 t.Run("ChunkMatches", func(t *testing.T) {
3506 wantSingleMatch := func(res *SearchResult, want string) {
3507 t.Helper()
3508 fmatches := res.Files
3509 if len(fmatches) != 1 {
3510 t.Errorf("got %v, want 1 matches", len(fmatches))
3511 return
3512 }
3513 if len(fmatches[0].ChunkMatches) != 1 {
3514 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3515 return
3516 }
3517 var got string
3518 if fmatches[0].ChunkMatches[0].FileName {
3519 got = fmatches[0].FileName
3520 } else {
3521 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3522 }
3523
3524 if got != want {
3525 t.Errorf("got %s, want %s", got, want)
3526 }
3527 }
3528
3529 b.featureVersion = FeatureVersion // reset feature version
3530 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3531 wantSingleMatch(res, "apex.cls")
3532
3533 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3534 wantSingleMatch(res, "tex.cls")
3535
3536 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3537 wantSingleMatch(res, "hello.h")
3538
3539 // test fallback language search by pretending it's an older index version
3540 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3541 if len(res.Files) != 0 {
3542 t.Errorf("got %d results for C++, want 0", len(res.Files))
3543 }
3544
3545 b.featureVersion = 11 // force fallback
3546 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3547 wantSingleMatch(res, "hello.h")
3548 })
3549}
3550
3551func TestStats(t *testing.T) {
3552 ignored := []cmp.Option{
3553 cmpopts.EquateEmpty(),
3554 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3555 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3556 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3557 }
3558
3559 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3560 searcher := searcherForTest(t, b)
3561 indexdata := searcher.(*indexData)
3562 return indexdata.repoListEntry
3563 }
3564
3565 t.Run("one empty repo", func(t *testing.T) {
3566 b := testIndexBuilder(t, nil)
3567 got := repoListEntries(b)
3568 want := []RepoListEntry{
3569 {
3570 Stats: RepoStats{
3571 Repos: 0,
3572 Shards: 1,
3573 Documents: 0,
3574 IndexBytes: 20,
3575 ContentBytes: 0,
3576 NewLinesCount: 0,
3577 DefaultBranchNewLinesCount: 0,
3578 OtherBranchesNewLinesCount: 0,
3579 },
3580 },
3581 }
3582
3583 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3584 t.Fatalf("mismatch (-want +got):\n%s", diff)
3585 }
3586 })
3587
3588 t.Run("one simple shard", func(t *testing.T) {
3589 b := testIndexBuilder(t, nil,
3590 Document{Name: "doc 0", Content: []byte("content 0")},
3591 Document{Name: "doc 1", Content: []byte("content 1")},
3592 )
3593 got := repoListEntries(b)
3594 want := []RepoListEntry{
3595 {
3596 Stats: RepoStats{
3597 Repos: 0,
3598 Shards: 1,
3599 Documents: 2,
3600 IndexBytes: 224,
3601 ContentBytes: 28,
3602 NewLinesCount: 0,
3603 DefaultBranchNewLinesCount: 0,
3604 OtherBranchesNewLinesCount: 0,
3605 },
3606 },
3607 }
3608
3609 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3610 t.Fatalf("mismatch (-want +got):\n%s", diff)
3611 }
3612 })
3613
3614 t.Run("one compound shard", func(t *testing.T) {
3615 b := testIndexBuilderCompound(t,
3616 []*Repository{
3617 {Name: "repo 0"},
3618 {Name: "repo 1"},
3619 },
3620 [][]Document{
3621 {
3622 {Name: "doc 0", Content: []byte("content 0")},
3623 {Name: "doc 1", Content: []byte("content 1")},
3624 },
3625 {
3626 {Name: "doc 2", Content: []byte("content 2")},
3627 {Name: "doc 3", Content: []byte("content 3")},
3628 },
3629 },
3630 )
3631 got := repoListEntries(b)
3632 want := []RepoListEntry{
3633 {
3634 Stats: RepoStats{
3635 Repos: 0,
3636 Shards: 1,
3637 Documents: 2,
3638 IndexBytes: 180,
3639 ContentBytes: 28,
3640 NewLinesCount: 0,
3641 DefaultBranchNewLinesCount: 0,
3642 OtherBranchesNewLinesCount: 0,
3643 },
3644 },
3645 {
3646 Stats: RepoStats{
3647 Repos: 0,
3648 Shards: 1,
3649 Documents: 2,
3650 IndexBytes: 180,
3651 ContentBytes: 28,
3652 NewLinesCount: 0,
3653 DefaultBranchNewLinesCount: 0,
3654 OtherBranchesNewLinesCount: 0,
3655 },
3656 },
3657 }
3658
3659 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3660 t.Fatalf("mismatch (-want +got):\n%s", diff)
3661 }
3662 })
3663
3664 t.Run("compound shard with empty repos", func(t *testing.T) {
3665 b := testIndexBuilderCompound(t,
3666 []*Repository{
3667 {Name: "repo 0"},
3668 {Name: "repo 1"},
3669 {Name: "repo 2"},
3670 {Name: "repo 3"},
3671 {Name: "repo 4"},
3672 },
3673 [][]Document{
3674 {{Name: "doc 0", Content: []byte("content 0")}},
3675 nil,
3676 {{Name: "doc 1", Content: []byte("content 1")}},
3677 nil,
3678 nil,
3679 },
3680 )
3681 got := repoListEntries(b)
3682
3683 entryEmpty := RepoListEntry{Stats: RepoStats{
3684 Shards: 1,
3685 Documents: 0,
3686 ContentBytes: 0,
3687 }}
3688 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3689 Shards: 1,
3690 Documents: 1,
3691 ContentBytes: 14,
3692 }}
3693
3694 want := []RepoListEntry{
3695 entryNonEmpty,
3696 entryEmpty,
3697 entryNonEmpty,
3698 entryEmpty,
3699 entryEmpty,
3700 }
3701
3702 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3703 t.Fatalf("mismatch (-want +got):\n%s", diff)
3704 }
3705 })
3706}
3707
3708// This tests the frequent pattern "\bLITERAL\b".
3709func TestWordSearch(t *testing.T) {
3710 content := []byte("needle the bla")
3711 // ----------------01234567890123
3712
3713 b := testIndexBuilder(t, nil,
3714 Document{
3715 Name: "f1",
3716 Content: content,
3717 })
3718
3719 t.Run("LineMatches", func(t *testing.T) {
3720 sres := searchForTest(t, b,
3721 &query.Regexp{
3722 Regexp: mustParseRE("\\bthe\\b"),
3723 CaseSensitive: true,
3724 Content: true,
3725 })
3726
3727 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3728 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3729 }
3730
3731 if sres.Stats.RegexpsConsidered != 0 {
3732 t.Fatal("expected regexp to be skipped")
3733 }
3734
3735 got := sres.Files[0].LineMatches[0]
3736 want := LineMatch{
3737 LineFragments: []LineFragmentMatch{{
3738 LineOffset: 7,
3739 Offset: 7,
3740 MatchLength: 3,
3741 }},
3742 Line: content,
3743 FileName: false,
3744 LineNumber: 1,
3745 LineStart: 0,
3746 LineEnd: 14,
3747 }
3748
3749 if !reflect.DeepEqual(got, want) {
3750 t.Errorf("got %#v, want %#v", got, want)
3751 }
3752 })
3753
3754 t.Run("ChunkMatches", func(t *testing.T) {
3755 sres := searchForTest(t, b,
3756 &query.Regexp{
3757 Regexp: mustParseRE("\\bthe\\b"),
3758 CaseSensitive: true,
3759 }, chunkOpts)
3760
3761 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3762 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3763 }
3764
3765 if sres.Stats.RegexpsConsidered != 0 {
3766 t.Fatal("expected regexp to be skipped")
3767 }
3768
3769 got := sres.Files[0].ChunkMatches[0]
3770 want := ChunkMatch{
3771 Content: content,
3772 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3773 Ranges: []Range{{
3774 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3775 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3776 }},
3777 }
3778
3779 if diff := cmp.Diff(want, got); diff != "" {
3780 t.Fatal(diff)
3781 }
3782 })
3783}