fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 }
42 r.Files[i].Checksum = nil
43 r.Files[i].Debug = ""
44 }
45}
46
47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
48 t.Helper()
49
50 b, err := NewIndexBuilder(repo)
51 if err != nil {
52 t.Fatalf("NewIndexBuilder: %v", err)
53 }
54
55 for i, d := range docs {
56 if err := b.Add(d); err != nil {
57 t.Fatalf("Add %d: %v", i, err)
58 }
59 }
60
61 return b
62}
63
64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
65 t.Helper()
66
67 b := newIndexBuilder()
68 b.indexFormatVersion = NextIndexFormatVersion
69
70 if len(repos) != len(docs) {
71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
72 }
73
74 for i, repo := range repos {
75 if err := b.setRepository(repo); err != nil {
76 t.Fatal(err)
77 }
78 for j, d := range docs[i] {
79 if err := b.Add(d); err != nil {
80 t.Fatalf("Add %d %d: %v", i, j, err)
81 }
82 }
83 }
84
85 return b
86}
87
88func TestBoundary(t *testing.T) {
89 b := testIndexBuilder(t, nil,
90 Document{Name: "f1", Content: []byte("x the")},
91 Document{Name: "f1", Content: []byte("reader")})
92 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
93 if len(res.Files) > 0 {
94 t.Fatalf("got %v, want no matches", res.Files)
95 }
96}
97
98func TestDocSectionInvalid(t *testing.T) {
99 b, err := NewIndexBuilder(nil)
100 if err != nil {
101 t.Fatalf("NewIndexBuilder: %v", err)
102 }
103 doc := Document{
104 Name: "f1",
105 Content: []byte("01234567890123"),
106 Symbols: []DocumentSection{{5, 8}, {7, 9}},
107 }
108
109 if err := b.Add(doc); err == nil {
110 t.Errorf("overlapping doc sections should fail")
111 }
112
113 doc = Document{
114 Name: "f1",
115 Content: []byte("01234567890123"),
116 Symbols: []DocumentSection{{0, 20}},
117 }
118
119 if err := b.Add(doc); err == nil {
120 t.Errorf("doc sections beyond EOF should fail")
121 }
122}
123
124func TestBasic(t *testing.T) {
125 b := testIndexBuilder(t, nil,
126 Document{
127 Name: "f2",
128 Content: []byte("to carry water in the no later bla"),
129 // --------------0123456789012345678901234567890123
130 })
131
132 t.Run("LineMatch", func(t *testing.T) {
133 res := searchForTest(t, b, &query.Substring{
134 Pattern: "water",
135 CaseSensitive: true,
136 })
137 fmatches := res.Files
138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
139 t.Fatalf("got %v, want 1 matches", fmatches)
140 }
141
142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
143 want := "f2:9"
144 if got != want {
145 t.Errorf("1: got %s, want %s", got, want)
146 }
147 })
148
149 t.Run("ChunkMatch", func(t *testing.T) {
150 res := searchForTest(t, b, &query.Substring{
151 Pattern: "water",
152 CaseSensitive: true,
153 }, chunkOpts)
154 fmatches := res.Files
155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
156 t.Fatalf("got %v, want 1 matches", fmatches)
157 }
158
159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
160 want := "f2:9"
161 if got != want {
162 t.Errorf("1: got %s, want %s", got, want)
163 }
164 })
165}
166
167func TestEmptyIndex(t *testing.T) {
168 b := testIndexBuilder(t, nil)
169 searcher := searcherForTest(t, b)
170
171 var opts SearchOptions
172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
173 t.Fatalf("Search: %v", err)
174 }
175
176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
177 t.Fatalf("List: %v", err)
178 }
179
180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
181 t.Fatalf("Search: %v", err)
182 }
183}
184
// memSeeker serves index data from an in-memory byte slice. searcherForTest
// uses it to hand a freshly written index to NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at byte offset off. The returned
// slice aliases the underlying buffer. A range that extends past the end of
// the data is reported as an error rather than causing a slice-bounds panic.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in 64 bits: off+sz can wrap around in uint32.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d, %d) out of bounds (size %d)", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
201
202func TestNewlines(t *testing.T) {
203 b := testIndexBuilder(t, nil,
204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
205 // ---------------------------------------------012345-678901-234
206
207 t.Run("LineMatches", func(t *testing.T) {
208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
209
210 matches := sres.Files
211 want := []FileMatch{{
212 FileName: "filename",
213 LineMatches: []LineMatch{{
214 LineFragments: []LineFragmentMatch{{
215 Offset: 8,
216 LineOffset: 2,
217 MatchLength: 3,
218 }},
219 Line: []byte("line2"),
220 LineStart: 6,
221 LineEnd: 11,
222 LineNumber: 2,
223 }},
224 }}
225
226 if !reflect.DeepEqual(matches, want) {
227 t.Errorf("got %v, want %v", matches, want)
228 }
229 })
230
231 t.Run("ChunkMatches", func(t *testing.T) {
232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
233
234 matches := sres.Files
235 want := []FileMatch{{
236 FileName: "filename",
237 ChunkMatches: []ChunkMatch{{
238 Content: []byte("line2"),
239 ContentStart: Location{
240 ByteOffset: 6,
241 LineNumber: 2,
242 Column: 1,
243 },
244 Ranges: []Range{{
245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
247 }},
248 }},
249 }}
250
251 if diff := cmp.Diff(want, matches); diff != "" {
252 t.Fatal(diff)
253 }
254 })
255}
256
257// A result spanning multiple lines should have LineMatches that only cover
258// single lines.
259func TestQueryNewlines(t *testing.T) {
260 text := "line1\nline2\nbla"
261 b := testIndexBuilder(t, nil,
262 Document{Name: "filename", Content: []byte(text)})
263
264 t.Run("LineMatches", func(t *testing.T) {
265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
266 matches := sres.Files
267 if len(matches) != 1 {
268 t.Fatalf("got %d file matches, want exactly one", len(matches))
269 }
270 m := matches[0]
271 if len(m.LineMatches) != 2 {
272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches))
273 }
274 })
275
276 t.Run("ChunkMatches", func(t *testing.T) {
277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
278 matches := sres.Files
279 if len(matches) != 1 {
280 t.Fatalf("got %d file matches, want exactly one", len(matches))
281 }
282 m := matches[0]
283 if len(m.ChunkMatches) != 1 {
284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
285 }
286 })
287}
288
// chunkOpts is the SearchOptions value the "ChunkMatches" subtests pass to
// request ChunkMatches in results; tests that pass no options get the zero
// SearchOptions instead.
var chunkOpts = SearchOptions{ChunkMatches: true}
290
291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
292 searcher := searcherForTest(t, b)
293 var opts SearchOptions
294 if len(o) > 0 {
295 opts = o[0]
296 }
297 res, err := searcher.Search(context.Background(), q, &opts)
298 if err != nil {
299 t.Fatalf("Search(%s): %v", q, err)
300 }
301 clearScores(res)
302 return res
303}
304
305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
306 var buf bytes.Buffer
307 if err := b.Write(&buf); err != nil {
308 t.Fatal(err)
309 }
310 f := &memSeeker{buf.Bytes()}
311
312 searcher, err := NewSearcher(f)
313 if err != nil {
314 t.Fatalf("NewSearcher: %v", err)
315 }
316
317 return searcher
318}
319
320func TestCaseFold(t *testing.T) {
321 b := testIndexBuilder(t, nil,
322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
323 // -----------------------------------012345678901234
324 )
325 t.Run("LineMatches", func(t *testing.T) {
326 sres := searchForTest(t, b, &query.Substring{
327 Pattern: "bananas",
328 CaseSensitive: true,
329 })
330 matches := sres.Files
331 if len(matches) != 0 {
332 t.Errorf("foldcase: got %#v, want 0 matches", matches)
333 }
334
335 sres = searchForTest(t, b,
336 &query.Substring{
337 Pattern: "BaNaNAS",
338 CaseSensitive: true,
339 })
340 matches = sres.Files
341 if len(matches) != 1 {
342 t.Errorf("no foldcase: got %v, want 1 matches", matches)
343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
344 t.Errorf("foldcase: got %v, want offsets 7", matches)
345 }
346 })
347
348 t.Run("ChunkMatches", func(t *testing.T) {
349 sres := searchForTest(t, b, &query.Substring{
350 Pattern: "bananas",
351 CaseSensitive: true,
352 }, chunkOpts)
353 matches := sres.Files
354 if len(matches) != 0 {
355 t.Errorf("foldcase: got %#v, want 0 matches", matches)
356 }
357
358 sres = searchForTest(t, b,
359 &query.Substring{
360 Pattern: "BaNaNAS",
361 CaseSensitive: true,
362 })
363 matches = sres.Files
364 if len(matches) != 1 {
365 t.Errorf("no foldcase: got %v, want 1 matches", matches)
366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
367 t.Errorf("foldcase: got %v, want offsets 7", matches)
368 }
369 })
370}
371
372// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
373// chars. Those are then set as symbols.
374func wordsAsSymbols(doc Document) Document {
375 re := regexp.MustCompile(`\b\w{2,}\b`)
376 var symbols []DocumentSection
377 for _, match := range re.FindAllIndex(doc.Content, -1) {
378 symbols = append(symbols, DocumentSection{
379 Start: uint32(match[0]),
380 End: uint32(match[1]),
381 })
382 }
383 doc.Symbols = symbols
384 return doc
385}
386
// TestSearchStats builds a three-document index in which every word of two or
// more characters is registered as a symbol, then checks
//   - that an AND of two substrings matches only the document containing both,
//     with the expected offsets, in both result formats; and
//   - the exact Stats reported for a table of queries. The timing fields
//     MatchTreeConstruction and MatchTreeSearch are ignored in the comparison.
func TestSearchStats(t *testing.T) {
	ctx := context.Background()
	searcher := searcherForTest(t, testIndexBuilder(t, nil,
		wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
		wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
		wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
		// --------------------------------------------------0123456789012345
	))

	// Only f3 contains both words.
	andQuery := query.NewAnd(
		&query.Substring{
			Pattern: "banana",
		},
		&query.Substring{
			Pattern: "apple",
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
		if err != nil {
			t.Fatal(err)
		}
		matches := sres.Files
		if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
			t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
		}

		if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
			t.Fatalf("got %#v, want offsets 2,9", matches)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
		if err != nil {
			t.Fatal(err)
		}
		matches := sres.Files
		if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
			t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
		}

		if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
			t.Fatalf("got %#v, want offsets 2,9", matches)
		}
	})
	t.Run("Stats", func(t *testing.T) {
		// Each case runs Q with chunkOpts and compares the resulting Stats
		// against Want.
		cases := []struct {
			Name string
			Q    query.Q
			Want Stats
		}{{
			Name: "and-query",
			Q:    andQuery,
			Want: Stats{
				FilesLoaded:        1,
				ContentBytesLoaded: 22,
				IndexBytesLoaded:   8,
				NgramMatches:       3, // we look at doc 1, because it's max(0,1) due to AND
				NgramLookups:       104,
				MatchCount:         2,
				FileCount:          1,
				FilesConsidered:    2,
				ShardsScanned:      1,
			},
		}, {
			Name: "one-trigram",
			Q: &query.Substring{
				Pattern:       "a y",
				Content:       true,
				CaseSensitive: true,
			},
			Want: Stats{
				ContentBytesLoaded: 14,
				IndexBytesLoaded:   1,
				FileCount:          1,
				FilesConsidered:    1,
				FilesLoaded:        1,
				ShardsScanned:      1,
				MatchCount:         1,
				NgramMatches:       1,
				NgramLookups:       2, // once to lookup frequency then again to access posting list.
			},
		}, {
			Name: "one-trigram-case-insensitive",
			Q: &query.Substring{
				Pattern: "a y",
				Content: true,
			},
			Want: Stats{
				ContentBytesLoaded: 14,
				IndexBytesLoaded:   1,
				FileCount:          1,
				FilesConsidered:    1,
				FilesLoaded:        1,
				ShardsScanned:      1,
				MatchCount:         1,
				NgramMatches:       1,
				NgramLookups:       8, // "a y" has 2**2 casings which we lookup twice.
			},
		}, {
			Name: "one-trigram-pruned",
			Q: &query.Substring{
				Pattern:       "foo",
				Content:       true,
				CaseSensitive: true,
			},
			Want: Stats{
				ShardsSkippedFilter: 1,
				NgramLookups:        1, // only had to lookup once
			},
		}, {
			Name: "one-trigram-branch-pruned",
			Q: query.NewAnd(
				&query.Substring{
					Pattern:       "foo",
					Content:       true,
					CaseSensitive: true,
				},
				&query.Substring{
					Pattern:       "a y",
					Content:       true,
					CaseSensitive: true,
				},
			),
			Want: Stats{
				IndexBytesLoaded:    1, // we created an iterator for "a y" before pruning.
				ShardsSkippedFilter: 1,
				NgramLookups:        3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
			},
		}, {
			Name: "symbol-substr-nomatch",
			Q: &query.Symbol{Expr: &query.Substring{
				Pattern:       "banana apple",
				Content:       true,
				CaseSensitive: true,
			}},
			Want: Stats{
				IndexBytesLoaded: 3,
				FilesConsidered:  1, // important that we only check 1 file to ensure we are using the index
				MatchCount:       0, // even though there is a match it doesn't align with a symbol
				ShardsScanned:    1,
				NgramMatches:     1,
				NgramLookups:     12,
			},
		}, {
			Name: "symbol-substr",
			Q: &query.Symbol{Expr: &query.Substring{
				Pattern:       "apple",
				Content:       true,
				CaseSensitive: true,
			}},
			Want: Stats{
				ContentBytesLoaded: 35,
				IndexBytesLoaded:   4,
				FileCount:          2,
				FilesConsidered:    2, // must be 2 to ensure we used the index
				FilesLoaded:        2,
				MatchCount:         2, // apple symbols is in two files
				ShardsScanned:      1,
				NgramMatches:       2,
				NgramLookups:       5,
			},
		}, {
			Name: "symbol-regexp-nomatch",
			Q: &query.Symbol{Expr: &query.Regexp{
				Regexp:        mustParseRE("^apple.banana$"),
				Content:       true,
				CaseSensitive: true,
			}},
			Want: Stats{
				ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
				IndexBytesLoaded:   8,
				FilesConsidered:    2, // important that we don't check 3 to ensure we are using the index
				FilesLoaded:        2,
				MatchCount:         0, // even though there is a match it doesn't align with a symbol
				ShardsScanned:      1,
				NgramMatches:       3,
				NgramLookups:       11,
			},
		}, {
			Name: "symbol-regexp",
			Q: &query.Symbol{Expr: &query.Regexp{
				Regexp:        mustParseRE("^app.e$"),
				Content:       true,
				CaseSensitive: true,
			}},
			Want: Stats{
				ContentBytesLoaded: 35,
				IndexBytesLoaded:   2,
				FileCount:          2,
				FilesConsidered:    2, // must be 2 to ensure we used the index
				FilesLoaded:        2,
				MatchCount:         2, // apple symbols is in two files
				ShardsScanned:      1,
				NgramMatches:       2,
				NgramLookups:       2,
			},
		}}

		for _, tc := range cases {
			t.Run(tc.Name, func(t *testing.T) {
				sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
				if err != nil {
					t.Fatal(err)
				}
				// The two ignored fields are excluded from the comparison.
				if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
					t.Errorf("unexpected Stats (-want +got):\n%s", diff)
				}
			})
		}

	})
}
601
602func TestAndNegateSearch(t *testing.T) {
603 b := testIndexBuilder(t, nil,
604 Document{Name: "f1", Content: []byte("x banana y")},
605 // -----------------------------------0123456789
606 Document{Name: "f4", Content: []byte("x banana apple y")})
607
608 t.Run("LineMatches", func(t *testing.T) {
609 sres := searchForTest(t, b, query.NewAnd(
610 &query.Substring{
611 Pattern: "banana",
612 },
613 &query.Not{Child: &query.Substring{
614 Pattern: "apple",
615 }}))
616
617 matches := sres.Files
618
619 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
620 t.Fatalf("got %v, want 1 match", matches)
621 }
622 if matches[0].FileName != "f1" {
623 t.Fatalf("got match %#v, want FileName: f1", matches[0])
624 }
625 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
626 t.Fatalf("got %v, want offset 2", matches)
627 }
628 })
629
630 t.Run("ChunkMatches", func(t *testing.T) {
631 sres := searchForTest(t, b,
632 query.NewAnd(
633 &query.Substring{
634 Pattern: "banana",
635 },
636 &query.Not{Child: &query.Substring{
637 Pattern: "apple",
638 }},
639 ),
640 chunkOpts,
641 )
642
643 matches := sres.Files
644
645 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
646 t.Fatalf("got %v, want 1 match", matches)
647 }
648 if matches[0].FileName != "f1" {
649 t.Fatalf("got match %#v, want FileName: f1", matches[0])
650 }
651 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
652 t.Fatalf("got %v, want offset 2", matches)
653 }
654 })
655}
656
657func TestNegativeMatchesOnlyShortcut(t *testing.T) {
658 b := testIndexBuilder(t, nil,
659 Document{Name: "f1", Content: []byte("x banana y")},
660 Document{Name: "f2", Content: []byte("x appelmoes y")},
661 Document{Name: "f3", Content: []byte("x appelmoes y")},
662 Document{Name: "f3", Content: []byte("x appelmoes y")})
663
664 t.Run("LineMatches", func(t *testing.T) {
665 sres := searchForTest(t, b, query.NewAnd(
666 &query.Substring{
667 Pattern: "banana",
668 },
669 &query.Not{Child: &query.Substring{
670 Pattern: "appel",
671 }}))
672
673 if sres.Stats.FilesConsidered != 1 {
674 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
675 }
676 })
677
678 t.Run("ChunkMatches", func(t *testing.T) {
679 sres := searchForTest(t, b, query.NewAnd(
680 &query.Substring{
681 Pattern: "banana",
682 },
683 &query.Not{Child: &query.Substring{
684 Pattern: "appel",
685 }}), chunkOpts)
686
687 if sres.Stats.FilesConsidered != 1 {
688 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
689 }
690 })
691}
692
693func TestFileSearch(t *testing.T) {
694 b := testIndexBuilder(t, nil,
695 Document{Name: "banzana", Content: []byte("x orange y")},
696 // -------------0123456
697 Document{Name: "banana", Content: []byte("x apple y")},
698 // -------------012345
699 )
700
701 t.Run("LineMatches", func(t *testing.T) {
702 sres := searchForTest(t, b, &query.Substring{
703 Pattern: "anan",
704 FileName: true,
705 })
706
707 matches := sres.Files
708 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
709 t.Fatalf("got %v, want 1 match", matches)
710 }
711
712 got := matches[0].LineMatches[0]
713 want := LineMatch{
714 Line: []byte("banana"),
715 LineFragments: []LineFragmentMatch{{
716 Offset: 1,
717 LineOffset: 1,
718 MatchLength: 4,
719 }},
720 FileName: true,
721 }
722
723 if !reflect.DeepEqual(got, want) {
724 t.Errorf("got %#v, want %#v", got, want)
725 }
726 })
727
728 t.Run("ChunkMatches", func(t *testing.T) {
729 sres := searchForTest(t, b, &query.Substring{
730 Pattern: "anan",
731 FileName: true,
732 }, chunkOpts)
733
734 matches := sres.Files
735 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
736 t.Fatalf("got %v, want 1 match", matches)
737 }
738
739 got := matches[0].ChunkMatches[0]
740 want := ChunkMatch{
741 Content: []byte("banana"),
742 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
743 Ranges: []Range{{
744 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
745 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
746 }},
747 FileName: true,
748 }
749
750 if diff := cmp.Diff(want, got); diff != "" {
751 t.Fatal(diff)
752 }
753 })
754
755 t.Run("FileNameSet", func(t *testing.T) {
756 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
757
758 matches := sres.Files
759 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
760 t.Fatalf("got %v, want 1 match", matches)
761 }
762
763 got := matches[0].ChunkMatches[0]
764 want := ChunkMatch{
765 Content: []byte("banana"),
766 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
767 Ranges: []Range{{
768 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
769 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
770 }},
771 FileName: true,
772 }
773
774 if diff := cmp.Diff(want, got); diff != "" {
775 t.Fatal(diff)
776 }
777 })
778}
779
780func TestFileCase(t *testing.T) {
781 b := testIndexBuilder(t, nil,
782 Document{Name: "BANANA", Content: []byte("x orange y")})
783
784 t.Run("LineMatches", func(t *testing.T) {
785 sres := searchForTest(t, b, &query.Substring{
786 Pattern: "banana",
787 FileName: true,
788 })
789
790 matches := sres.Files
791 if len(matches) != 1 || matches[0].FileName != "BANANA" {
792 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
793 }
794 })
795
796 t.Run("ChunkMatches", func(t *testing.T) {
797 sres := searchForTest(t, b, &query.Substring{
798 Pattern: "banana",
799 FileName: true,
800 }, chunkOpts)
801
802 matches := sres.Files
803 if len(matches) != 1 || matches[0].FileName != "BANANA" {
804 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
805 }
806 })
807}
808
809func TestFileRegexpSearchBruteForce(t *testing.T) {
810 b := testIndexBuilder(t, nil,
811 Document{Name: "banzana", Content: []byte("x orange y")},
812 Document{Name: "banana", Content: []byte("x apple y")},
813 )
814 t.Run("LineMatches", func(t *testing.T) {
815 sres := searchForTest(t, b, &query.Regexp{
816 Regexp: mustParseRE("[qn][zx]"),
817 FileName: true,
818 })
819
820 matches := sres.Files
821 if len(matches) != 1 || matches[0].FileName != "banzana" {
822 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
823 }
824 })
825 t.Run("LineMatches", func(t *testing.T) {
826 sres := searchForTest(t, b, &query.Regexp{
827 Regexp: mustParseRE("[qn][zx]"),
828 FileName: true,
829 }, chunkOpts)
830
831 matches := sres.Files
832 if len(matches) != 1 || matches[0].FileName != "banzana" {
833 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
834 }
835 })
836}
837
838func TestFileRegexpSearchShortString(t *testing.T) {
839 b := testIndexBuilder(t, nil,
840 Document{Name: "banana.py", Content: []byte("x orange y")})
841
842 t.Run("LineMatches", func(t *testing.T) {
843 sres := searchForTest(t, b, &query.Regexp{
844 Regexp: mustParseRE("ana.py"),
845 FileName: true,
846 })
847
848 matches := sres.Files
849 if len(matches) != 1 || matches[0].FileName != "banana.py" {
850 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
851 }
852 })
853
854 t.Run("ChunkMatches", func(t *testing.T) {
855 sres := searchForTest(t, b, &query.Regexp{
856 Regexp: mustParseRE("ana.py"),
857 FileName: true,
858 }, chunkOpts)
859
860 matches := sres.Files
861 if len(matches) != 1 || matches[0].FileName != "banana.py" {
862 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
863 }
864 })
865}
866
867func TestFileSubstringSearchBruteForce(t *testing.T) {
868 b := testIndexBuilder(t, nil,
869 Document{Name: "BANZANA", Content: []byte("x orange y")},
870 Document{Name: "banana", Content: []byte("x apple y")})
871
872 q := &query.Substring{
873 Pattern: "z",
874 FileName: true,
875 }
876
877 t.Run("LineMatches", func(t *testing.T) {
878 res := searchForTest(t, b, q)
879 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
880 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
881 }
882 })
883
884 t.Run("ChunkMatches", func(t *testing.T) {
885 res := searchForTest(t, b, q, chunkOpts)
886 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
887 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
888 }
889 })
890}
891
892func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
893 b := testIndexBuilder(t, nil,
894 Document{Name: "BANZANA", Content: []byte("x orange y")},
895 Document{Name: "bananaq", Content: []byte("x apple y")})
896
897 q := &query.Substring{
898 Pattern: "q",
899 FileName: true,
900 }
901 t.Run("LineMatches", func(t *testing.T) {
902 res := searchForTest(t, b, q)
903 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
904 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
905 }
906 })
907
908 t.Run("LineMatches", func(t *testing.T) {
909 res := searchForTest(t, b, q, chunkOpts)
910 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
911 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
912 }
913 })
914}
915
916func TestSearchMatchAll(t *testing.T) {
917 b := testIndexBuilder(t, nil,
918 Document{Name: "banzana", Content: []byte("x orange y")},
919 Document{Name: "banana", Content: []byte("x apple y")})
920
921 t.Run("LineMatches", func(t *testing.T) {
922 sres := searchForTest(t, b, &query.Const{Value: true})
923 matches := sres.Files
924 if len(matches) != 2 {
925 t.Fatalf("got %v, want 2 matches", matches)
926 }
927 })
928
929 t.Run("ChunkMatches", func(t *testing.T) {
930 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
931 matches := sres.Files
932 if len(matches) != 2 {
933 t.Fatalf("got %v, want 2 matches", matches)
934 }
935 })
936}
937
938func TestSearchNewline(t *testing.T) {
939 b := testIndexBuilder(t, nil,
940 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
941
942 t.Run("LineMatches", func(t *testing.T) {
943 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
944
945 // Just check that we don't crash.
946
947 matches := sres.Files
948 if len(matches) != 1 {
949 t.Fatalf("got %v, want 1 matches", matches)
950 }
951 })
952
953 t.Run("ChunkMatches", func(t *testing.T) {
954 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
955
956 // Just check that we don't crash.
957
958 matches := sres.Files
959 if len(matches) != 1 {
960 t.Fatalf("got %v, want 1 matches", matches)
961 }
962 })
963}
964
965func TestSearchMatchAllRegexp(t *testing.T) {
966 b := testIndexBuilder(t, nil,
967 Document{Name: "banzana", Content: []byte("abcd")},
968 Document{Name: "banana", Content: []byte("pqrs")})
969
970 t.Run("LineMatches", func(t *testing.T) {
971 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
972
973 matches := sres.Files
974 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
975 t.Fatalf("got %v, want 2 matches", matches)
976 }
977 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
978 t.Fatalf("want 4 chars in every file, got %#v", matches)
979 }
980
981 })
982
983 t.Run("ChunkMatches", func(t *testing.T) {
984 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
985
986 matches := sres.Files
987 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
988 t.Fatalf("got %v, want 2 matches", matches)
989 }
990 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
991 t.Fatalf("want 4 chars in every file, got %#v", matches)
992 }
993
994 })
995}
996
997func TestFileRestriction(t *testing.T) {
998 b := testIndexBuilder(t, nil,
999 Document{Name: "banana1", Content: []byte("x orange y")},
1000 Document{Name: "banana2", Content: []byte("x apple y")},
1001 Document{Name: "orange", Content: []byte("x apple z")})
1002
1003 t.Run("LineMatches", func(t *testing.T) {
1004 sres := searchForTest(t, b, query.NewAnd(
1005 &query.Substring{
1006 Pattern: "banana",
1007 FileName: true,
1008 },
1009 &query.Substring{
1010 Pattern: "apple",
1011 }))
1012
1013 matches := sres.Files
1014 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1015 t.Fatalf("got %v, want 1 match", matches)
1016 }
1017
1018 match := matches[0].LineMatches[0]
1019 got := string(match.Line)
1020 want := "x apple y"
1021 if got != want {
1022 t.Errorf("got match %#v, want line %q", match, want)
1023 }
1024 })
1025
1026 t.Run("ChunkMatches", func(t *testing.T) {
1027 sres := searchForTest(t, b, query.NewAnd(
1028 &query.Substring{
1029 Pattern: "banana",
1030 FileName: true,
1031 },
1032 &query.Substring{
1033 Pattern: "apple",
1034 }), chunkOpts)
1035
1036 matches := sres.Files
1037 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1038 t.Fatalf("got %v, want 1 match", matches)
1039 }
1040
1041 match := matches[0].ChunkMatches[0]
1042 got := string(match.Content)
1043 want := "x apple y"
1044 if got != want {
1045 t.Errorf("got match %#v, want line %q", match, want)
1046 }
1047 })
1048}
1049
1050func TestFileNameBoundary(t *testing.T) {
1051 b := testIndexBuilder(t, nil,
1052 Document{Name: "banana2", Content: []byte("x apple y")},
1053 Document{Name: "helpers.go", Content: []byte("x apple y")},
1054 Document{Name: "foo", Content: []byte("x apple y")})
1055
1056 t.Run("LineMatches", func(t *testing.T) {
1057 sres := searchForTest(t, b, &query.Substring{
1058 Pattern: "helpers.go",
1059 FileName: true,
1060 })
1061
1062 matches := sres.Files
1063 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1064 t.Fatalf("got %v, want 1 match", matches)
1065 }
1066 })
1067
1068 t.Run("ChunkMatches", func(t *testing.T) {
1069 sres := searchForTest(t, b, &query.Substring{
1070 Pattern: "helpers.go",
1071 FileName: true,
1072 }, chunkOpts)
1073
1074 matches := sres.Files
1075 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1076 t.Fatalf("got %v, want 1 match", matches)
1077 }
1078 })
1079}
1080
1081func TestDocumentOrder(t *testing.T) {
1082 var docs []Document
1083 for i := 0; i < 3; i++ {
1084 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1085 }
1086
1087 b := testIndexBuilder(t, nil, docs...)
1088
1089 t.Run("LineMatches", func(t *testing.T) {
1090 sres := searchForTest(t, b, query.NewAnd(
1091 &query.Substring{
1092 Pattern: "needle",
1093 }))
1094
1095 want := []string{"f0", "f1", "f2"}
1096 var got []string
1097 for _, f := range sres.Files {
1098 got = append(got, f.FileName)
1099 }
1100 if !reflect.DeepEqual(got, want) {
1101 t.Fatalf("got %v, want %v", got, want)
1102 }
1103 })
1104
1105 t.Run("ChunkMatches", func(t *testing.T) {
1106 sres := searchForTest(t, b,
1107 query.NewAnd(&query.Substring{
1108 Pattern: "needle",
1109 }),
1110 chunkOpts,
1111 )
1112
1113 want := []string{"f0", "f1", "f2"}
1114 var got []string
1115 for _, f := range sres.Files {
1116 got = append(got, f.FileName)
1117 }
1118 if !reflect.DeepEqual(got, want) {
1119 t.Fatalf("got %v, want %v", got, want)
1120 }
1121 })
1122}
1123
1124func TestBranchMask(t *testing.T) {
1125 b := testIndexBuilder(t, &Repository{
1126 Branches: []RepositoryBranch{
1127 {"master", "v-master"},
1128 {"stable", "v-stable"},
1129 {"bonzai", "v-bonzai"},
1130 },
1131 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1132 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1133 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1134 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1135 )
1136
1137 t.Run("LineMatches", func(t *testing.T) {
1138 sres := searchForTest(t, b, query.NewAnd(
1139 &query.Substring{
1140 Pattern: "needle",
1141 },
1142 &query.Branch{
1143 Pattern: "table",
1144 }))
1145
1146 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1147 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1148 }
1149
1150 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1151 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1152 }
1153 })
1154
1155 t.Run("ChunkMatches", func(t *testing.T) {
1156 sres := searchForTest(t, b, query.NewAnd(
1157 &query.Substring{
1158 Pattern: "needle",
1159 },
1160 &query.Branch{
1161 Pattern: "table",
1162 }),
1163 chunkOpts,
1164 )
1165
1166 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1167 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1168 }
1169
1170 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1171 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1172 }
1173 })
1174}
1175
1176func TestBranchLimit(t *testing.T) {
1177 for limit := 64; limit <= 65; limit++ {
1178 r := &Repository{}
1179 for i := 0; i < limit; i++ {
1180 s := fmt.Sprintf("b%d", i)
1181 r.Branches = append(r.Branches, RepositoryBranch{
1182 s, "v-" + s,
1183 })
1184 }
1185 _, err := NewIndexBuilder(r)
1186 if limit == 64 && err != nil {
1187 t.Fatalf("NewIndexBuilder: %v", err)
1188 } else if limit == 65 && err == nil {
1189 t.Fatalf("NewIndexBuilder succeeded")
1190 }
1191 }
1192}
1193
1194func TestBranchReport(t *testing.T) {
1195 branches := []string{"stable", "master"}
1196 b := testIndexBuilder(t, &Repository{
1197 Branches: []RepositoryBranch{
1198 {"stable", "vs"},
1199 {"master", "vm"},
1200 },
1201 },
1202 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1203
1204 t.Run("LineMatches", func(t *testing.T) {
1205 sres := searchForTest(t, b, &query.Substring{
1206 Pattern: "needle",
1207 })
1208 if len(sres.Files) != 1 {
1209 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1210 }
1211
1212 f := sres.Files[0]
1213 if !reflect.DeepEqual(f.Branches, branches) {
1214 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1215 }
1216 })
1217
1218 t.Run("ChunkMatches", func(t *testing.T) {
1219 sres := searchForTest(t, b, &query.Substring{
1220 Pattern: "needle",
1221 }, chunkOpts)
1222 if len(sres.Files) != 1 {
1223 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1224 }
1225
1226 f := sres.Files[0]
1227 if !reflect.DeepEqual(f.Branches, branches) {
1228 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1229 }
1230 })
1231
1232}
1233
1234func TestBranchVersions(t *testing.T) {
1235 b := testIndexBuilder(t, &Repository{
1236 Branches: []RepositoryBranch{
1237 {"stable", "v-stable"},
1238 {"master", "v-master"},
1239 },
1240 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1241
1242 t.Run("LineMatches", func(t *testing.T) {
1243 sres := searchForTest(t, b, &query.Substring{
1244 Pattern: "needle",
1245 })
1246 if len(sres.Files) != 1 {
1247 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1248 }
1249
1250 f := sres.Files[0]
1251 if f.Version != "v-master" {
1252 t.Fatalf("got file %#v, want version 'v-master'", f)
1253 }
1254 })
1255
1256 t.Run("ChunkMatches", func(t *testing.T) {
1257 sres := searchForTest(t, b, &query.Substring{
1258 Pattern: "needle",
1259 }, chunkOpts)
1260 if len(sres.Files) != 1 {
1261 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1262 }
1263
1264 f := sres.Files[0]
1265 if f.Version != "v-master" {
1266 t.Fatalf("got file %#v, want version 'v-master'", f)
1267 }
1268 })
1269}
1270
// mustParseRE parses s with the syntax.Perl flags and returns the resulting
// syntax tree. It panics with the parse error when s is not a valid pattern.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1279
1280func TestRegexp(t *testing.T) {
1281 content := []byte("needle the bla")
1282 // ----------------01234567890123
1283
1284 b := testIndexBuilder(t, nil,
1285 Document{
1286 Name: "f1",
1287 Content: content,
1288 })
1289
1290 t.Run("LineMatches", func(t *testing.T) {
1291 sres := searchForTest(t, b,
1292 &query.Regexp{
1293 Regexp: mustParseRE("dle.*bla"),
1294 })
1295
1296 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1297 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1298 }
1299
1300 got := sres.Files[0].LineMatches[0]
1301 want := LineMatch{
1302 LineFragments: []LineFragmentMatch{{
1303 LineOffset: 3,
1304 Offset: 3,
1305 MatchLength: 11,
1306 }},
1307 Line: content,
1308 FileName: false,
1309 LineNumber: 1,
1310 LineStart: 0,
1311 LineEnd: 14,
1312 }
1313
1314 if !reflect.DeepEqual(got, want) {
1315 t.Errorf("got %#v, want %#v", got, want)
1316 }
1317 })
1318
1319 t.Run("ChunkMatches", func(t *testing.T) {
1320 sres := searchForTest(t, b,
1321 &query.Regexp{
1322 Regexp: mustParseRE("dle.*bla"),
1323 }, chunkOpts)
1324
1325 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1326 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1327 }
1328
1329 got := sres.Files[0].ChunkMatches[0]
1330 want := ChunkMatch{
1331 Content: content,
1332 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1333 Ranges: []Range{{
1334 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1335 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1336 }},
1337 }
1338
1339 if diff := cmp.Diff(want, got); diff != "" {
1340 t.Fatal(diff)
1341 }
1342 })
1343}
1344
1345func TestRegexpFile(t *testing.T) {
1346 content := []byte("needle the bla")
1347
1348 name := "let's play: find the mussel"
1349 b := testIndexBuilder(t, nil,
1350 Document{Name: name, Content: content},
1351 Document{Name: "play.txt", Content: content})
1352
1353 t.Run("LineMatches", func(t *testing.T) {
1354 sres := searchForTest(t, b,
1355 &query.Regexp{
1356 Regexp: mustParseRE("play.*mussel"),
1357 FileName: true,
1358 })
1359
1360 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1361 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1362 }
1363
1364 if sres.Files[0].FileName != name {
1365 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1366 }
1367 })
1368
1369 t.Run("ChunkMatches", func(t *testing.T) {
1370 sres := searchForTest(t, b,
1371 &query.Regexp{
1372 Regexp: mustParseRE("play.*mussel"),
1373 FileName: true,
1374 }, chunkOpts)
1375
1376 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1377 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1378 }
1379
1380 if sres.Files[0].FileName != name {
1381 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1382 }
1383 })
1384}
1385
1386func TestRegexpOrder(t *testing.T) {
1387 content := []byte("bla the needle")
1388 // ----------------01234567890123
1389
1390 b := testIndexBuilder(t, nil,
1391 Document{Name: "f1", Content: content})
1392
1393 t.Run("LineMatches", func(t *testing.T) {
1394 sres := searchForTest(t, b,
1395 &query.Regexp{
1396 Regexp: mustParseRE("dle.*bla"),
1397 })
1398
1399 if len(sres.Files) != 0 {
1400 t.Fatalf("got %v, want 0 matches", sres.Files)
1401 }
1402 })
1403
1404 t.Run("ChunkMatches", func(t *testing.T) {
1405 sres := searchForTest(t, b,
1406 &query.Regexp{
1407 Regexp: mustParseRE("dle.*bla"),
1408 })
1409
1410 if len(sres.Files) != 0 {
1411 t.Fatalf("got %v, want 0 matches", sres.Files)
1412 }
1413 })
1414}
1415
1416func TestRepoName(t *testing.T) {
1417 content := []byte("bla the needle")
1418 // ----------------01234567890123
1419
1420 b := testIndexBuilder(t, &Repository{Name: "bla"},
1421 Document{Name: "f1", Content: content})
1422
1423 t.Run("LineMatches", func(t *testing.T) {
1424 sres := searchForTest(t, b,
1425 query.NewAnd(
1426 &query.Substring{Pattern: "needle"},
1427 &query.Repo{Regexp: regexp.MustCompile("foo")},
1428 ))
1429
1430 if len(sres.Files) != 0 {
1431 t.Fatalf("got %v, want 0 matches", sres.Files)
1432 }
1433
1434 if sres.Stats.FilesConsidered > 0 {
1435 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1436 }
1437
1438 sres = searchForTest(t, b,
1439 query.NewAnd(
1440 &query.Substring{Pattern: "needle"},
1441 &query.Repo{Regexp: regexp.MustCompile("bla")},
1442 ))
1443 if len(sres.Files) != 1 {
1444 t.Fatalf("got %v, want 1 match", sres.Files)
1445 }
1446 })
1447
1448 t.Run("ChunkMatches", func(t *testing.T) {
1449 sres := searchForTest(t, b,
1450 query.NewAnd(
1451 &query.Substring{Pattern: "needle"},
1452 &query.Repo{Regexp: regexp.MustCompile("foo")},
1453 ),
1454 chunkOpts,
1455 )
1456
1457 if len(sres.Files) != 0 {
1458 t.Fatalf("got %v, want 0 matches", sres.Files)
1459 }
1460
1461 if sres.Stats.FilesConsidered > 0 {
1462 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1463 }
1464
1465 sres = searchForTest(t, b,
1466 query.NewAnd(
1467 &query.Substring{Pattern: "needle"},
1468 &query.Repo{Regexp: regexp.MustCompile("bla")},
1469 ))
1470 if len(sres.Files) != 1 {
1471 t.Fatalf("got %v, want 1 match", sres.Files)
1472 }
1473 })
1474}
1475
1476func TestMergeMatches(t *testing.T) {
1477 content := []byte("blablabla")
1478 b := testIndexBuilder(t, nil,
1479 Document{Name: "f1", Content: content})
1480
1481 t.Run("LineMatches", func(t *testing.T) {
1482 sres := searchForTest(t, b,
1483 &query.Substring{Pattern: "bla"})
1484 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1485 t.Fatalf("got %v, want 1 match", sres.Files)
1486 }
1487 })
1488
1489 t.Run("ChunkMatches", func(t *testing.T) {
1490 sres := searchForTest(t, b,
1491 &query.Substring{Pattern: "bla"},
1492 chunkOpts,
1493 )
1494 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1495 t.Fatalf("got %v, want 1 match", sres.Files)
1496 }
1497 })
1498}
1499
1500func TestRepoURL(t *testing.T) {
1501 content := []byte("blablabla")
1502 b := testIndexBuilder(t, &Repository{
1503 Name: "name",
1504 URL: "URL",
1505 CommitURLTemplate: "commit",
1506 FileURLTemplate: "file-url",
1507 LineFragmentTemplate: "fragment",
1508 }, Document{Name: "f1", Content: content})
1509
1510 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1511
1512 if sres.RepoURLs["name"] != "file-url" {
1513 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1514 }
1515 if sres.LineFragments["name"] != "fragment" {
1516 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1517 }
1518}
1519
1520func TestRegexpCaseSensitive(t *testing.T) {
1521 content := []byte("bla\nfunc unmarshalGitiles\n")
1522 b := testIndexBuilder(t, nil, Document{
1523 Name: "f1",
1524 Content: content,
1525 })
1526
1527 t.Run("LineMatches", func(t *testing.T) {
1528 res := searchForTest(t, b,
1529 &query.Regexp{
1530 Regexp: mustParseRE("func.*Gitiles"),
1531 CaseSensitive: true,
1532 })
1533
1534 if len(res.Files) != 1 {
1535 t.Fatalf("got %v, want one match", res.Files)
1536 }
1537 })
1538
1539 t.Run("ChunkMatches", func(t *testing.T) {
1540 res := searchForTest(t, b,
1541 &query.Regexp{
1542 Regexp: mustParseRE("func.*Gitiles"),
1543 CaseSensitive: true,
1544 },
1545 chunkOpts,
1546 )
1547
1548 if len(res.Files) != 1 {
1549 t.Fatalf("got %v, want one match", res.Files)
1550 }
1551 })
1552}
1553
1554func TestRegexpCaseFolding(t *testing.T) {
1555 content := []byte("bla\nfunc unmarshalGitiles\n")
1556
1557 b := testIndexBuilder(t, nil,
1558 Document{Name: "f1", Content: content})
1559 res := searchForTest(t, b,
1560 &query.Regexp{
1561 Regexp: mustParseRE("func.*GITILES"),
1562 CaseSensitive: false,
1563 })
1564
1565 if len(res.Files) != 1 {
1566 t.Fatalf("got %v, want one match", res.Files)
1567 }
1568}
1569
1570func TestCaseRegexp(t *testing.T) {
1571 content := []byte("BLABLABLA")
1572 b := testIndexBuilder(t, nil,
1573 Document{Name: "f1", Content: content})
1574
1575 t.Run("LineMatches", func(t *testing.T) {
1576 res := searchForTest(t, b,
1577 &query.Regexp{
1578 Regexp: mustParseRE("[xb][xl][xa]"),
1579 CaseSensitive: true,
1580 })
1581
1582 if len(res.Files) > 0 {
1583 t.Fatalf("got %v, want no matches", res.Files)
1584 }
1585 })
1586
1587 t.Run("ChunkMatches", func(t *testing.T) {
1588 res := searchForTest(t, b,
1589 &query.Regexp{
1590 Regexp: mustParseRE("[xb][xl][xa]"),
1591 CaseSensitive: true,
1592 },
1593 chunkOpts,
1594 )
1595
1596 if len(res.Files) > 0 {
1597 t.Fatalf("got %v, want no matches", res.Files)
1598 }
1599 })
1600}
1601
1602func TestNegativeRegexp(t *testing.T) {
1603 content := []byte("BLABLABLA needle bla")
1604 b := testIndexBuilder(t, nil,
1605 Document{Name: "f1", Content: content})
1606
1607 t.Run("LineMatches", func(t *testing.T) {
1608 res := searchForTest(t, b,
1609 query.NewAnd(
1610 &query.Substring{
1611 Pattern: "needle",
1612 },
1613 &query.Not{
1614 Child: &query.Regexp{
1615 Regexp: mustParseRE(".cs"),
1616 },
1617 }))
1618
1619 if len(res.Files) != 1 {
1620 t.Fatalf("got %v, want 1 match", res.Files)
1621 }
1622 })
1623
1624 t.Run("ChunkMatches", func(t *testing.T) {
1625 res := searchForTest(t, b,
1626 query.NewAnd(
1627 &query.Substring{
1628 Pattern: "needle",
1629 },
1630 &query.Not{
1631 Child: &query.Regexp{
1632 Regexp: mustParseRE(".cs"),
1633 },
1634 },
1635 ),
1636 chunkOpts)
1637
1638 if len(res.Files) != 1 {
1639 t.Fatalf("got %v, want 1 match", res.Files)
1640 }
1641 })
1642}
1643
1644func TestSymbolRank(t *testing.T) {
1645 t.Skip()
1646
1647 content := []byte("func bla() blubxxxxx")
1648 // ----------------01234567890123456789
1649 b := testIndexBuilder(t, nil,
1650 Document{
1651 Name: "f1",
1652 Content: content,
1653 }, Document{
1654 Name: "f2",
1655 Content: content,
1656 Symbols: []DocumentSection{{5, 8}},
1657 }, Document{
1658 Name: "f3",
1659 Content: content,
1660 })
1661
1662 t.Run("LineMatches", func(t *testing.T) {
1663 res := searchForTest(t, b,
1664 &query.Substring{
1665 CaseSensitive: false,
1666 Pattern: "bla",
1667 })
1668
1669 if len(res.Files) != 3 {
1670 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1671 }
1672 if res.Files[0].FileName != "f2" {
1673 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1674 }
1675 })
1676
1677 t.Run("ChunkMatches", func(t *testing.T) {
1678 res := searchForTest(t, b,
1679 &query.Substring{
1680 CaseSensitive: false,
1681 Pattern: "bla",
1682 }, chunkOpts)
1683
1684 if len(res.Files) != 3 {
1685 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1686 }
1687 if res.Files[0].FileName != "f2" {
1688 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1689 }
1690 })
1691}
1692
1693func TestSymbolRankRegexpUTF8(t *testing.T) {
1694 t.Skip()
1695
1696 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1697 content := []byte(prefix +
1698 "func bla() blub")
1699 // ------012345678901234
1700 b := testIndexBuilder(t, nil,
1701 Document{
1702 Name: "f1",
1703 Content: content,
1704 }, Document{
1705 Name: "f2",
1706 Content: content,
1707 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1708 }, Document{
1709 Name: "f3",
1710 Content: content,
1711 })
1712
1713 t.Run("LineMatches", func(t *testing.T) {
1714 res := searchForTest(t, b,
1715 &query.Regexp{
1716 Regexp: mustParseRE("b.a"),
1717 })
1718
1719 if len(res.Files) != 3 {
1720 t.Fatalf("got %#v, want 3 files", res.Files)
1721 }
1722 if res.Files[0].FileName != "f2" {
1723 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1724 }
1725 })
1726
1727 t.Run("ChunjkMatches", func(t *testing.T) {
1728 res := searchForTest(t, b,
1729 &query.Regexp{
1730 Regexp: mustParseRE("b.a"),
1731 }, chunkOpts)
1732
1733 if len(res.Files) != 3 {
1734 t.Fatalf("got %#v, want 3 files", res.Files)
1735 }
1736 if res.Files[0].FileName != "f2" {
1737 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1738 }
1739 })
1740}
1741
1742func TestPartialSymbolRank(t *testing.T) {
1743 t.Skip()
1744
1745 content := []byte("func bla() blub")
1746 // ----------------012345678901234
1747
1748 b := testIndexBuilder(t, nil,
1749 Document{
1750 Name: "f1",
1751 Content: content,
1752 Symbols: []DocumentSection{{4, 9}},
1753 }, Document{
1754 Name: "f2",
1755 Content: content,
1756 Symbols: []DocumentSection{{4, 8}},
1757 }, Document{
1758 Name: "f3",
1759 Content: content,
1760 Symbols: []DocumentSection{{4, 9}},
1761 })
1762
1763 t.Run("LineMatches", func(t *testing.T) {
1764 res := searchForTest(t, b,
1765 &query.Substring{
1766 Pattern: "bla",
1767 })
1768
1769 if len(res.Files) != 3 {
1770 t.Fatalf("got %#v, want 3 files", res.Files)
1771 }
1772 if res.Files[0].FileName != "f2" {
1773 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1774 }
1775 })
1776
1777 t.Run("ChunkMatches", func(t *testing.T) {
1778 res := searchForTest(t, b,
1779 &query.Substring{
1780 Pattern: "bla",
1781 }, chunkOpts)
1782
1783 if len(res.Files) != 3 {
1784 t.Fatalf("got %#v, want 3 files", res.Files)
1785 }
1786 if res.Files[0].FileName != "f2" {
1787 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1788 }
1789 })
1790}
1791
1792func TestNegativeRepo(t *testing.T) {
1793 content := []byte("bla the needle")
1794 // ----------------01234567890123
1795 b := testIndexBuilder(t, &Repository{
1796 Name: "bla",
1797 }, Document{Name: "f1", Content: content})
1798
1799 t.Run("LineMatches", func(t *testing.T) {
1800 sres := searchForTest(t, b,
1801 query.NewAnd(
1802 &query.Substring{Pattern: "needle"},
1803 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1804 ))
1805
1806 if len(sres.Files) != 0 {
1807 t.Fatalf("got %v, want 0 matches", sres.Files)
1808 }
1809 })
1810
1811 t.Run("ChunkMatches", func(t *testing.T) {
1812 sres := searchForTest(t, b,
1813 query.NewAnd(
1814 &query.Substring{Pattern: "needle"},
1815 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1816 ), chunkOpts)
1817
1818 if len(sres.Files) != 0 {
1819 t.Fatalf("got %v, want 0 matches", sres.Files)
1820 }
1821 })
1822}
1823
1824func TestListRepos(t *testing.T) {
1825 content := []byte("bla the needle\n")
1826 // ----------------012345678901234-
1827
1828 t.Run("default and minimal fallback", func(t *testing.T) {
1829 repo := &Repository{
1830 Name: "reponame",
1831 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1832 }
1833 b := testIndexBuilder(t, repo,
1834 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1835 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1836 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1837 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1838
1839 searcher := searcherForTest(t, b)
1840
1841 for _, opts := range []*ListOptions{
1842 nil,
1843 {},
1844 {Field: RepoListFieldRepos},
1845 {Field: RepoListFieldReposMap},
1846 } {
1847 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1848 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1849
1850 res, err := searcher.List(context.Background(), q, opts)
1851 if err != nil {
1852 t.Fatalf("List(%v): %v", q, err)
1853 }
1854
1855 want := &RepoList{
1856 Repos: []*RepoListEntry{{
1857 Repository: *repo,
1858 Stats: RepoStats{
1859 Documents: 4,
1860 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1861 Shards: 1,
1862
1863 NewLinesCount: 4,
1864 DefaultBranchNewLinesCount: 2,
1865 OtherBranchesNewLinesCount: 3,
1866 },
1867 }},
1868 Stats: RepoStats{
1869 Repos: 1,
1870 Documents: 4,
1871 ContentBytes: 68,
1872 Shards: 1,
1873
1874 NewLinesCount: 4,
1875 DefaultBranchNewLinesCount: 2,
1876 OtherBranchesNewLinesCount: 3,
1877 },
1878 }
1879 ignored := []cmp.Option{
1880 cmpopts.EquateEmpty(),
1881 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1882 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1883 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1884 cmpopts.IgnoreFields(Repository{}, "priority"),
1885 }
1886 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1887 t.Fatalf("mismatch (-want +got):\n%s", diff)
1888 }
1889
1890 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1891 res, err = searcher.List(context.Background(), q, nil)
1892 if err != nil {
1893 t.Fatalf("List(%v): %v", q, err)
1894 }
1895 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1896 t.Fatalf("got %v, want 0 matches", res)
1897 }
1898 })
1899 }
1900 })
1901
1902 t.Run("minimal", func(t *testing.T) {
1903 repo := &Repository{
1904 ID: 1234,
1905 Name: "reponame",
1906 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1907 RawConfig: map[string]string{"repoid": "1234"},
1908 }
1909 b := testIndexBuilder(t, repo,
1910 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1911 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1912 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1913 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1914
1915 searcher := searcherForTest(t, b)
1916
1917 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1918 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1919 if err != nil {
1920 t.Fatalf("List(%v): %v", q, err)
1921 }
1922
1923 want := &RepoList{
1924 ReposMap: ReposMap{
1925 repo.ID: {
1926 HasSymbols: repo.HasSymbols,
1927 Branches: repo.Branches,
1928 },
1929 },
1930 Stats: RepoStats{
1931 Repos: 1,
1932 Shards: 1,
1933 Documents: 4,
1934 IndexBytes: 412,
1935 ContentBytes: 68,
1936 NewLinesCount: 4,
1937 DefaultBranchNewLinesCount: 2,
1938 OtherBranchesNewLinesCount: 3,
1939 },
1940 }
1941
1942 ignored := []cmp.Option{
1943 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1944 }
1945 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1946 t.Fatalf("mismatch (-want +got):\n%s", diff)
1947 }
1948
1949 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1950 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1951 if err != nil {
1952 t.Fatalf("List(%v): %v", q, err)
1953 }
1954 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1955 t.Fatalf("got %v, want 0 matches", res)
1956 }
1957 })
1958}
1959
1960func TestListReposByContent(t *testing.T) {
1961 content := []byte("bla the needle")
1962
1963 b := testIndexBuilder(t, &Repository{
1964 Name: "reponame",
1965 },
1966 Document{Name: "f1", Content: content},
1967 Document{Name: "f2", Content: content})
1968
1969 searcher := searcherForTest(t, b)
1970 q := &query.Substring{Pattern: "needle"}
1971 res, err := searcher.List(context.Background(), q, nil)
1972 if err != nil {
1973 t.Fatalf("List(%v): %v", q, err)
1974 }
1975 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1976 t.Fatalf("got %v, want 1 matches", res)
1977 }
1978 if got := res.Repos[0].Stats.Shards; got != 1 {
1979 t.Fatalf("got %d, want 1 shard", got)
1980 }
1981 q = &query.Substring{Pattern: "foo"}
1982 res, err = searcher.List(context.Background(), q, nil)
1983 if err != nil {
1984 t.Fatalf("List(%v): %v", q, err)
1985 }
1986 if len(res.Repos) != 0 {
1987 t.Fatalf("got %v, want 0 matches", res)
1988 }
1989}
1990
1991func TestMetadata(t *testing.T) {
1992 content := []byte("bla the needle")
1993
1994 b := testIndexBuilder(t, &Repository{
1995 Name: "reponame",
1996 }, Document{Name: "f1", Content: content},
1997 Document{Name: "f2", Content: content})
1998
1999 var buf bytes.Buffer
2000 if err := b.Write(&buf); err != nil {
2001 t.Fatal(err)
2002 }
2003 f := &memSeeker{buf.Bytes()}
2004
2005 rd, _, err := ReadMetadata(f)
2006 if err != nil {
2007 t.Fatalf("ReadMetadata: %v", err)
2008 }
2009
2010 if got, want := rd[0].Name, "reponame"; got != want {
2011 t.Fatalf("got %q want %q", got, want)
2012 }
2013}
2014
2015func TestOr(t *testing.T) {
2016 b := testIndexBuilder(t, nil,
2017 Document{Name: "f1", Content: []byte("needle")},
2018 Document{Name: "f2", Content: []byte("banana")})
2019 t.Run("LineMatches", func(t *testing.T) {
2020 sres := searchForTest(t, b, query.NewOr(
2021 &query.Substring{Pattern: "needle"},
2022 &query.Substring{Pattern: "banana"}))
2023
2024 if len(sres.Files) != 2 {
2025 t.Fatalf("got %v, want 2 files", sres.Files)
2026 }
2027 })
2028
2029 t.Run("ChunkMatches", func(t *testing.T) {
2030 sres := searchForTest(t, b, query.NewOr(
2031 &query.Substring{Pattern: "needle"},
2032 &query.Substring{Pattern: "banana"}))
2033
2034 if len(sres.Files) != 2 {
2035 t.Fatalf("got %v, want 2 files", sres.Files)
2036 }
2037 })
2038}
2039
2040func TestFrequency(t *testing.T) {
2041 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2042
2043 b := testIndexBuilder(t, nil,
2044 Document{
2045 Name: "f1",
2046 Content: content,
2047 })
2048
2049 t.Run("LineMatches", func(t *testing.T) {
2050 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2051 if len(sres.Files) != 0 {
2052 t.Errorf("got %v, wanted 0 matches", sres.Files)
2053 }
2054 })
2055
2056 t.Run("ChunkMatches", func(t *testing.T) {
2057 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2058 if len(sres.Files) != 0 {
2059 t.Errorf("got %v, wanted 0 matches", sres.Files)
2060 }
2061 })
2062}
2063
2064func TestMatchNewline(t *testing.T) {
2065 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2066 if err != nil {
2067 t.Fatalf("syntax.Parse: %v", err)
2068 }
2069
2070 content := []byte("pqr\nalex")
2071
2072 b := testIndexBuilder(t, nil,
2073 Document{
2074 Name: "f1",
2075 Content: content,
2076 })
2077
2078 t.Run("LineMatches", func(t *testing.T) {
2079 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2080 if len(sres.Files) != 1 {
2081 t.Errorf("got %v, wanted 1 matches", sres.Files)
2082 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2083 t.Errorf("got match line %q, want %q", l, content)
2084 }
2085 })
2086
2087 t.Run("ChunkMatches", func(t *testing.T) {
2088 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2089 if len(sres.Files) != 1 {
2090 t.Errorf("got %v, wanted 1 matches", sres.Files)
2091 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2092 t.Errorf("got match line %q, want %q", c, content)
2093 }
2094 })
2095}
2096
2097func TestSubRepo(t *testing.T) {
2098 subRepos := map[string]*Repository{
2099 "sub": {
2100 Name: "sub-name",
2101 LineFragmentTemplate: "sub-line",
2102 },
2103 }
2104
2105 content := []byte("pqr\nalex")
2106
2107 b := testIndexBuilder(t, &Repository{
2108 SubRepoMap: subRepos,
2109 }, Document{
2110 Name: "sub/f1",
2111 Content: content,
2112 SubRepositoryPath: "sub",
2113 })
2114
2115 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2116 if len(sres.Files) != 1 {
2117 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2118 }
2119
2120 f := sres.Files[0]
2121 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2122 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2123 }
2124
2125 if sres.LineFragments["sub-name"] != "sub-line" {
2126 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2127 }
2128}
2129
2130func TestSearchEither(t *testing.T) {
2131 b := testIndexBuilder(t, nil,
2132 Document{Name: "f1", Content: []byte("bla needle bla")},
2133 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2134
2135 t.Run("LineMatches", func(t *testing.T) {
2136 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2137 if len(sres.Files) != 2 {
2138 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2139 }
2140
2141 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2142 if len(sres.Files) != 1 {
2143 t.Fatalf("got %v, wanted 1 match", sres.Files)
2144 }
2145
2146 if got, want := sres.Files[0].FileName, "f1"; got != want {
2147 t.Errorf("got %q, want %q", got, want)
2148 }
2149 })
2150
2151 t.Run("ChunkMatches", func(t *testing.T) {
2152 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2153 if len(sres.Files) != 2 {
2154 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2155 }
2156
2157 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2158 if len(sres.Files) != 1 {
2159 t.Fatalf("got %v, wanted 1 match", sres.Files)
2160 }
2161
2162 if got, want := sres.Files[0].FileName, "f1"; got != want {
2163 t.Errorf("got %q, want %q", got, want)
2164 }
2165 })
2166}
2167
2168func TestUnicodeExactMatch(t *testing.T) {
2169 needle := "néédlÉ"
2170 content := []byte("blá blá " + needle + " blâ")
2171
2172 b := testIndexBuilder(t, nil,
2173 Document{Name: "f1", Content: content})
2174
2175 t.Run("LineMatches", func(t *testing.T) {
2176 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2177 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2178 }
2179 })
2180
2181 t.Run("ChunkMatches", func(t *testing.T) {
2182 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2183 if len(res.Files) != 1 {
2184 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2185 }
2186 })
2187}
2188
2189func TestUnicodeCoverContent(t *testing.T) {
2190 needle := "néédlÉ"
2191 content := []byte("blá blá " + needle + " blâ")
2192
2193 b := testIndexBuilder(t, nil,
2194 Document{Name: "f1", Content: content})
2195
2196 t.Run("LineMatches", func(t *testing.T) {
2197 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2198 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2199 }
2200
2201 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2202 if len(res.Files) != 1 {
2203 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2204 }
2205
2206 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2207 t.Errorf("got %d want %d", got, want)
2208 }
2209 })
2210
2211 t.Run("ChunkMatches", func(t *testing.T) {
2212 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2213 if len(res.Files) != 0 {
2214 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2215 }
2216
2217 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2218 if len(res.Files) != 1 {
2219 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2220 }
2221
2222 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2223 want := uint32(strings.Index(string(content), needle))
2224 if got != want {
2225 t.Errorf("got %d want %d", got, want)
2226 }
2227 })
2228}
2229
2230func TestUnicodeNonCoverContent(t *testing.T) {
2231 needle := "nééáádlÉ"
2232 content := []byte("blá blá " + needle + " blâ")
2233
2234 b := testIndexBuilder(t, nil,
2235 Document{Name: "f1", Content: content})
2236
2237 t.Run("LineMatches", func(t *testing.T) {
2238 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2239 if len(res.Files) != 1 {
2240 t.Fatalf("got %v, wanted 1 match", res.Files)
2241 }
2242
2243 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2244 t.Errorf("got %d want %d", got, want)
2245 }
2246 })
2247
2248 t.Run("ChunkMatches", func(t *testing.T) {
2249 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2250 if len(res.Files) != 1 {
2251 t.Fatalf("got %v, wanted 1 match", res.Files)
2252 }
2253
2254 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2255 want := uint32(strings.Index(string(content), needle))
2256 if got != want {
2257 t.Errorf("got %d want %d", got, want)
2258 }
2259 })
2260}
2261
2262const kelvinCodePoint = 8490
2263
2264func TestUnicodeVariableLength(t *testing.T) {
2265 lower := 'k'
2266 upper := rune(kelvinCodePoint)
2267
2268 needle := "nee" + string([]rune{lower}) + "eed"
2269 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2270 " ee" + string([]rune{lower}) + "ee" +
2271 " ee" + string([]rune{upper}) + "ee")
2272
2273 t.Run("LineMatches", func(t *testing.T) {
2274 b := testIndexBuilder(t, nil,
2275 Document{Name: "f1", Content: []byte(corpus)})
2276
2277 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2278 if len(res.Files) != 1 {
2279 t.Fatalf("got %v, wanted 1 match", res.Files)
2280 }
2281 })
2282
2283 t.Run("ChunkMatches", func(t *testing.T) {
2284 b := testIndexBuilder(t, nil,
2285 Document{Name: "f1", Content: []byte(corpus)})
2286
2287 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2288 if len(res.Files) != 1 {
2289 t.Fatalf("got %v, wanted 1 match", res.Files)
2290 }
2291 })
2292}
2293
2294func TestUnicodeFileStartOffsets(t *testing.T) {
2295 unicode := "世界"
2296 wat := "waaaaaat"
2297 b := testIndexBuilder(t, nil,
2298 Document{
2299 Name: "f1",
2300 Content: []byte(unicode),
2301 },
2302 Document{
2303 Name: "f2",
2304 Content: []byte(wat),
2305 },
2306 )
2307 q := &query.Substring{Pattern: wat, Content: true}
2308 res := searchForTest(t, b, q)
2309 if len(res.Files) != 1 {
2310 t.Fatalf("got %v, wanted 1 match", res.Files)
2311 }
2312}
2313
2314func TestLongFileUTF8(t *testing.T) {
2315 needle := "neeedle"
2316
2317 // 6 bytes.
2318 unicode := "世界"
2319 content := []byte(strings.Repeat(unicode, 100) + needle)
2320 b := testIndexBuilder(t, nil,
2321 Document{
2322 Name: "f1",
2323 Content: []byte(strings.Repeat("a", 50)),
2324 },
2325 Document{
2326 Name: "f2",
2327 Content: content,
2328 })
2329
2330 t.Run("LineMatches", func(t *testing.T) {
2331 q := &query.Substring{Pattern: needle, Content: true}
2332 res := searchForTest(t, b, q)
2333 if len(res.Files) != 1 {
2334 t.Errorf("got %v, want 1 result", res)
2335 }
2336 })
2337
2338 t.Run("ChunkMatches", func(t *testing.T) {
2339 q := &query.Substring{Pattern: needle, Content: true}
2340 res := searchForTest(t, b, q, chunkOpts)
2341 if len(res.Files) != 1 {
2342 t.Errorf("got %v, want 1 result", res)
2343 }
2344 })
2345}
2346
2347func TestEstimateDocCount(t *testing.T) {
2348 content := []byte("bla needle bla")
2349 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2350 Document{Name: "f1", Content: content},
2351 Document{Name: "f2", Content: content},
2352 )
2353
2354 t.Run("LineMatches", func(t *testing.T) {
2355 if sres := searchForTest(t, b,
2356 query.NewAnd(
2357 &query.Substring{Pattern: "needle"},
2358 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2359 ), SearchOptions{
2360 EstimateDocCount: true,
2361 }); sres.Stats.ShardFilesConsidered != 2 {
2362 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2363 }
2364 if sres := searchForTest(t, b,
2365 query.NewAnd(
2366 &query.Substring{Pattern: "needle"},
2367 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2368 ), SearchOptions{
2369 EstimateDocCount: true,
2370 }); sres.Stats.ShardFilesConsidered != 0 {
2371 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2372 }
2373 })
2374
2375 t.Run("ChunkMatches", func(t *testing.T) {
2376 if sres := searchForTest(t, b,
2377 query.NewAnd(
2378 &query.Substring{Pattern: "needle"},
2379 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2380 ), SearchOptions{
2381 EstimateDocCount: true,
2382 ChunkMatches: true,
2383 }); sres.Stats.ShardFilesConsidered != 2 {
2384 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2385 }
2386 if sres := searchForTest(t, b,
2387 query.NewAnd(
2388 &query.Substring{Pattern: "needle"},
2389 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2390 ), SearchOptions{
2391 EstimateDocCount: true,
2392 ChunkMatches: true,
2393 }); sres.Stats.ShardFilesConsidered != 0 {
2394 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2395 }
2396 })
2397}
2398
2399func TestUTF8CorrectCorpus(t *testing.T) {
2400 needle := "neeedle"
2401
2402 // 6 bytes.
2403 unicode := "世界"
2404 b := testIndexBuilder(t, nil,
2405 Document{
2406 Name: "f1",
2407 Content: []byte(strings.Repeat(unicode, 100)),
2408 },
2409 Document{
2410 Name: "xxxxxneeedle",
2411 Content: []byte("hello"),
2412 })
2413
2414 t.Run("LineMatches", func(t *testing.T) {
2415 q := &query.Substring{Pattern: needle, FileName: true}
2416 res := searchForTest(t, b, q)
2417 if len(res.Files) != 1 {
2418 t.Errorf("got %v, want 1 result", res)
2419 }
2420 })
2421
2422 t.Run("ChunkMatches", func(t *testing.T) {
2423 q := &query.Substring{Pattern: needle, FileName: true}
2424 res := searchForTest(t, b, q, chunkOpts)
2425 if len(res.Files) != 1 {
2426 t.Errorf("got %v, want 1 result", res)
2427 }
2428 })
2429}
2430
2431func TestBuilderStats(t *testing.T) {
2432 b := testIndexBuilder(t, nil,
2433 Document{
2434 Name: "f1",
2435 Content: []byte(strings.Repeat("abcd", 1024)),
2436 })
2437 var buf bytes.Buffer
2438 if err := b.Write(&buf); err != nil {
2439 t.Fatal(err)
2440 }
2441
2442 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2443 t.Errorf("got %d, want %d", got, want)
2444 }
2445}
2446
2447func TestIOStats(t *testing.T) {
2448 b := testIndexBuilder(t, nil,
2449 Document{
2450 Name: "f1",
2451 Content: []byte(strings.Repeat("abcd", 1024)),
2452 })
2453
2454 t.Run("LineMatches", func(t *testing.T) {
2455 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2456 res := searchForTest(t, b, q)
2457
2458 // 4096 (content) + 2 (overhead: newlines or doc sections)
2459 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2460 t.Errorf("got content I/O %d, want %d", got, want)
2461 }
2462
2463 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2464 // delta encoded.
2465 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2466 t.Errorf("got index I/O %d, want %d", got, want)
2467 }
2468 })
2469
2470 t.Run("ChunkMatches", func(t *testing.T) {
2471 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2472 res := searchForTest(t, b, q, chunkOpts)
2473
2474 // 4096 (content) + 2 (overhead: newlines or doc sections)
2475 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2476 t.Errorf("got content I/O %d, want %d", got, want)
2477 }
2478
2479 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2480 // delta encoded.
2481 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2482 t.Errorf("got index I/O %d, want %d", got, want)
2483 }
2484 })
2485}
2486
2487func TestStartLineAnchor(t *testing.T) {
2488 b := testIndexBuilder(t, nil,
2489 Document{
2490 Name: "f1",
2491 Content: []byte(
2492 `hello
2493start of middle of line
2494`),
2495 })
2496
2497 t.Run("LineMatches", func(t *testing.T) {
2498 q, err := query.Parse("^start")
2499 if err != nil {
2500 t.Errorf("parse: %v", err)
2501 }
2502
2503 res := searchForTest(t, b, q)
2504 if len(res.Files) != 1 {
2505 t.Errorf("got %v, want 1 file", res.Files)
2506 }
2507
2508 q, err = query.Parse("^middle")
2509 if err != nil {
2510 t.Errorf("parse: %v", err)
2511 }
2512 res = searchForTest(t, b, q)
2513 if len(res.Files) != 0 {
2514 t.Errorf("got %v, want 0 files", res.Files)
2515 }
2516 })
2517
2518 t.Run("ChunkMatches", func(t *testing.T) {
2519 q, err := query.Parse("^start")
2520 if err != nil {
2521 t.Errorf("parse: %v", err)
2522 }
2523
2524 res := searchForTest(t, b, q, chunkOpts)
2525 if len(res.Files) != 1 {
2526 t.Errorf("got %v, want 1 file", res.Files)
2527 }
2528
2529 q, err = query.Parse("^middle")
2530 if err != nil {
2531 t.Errorf("parse: %v", err)
2532 }
2533 res = searchForTest(t, b, q, chunkOpts)
2534 if len(res.Files) != 0 {
2535 t.Errorf("got %v, want 0 files", res.Files)
2536 }
2537 })
2538}
2539
2540func TestAndOrUnicode(t *testing.T) {
2541 q, err := query.Parse("orange.*apple")
2542 if err != nil {
2543 t.Errorf("parse: %v", err)
2544 }
2545 finalQ := query.NewAnd(q,
2546 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2547 query.NewOr(&query.Branch{Pattern: "master"}))))
2548
2549 b := testIndexBuilder(t, &Repository{
2550 Name: "name",
2551 Branches: []RepositoryBranch{{"master", "master-version"}},
2552 }, Document{
2553 Name: "f2",
2554 Content: []byte("orange\u2318apple"),
2555 // --------------0123456 78901
2556 Branches: []string{"master"},
2557 })
2558
2559 t.Run("LineMatches", func(t *testing.T) {
2560 res := searchForTest(t, b, finalQ)
2561 if len(res.Files) != 1 {
2562 t.Errorf("got %v, want 1 result", res.Files)
2563 }
2564 })
2565
2566 t.Run("ChunkMatches", func(t *testing.T) {
2567 res := searchForTest(t, b, finalQ, chunkOpts)
2568 if len(res.Files) != 1 {
2569 t.Errorf("got %v, want 1 result", res.Files)
2570 }
2571 })
2572}
2573
2574func TestAndShort(t *testing.T) {
2575 content := []byte("bla needle at orange bla")
2576 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2577 Document{Name: "f1", Content: content},
2578 Document{Name: "f2", Content: []byte("xx at xx")},
2579 Document{Name: "f3", Content: []byte("yy orange xx")},
2580 )
2581
2582 q := query.NewAnd(&query.Substring{Pattern: "at"},
2583 &query.Substring{Pattern: "orange"})
2584
2585 t.Run("LineMatches", func(t *testing.T) {
2586 res := searchForTest(t, b, q)
2587 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2588 t.Errorf("got %v, want 1 result", res.Files)
2589 }
2590 })
2591
2592 t.Run("ChunkMatches", func(t *testing.T) {
2593 res := searchForTest(t, b, q, chunkOpts)
2594 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2595 t.Errorf("got %v, want 1 result", res.Files)
2596 }
2597 })
2598}
2599
2600func TestNoCollectRegexpSubstring(t *testing.T) {
2601 content := []byte("bla final bla\nfoo final, foo")
2602 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2603 Document{Name: "f1", Content: content},
2604 )
2605
2606 q := &query.Regexp{
2607 Regexp: mustParseRE("final[,.]"),
2608 }
2609
2610 t.Run("LineMatches", func(t *testing.T) {
2611 res := searchForTest(t, b, q)
2612 if len(res.Files) != 1 {
2613 t.Fatalf("got %v, want 1 result", res.Files)
2614 }
2615 if f := res.Files[0]; len(f.LineMatches) != 1 {
2616 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2617 }
2618 })
2619
2620 t.Run("ChunkMatches", func(t *testing.T) {
2621 res := searchForTest(t, b, q, chunkOpts)
2622 if len(res.Files) != 1 {
2623 t.Fatalf("got %v, want 1 result", res.Files)
2624 }
2625 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2626 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2627 }
2628 })
2629}
2630
2631func printLineMatches(ms []LineMatch) string {
2632 var ss []string
2633 for _, m := range ms {
2634 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2635 }
2636
2637 return strings.Join(ss, ", ")
2638}
2639
2640func TestLang(t *testing.T) {
2641 content := []byte("bla needle bla")
2642 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2643 Document{Name: "f1", Content: content},
2644 Document{Name: "f2", Language: "java", Content: content},
2645 Document{Name: "f3", Language: "cpp", Content: content},
2646 )
2647
2648 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2649 &query.Language{Language: "cpp"})
2650
2651 t.Run("LineMatches", func(t *testing.T) {
2652 res := searchForTest(t, b, q)
2653 if len(res.Files) != 1 {
2654 t.Fatalf("got %v, want 1 result in f3", res.Files)
2655 }
2656 f := res.Files[0]
2657 if f.FileName != "f3" || f.Language != "cpp" {
2658 t.Fatalf("got %v, want 1 match with language cpp", f)
2659 }
2660 })
2661
2662 t.Run("ChunkMatches", func(t *testing.T) {
2663 res := searchForTest(t, b, q, chunkOpts)
2664 if len(res.Files) != 1 {
2665 t.Fatalf("got %v, want 1 result in f3", res.Files)
2666 }
2667 f := res.Files[0]
2668 if f.FileName != "f3" || f.Language != "cpp" {
2669 t.Fatalf("got %v, want 1 match with language cpp", f)
2670 }
2671 })
2672}
2673
2674func TestLangShortcut(t *testing.T) {
2675 content := []byte("bla needle bla")
2676 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2677 Document{Name: "f2", Language: "java", Content: content},
2678 Document{Name: "f3", Language: "cpp", Content: content},
2679 )
2680
2681 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2682 &query.Language{Language: "fortran"})
2683
2684 t.Run("LineMatches", func(t *testing.T) {
2685 res := searchForTest(t, b, q)
2686 if len(res.Files) != 0 {
2687 t.Fatalf("got %v, want 0 results", res.Files)
2688 }
2689 if res.Stats.IndexBytesLoaded > 0 {
2690 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2691 }
2692 })
2693
2694 t.Run("ChunkMatches", func(t *testing.T) {
2695 res := searchForTest(t, b, q, chunkOpts)
2696 if len(res.Files) != 0 {
2697 t.Fatalf("got %v, want 0 results", res.Files)
2698 }
2699 if res.Stats.IndexBytesLoaded > 0 {
2700 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2701 }
2702 })
2703}
2704
2705func TestNoTextMatchAtoms(t *testing.T) {
2706 content := []byte("bla needle bla")
2707 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2708 Document{Name: "f1", Content: content},
2709 Document{Name: "f2", Language: "java", Content: content},
2710 Document{Name: "f3", Language: "cpp", Content: content},
2711 )
2712 q := query.NewAnd(&query.Language{Language: "java"})
2713 t.Run("LineMatches", func(t *testing.T) {
2714 res := searchForTest(t, b, q)
2715 if len(res.Files) != 1 {
2716 t.Fatalf("got %v, want 1 result in f3", res.Files)
2717 }
2718 })
2719
2720 t.Run("ChunkMatches", func(t *testing.T) {
2721 res := searchForTest(t, b, q, chunkOpts)
2722 if len(res.Files) != 1 {
2723 t.Fatalf("got %v, want 1 result in f3", res.Files)
2724 }
2725 })
2726}
2727
2728func TestNoPositiveAtoms(t *testing.T) {
2729 content := []byte("bla needle bla")
2730 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2731 Document{Name: "f1", Content: content},
2732 Document{Name: "f2", Content: content},
2733 )
2734
2735 q := query.NewAnd(
2736 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2737 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2738 t.Run("LineMatches", func(t *testing.T) {
2739 res := searchForTest(t, b, q)
2740 if len(res.Files) != 2 {
2741 t.Fatalf("got %v, want 2 results in f3", res.Files)
2742 }
2743 })
2744 t.Run("ChunkMatches", func(t *testing.T) {
2745 res := searchForTest(t, b, q, chunkOpts)
2746 if len(res.Files) != 2 {
2747 t.Fatalf("got %v, want 2 results in f3", res.Files)
2748 }
2749 })
2750}
2751
2752func TestSymbolBoundaryStart(t *testing.T) {
2753 content := []byte("start\nbla bla\nend")
2754 // ----------------012345-67890123-456
2755
2756 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2757 Document{
2758 Name: "f1",
2759 Content: content,
2760 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2761 },
2762 )
2763 q := &query.Symbol{
2764 Expr: &query.Substring{Pattern: "start"},
2765 }
2766 t.Run("LineMatches", func(t *testing.T) {
2767 res := searchForTest(t, b, q)
2768 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2769 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2770 }
2771 m := res.Files[0].LineMatches[0].LineFragments[0]
2772 if m.Offset != 0 {
2773 t.Fatalf("got offset %d want 0", m.Offset)
2774 }
2775 })
2776
2777 t.Run("ChunkMatches", func(t *testing.T) {
2778 res := searchForTest(t, b, q, chunkOpts)
2779 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2780 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2781 }
2782 m := res.Files[0].ChunkMatches[0].Ranges[0]
2783 if m.Start.ByteOffset != 0 {
2784 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2785 }
2786 })
2787}
2788
2789func TestSymbolBoundaryEnd(t *testing.T) {
2790 content := []byte("start\nbla bla\nend")
2791 // ----------------012345-67890123-456
2792
2793 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2794 Document{
2795 Name: "f1",
2796 Content: content,
2797 Symbols: []DocumentSection{{14, 17}},
2798 },
2799 )
2800 q := &query.Symbol{
2801 Expr: &query.Substring{Pattern: "end"},
2802 }
2803 t.Run("LineMatches", func(t *testing.T) {
2804 res := searchForTest(t, b, q)
2805 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2806 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2807 }
2808 m := res.Files[0].LineMatches[0].LineFragments[0]
2809 if m.Offset != 14 {
2810 t.Fatalf("got offset %d want 0", m.Offset)
2811 }
2812 })
2813
2814 t.Run("ChunkMatches", func(t *testing.T) {
2815 res := searchForTest(t, b, q, chunkOpts)
2816 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2817 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2818 }
2819 m := res.Files[0].ChunkMatches[0].Ranges[0]
2820 if m.Start.ByteOffset != 14 {
2821 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2822 }
2823 })
2824}
2825
2826func TestSymbolSubstring(t *testing.T) {
2827 content := []byte("bla\nsymblabla\nbla")
2828 // ----------------0123-4567890123-456
2829
2830 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2831 Document{
2832 Name: "f1",
2833 Content: content,
2834 Symbols: []DocumentSection{{4, 12}},
2835 },
2836 )
2837 q := &query.Symbol{
2838 Expr: &query.Substring{Pattern: "bla"},
2839 }
2840 t.Run("LineMatches", func(t *testing.T) {
2841 res := searchForTest(t, b, q)
2842 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2843 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2844 }
2845 m := res.Files[0].LineMatches[0].LineFragments[0]
2846 if m.Offset != 7 || m.MatchLength != 3 {
2847 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2848 }
2849 })
2850
2851 t.Run("ChunkMatches", func(t *testing.T) {
2852 res := searchForTest(t, b, q, chunkOpts)
2853 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2854 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2855 }
2856 m := res.Files[0].ChunkMatches[0].Ranges[0]
2857 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2858 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2859 }
2860 })
2861}
2862
2863func TestSymbolSubstringExact(t *testing.T) {
2864 content := []byte("bla\nsym\nbla\nsym\nasymb")
2865 // ----------------0123-4567-890123456-78901
2866
2867 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2868 Document{
2869 Name: "f1",
2870 Content: content,
2871 Symbols: []DocumentSection{{4, 7}},
2872 },
2873 )
2874 q := &query.Symbol{
2875 Expr: &query.Substring{Pattern: "sym"},
2876 }
2877 t.Run("LineMatches", func(t *testing.T) {
2878 res := searchForTest(t, b, q)
2879 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2880 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2881 }
2882 m := res.Files[0].LineMatches[0].LineFragments[0]
2883 if m.Offset != 4 {
2884 t.Fatalf("got offset %d, want 7", m.Offset)
2885 }
2886 })
2887
2888 t.Run("ChunkMatches", func(t *testing.T) {
2889 res := searchForTest(t, b, q, chunkOpts)
2890 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2891 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2892 }
2893 m := res.Files[0].ChunkMatches[0].Ranges[0]
2894 if m.Start.ByteOffset != 4 {
2895 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2896 }
2897 })
2898}
2899
2900func TestSymbolRegexpExact(t *testing.T) {
2901 content := []byte("blah\nbla\nbl")
2902 // ----------------01234-5678-90
2903
2904 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2905 Document{
2906 Name: "f1",
2907 Content: content,
2908 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2909 },
2910 )
2911 q := &query.Symbol{
2912 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2913 }
2914 t.Run("LineMatches", func(t *testing.T) {
2915 res := searchForTest(t, b, q)
2916 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2917 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2918 }
2919 m := res.Files[0].LineMatches[0].LineFragments[0]
2920 if m.Offset != 5 {
2921 t.Fatalf("got offset %d, want 5", m.Offset)
2922 }
2923 })
2924
2925 t.Run("ChunkMatches", func(t *testing.T) {
2926 res := searchForTest(t, b, q, chunkOpts)
2927 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2928 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2929 }
2930 m := res.Files[0].ChunkMatches[0].Ranges[0]
2931 if m.Start.ByteOffset != 5 {
2932 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2933 }
2934 })
2935}
2936
2937func TestSymbolRegexpPartial(t *testing.T) {
2938 content := []byte("abcdef")
2939 // ----------------012345
2940
2941 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2942 Document{
2943 Name: "f1",
2944 Content: content,
2945 Symbols: []DocumentSection{{0, 6}},
2946 },
2947 )
2948 q := &query.Symbol{
2949 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2950 }
2951 t.Run("LineMatches", func(t *testing.T) {
2952 res := searchForTest(t, b, q)
2953 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2954 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2955 }
2956 m := res.Files[0].LineMatches[0].LineFragments[0]
2957 if m.Offset != 1 {
2958 t.Fatalf("got offset %d, want 1", m.Offset)
2959 }
2960 if m.MatchLength != 3 {
2961 t.Fatalf("got match length %d, want 3", m.MatchLength)
2962 }
2963 })
2964
2965 t.Run("ChunkMatches", func(t *testing.T) {
2966 res := searchForTest(t, b, q, chunkOpts)
2967 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2968 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2969 }
2970 m := res.Files[0].ChunkMatches[0].Ranges[0]
2971 if m.Start.ByteOffset != 1 {
2972 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2973 }
2974 if m.End.ByteOffset != 4 {
2975 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2976 }
2977 })
2978}
2979
2980func TestSymbolRegexpAll(t *testing.T) {
2981 docs := []Document{
2982 {
2983 Name: "f1",
2984 Content: []byte("Hello Zoekt"),
2985 // --------------01234567890
2986 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2987 },
2988 {
2989 Name: "f2",
2990 Content: []byte("Second Zoekt Third"),
2991 // --------------012345678901234567
2992 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2993 },
2994 }
2995
2996 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2997 q := &query.Symbol{
2998 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2999 }
3000 t.Run("LineMatches", func(t *testing.T) {
3001 res := searchForTest(t, b, q)
3002 if len(res.Files) != len(docs) {
3003 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3004 }
3005 for i, want := range docs {
3006 got := res.Files[i].LineMatches[0].LineFragments
3007 if len(got) != len(want.Symbols) {
3008 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3009 }
3010
3011 for j, sec := range want.Symbols {
3012 if sec.Start != got[j].Offset {
3013 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3014 }
3015 }
3016 }
3017 })
3018
3019 t.Run("ChunkMatches", func(t *testing.T) {
3020 res := searchForTest(t, b, q, chunkOpts)
3021 if len(res.Files) != len(docs) {
3022 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3023 }
3024 for i, want := range docs {
3025 got := res.Files[i].ChunkMatches[0].Ranges
3026 if len(got) != len(want.Symbols) {
3027 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3028 }
3029
3030 for j, sec := range want.Symbols {
3031 if sec.Start != uint32(got[j].Start.ByteOffset) {
3032 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3033 }
3034 }
3035 }
3036 })
3037}
3038
3039func TestHitIterTerminate(t *testing.T) {
3040 // contrived input: trigram frequencies forces selecting abc +
3041 // def for the distance iteration. There is no match, so this
3042 // will advance the compressedPostingIterator to beyond the
3043 // end.
3044 content := []byte("abc bcdbcd cdecde abcabc def efg")
3045 b := testIndexBuilder(t, nil,
3046 Document{
3047 Name: "f1",
3048 Content: content,
3049 },
3050 )
3051
3052 t.Run("LineMatches", func(t *testing.T) {
3053 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3054 })
3055
3056 t.Run("ChunkMatches", func(t *testing.T) {
3057 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3058 })
3059}
3060
3061func TestDistanceHitIterBailLast(t *testing.T) {
3062 content := []byte("AST AST AST UASH")
3063 b := testIndexBuilder(t, nil,
3064 Document{
3065 Name: "f1",
3066 Content: content,
3067 },
3068 )
3069 t.Run("LineMatches", func(t *testing.T) {
3070 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3071 if len(res.Files) != 0 {
3072 t.Fatalf("got %v, want no results", res.Files)
3073 }
3074 })
3075
3076 t.Run("LineMatches", func(t *testing.T) {
3077 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3078 if len(res.Files) != 0 {
3079 t.Fatalf("got %v, want no results", res.Files)
3080 }
3081 })
3082}
3083
3084func TestDocumentSectionRuneBoundary(t *testing.T) {
3085 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3086 b, err := NewIndexBuilder(nil)
3087 if err != nil {
3088 t.Fatalf("NewIndexBuilder: %v", err)
3089 }
3090
3091 for i, sec := range []DocumentSection{
3092 {2, 6},
3093 {3, 7},
3094 } {
3095 if err := b.Add(Document{
3096 Name: "f1",
3097 Content: []byte(content),
3098 Symbols: []DocumentSection{sec},
3099 }); err == nil {
3100 t.Errorf("%d: Add succeeded", i)
3101 }
3102 }
3103}
3104
3105func TestUnicodeQuery(t *testing.T) {
3106 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3107 b := testIndexBuilder(t, nil,
3108 Document{
3109 Name: "f1",
3110 Content: []byte(content),
3111 },
3112 )
3113
3114 q := &query.Substring{Pattern: content}
3115
3116 t.Run("LineMatches", func(t *testing.T) {
3117 res := searchForTest(t, b, q)
3118 if len(res.Files) != 1 {
3119 t.Fatalf("want 1 match, got %v", res.Files)
3120 }
3121
3122 f := res.Files[0]
3123 if len(f.LineMatches) != 1 {
3124 t.Fatalf("want 1 line, got %v", f.LineMatches)
3125 }
3126 l := f.LineMatches[0]
3127
3128 if len(l.LineFragments) != 1 {
3129 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3130 }
3131 fr := l.LineFragments[0]
3132 if fr.MatchLength != len(content) {
3133 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3134 }
3135 })
3136
3137 t.Run("ChunkMatches", func(t *testing.T) {
3138 res := searchForTest(t, b, q, chunkOpts)
3139 if len(res.Files) != 1 {
3140 t.Fatalf("want 1 match, got %v", res.Files)
3141 }
3142
3143 f := res.Files[0]
3144 if len(f.ChunkMatches) != 1 {
3145 t.Fatalf("want 1 line, got %v", f.LineMatches)
3146 }
3147 cm := f.ChunkMatches[0]
3148
3149 if len(cm.Ranges) != 1 {
3150 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3151 }
3152 rr := cm.Ranges[0]
3153 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3154 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3155 }
3156 })
3157}
3158
3159func TestSkipInvalidContent(t *testing.T) {
3160 for _, content := range []string{
3161 // Binary
3162 "abc def \x00 abc",
3163 } {
3164
3165 b, err := NewIndexBuilder(nil)
3166 if err != nil {
3167 t.Fatalf("NewIndexBuilder: %v", err)
3168 }
3169
3170 if err := b.Add(Document{
3171 Name: "f1",
3172 Content: []byte(content),
3173 }); err != nil {
3174 t.Fatal(err)
3175 }
3176
3177 t.Run("LineMatches", func(t *testing.T) {
3178 q := &query.Substring{Pattern: "abc def"}
3179 res := searchForTest(t, b, q)
3180 if len(res.Files) != 0 {
3181 t.Fatalf("got %v, want no results", res.Files)
3182 }
3183
3184 q = &query.Substring{Pattern: "NOT-INDEXED"}
3185 res = searchForTest(t, b, q)
3186 if len(res.Files) != 1 {
3187 t.Fatalf("got %v, want 1 result", res.Files)
3188 }
3189 })
3190
3191 t.Run("ChunkMatches", func(t *testing.T) {
3192 q := &query.Substring{Pattern: "abc def"}
3193 res := searchForTest(t, b, q, chunkOpts)
3194 if len(res.Files) != 0 {
3195 t.Fatalf("got %v, want no results", res.Files)
3196 }
3197
3198 q = &query.Substring{Pattern: "NOT-INDEXED"}
3199 res = searchForTest(t, b, q, chunkOpts)
3200 if len(res.Files) != 1 {
3201 t.Fatalf("got %v, want 1 result", res.Files)
3202 }
3203 })
3204 }
3205}
3206
3207func TestCheckText(t *testing.T) {
3208 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3209 if err := CheckText([]byte(text), 20000); err != nil {
3210 t.Errorf("CheckText(%q): %v", text, err)
3211 }
3212 }
3213 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3214 if err := CheckText([]byte(text), 15); err == nil {
3215 t.Errorf("CheckText(%q) succeeded", text)
3216 }
3217 }
3218}
3219
3220func TestLineAnd(t *testing.T) {
3221 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3222 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3223 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3224 Document{Name: "f3", Content: []byte("banana grape")},
3225 )
3226 pattern := "(apple)(?-s:.)*?(banana)"
3227 r, _ := syntax.Parse(pattern, syntax.Perl)
3228
3229 q := query.Regexp{
3230 Regexp: r,
3231 Content: true,
3232 }
3233 t.Run("LineMatches", func(t *testing.T) {
3234 res := searchForTest(t, b, &q)
3235 wantRegexpCount := 1
3236 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3237 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3238 }
3239 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3240 t.Errorf("got %v, want 1 result", res.Files)
3241 }
3242 })
3243
3244 t.Run("ChunkMatches", func(t *testing.T) {
3245 res := searchForTest(t, b, &q, chunkOpts)
3246 wantRegexpCount := 1
3247 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3248 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3249 }
3250 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3251 t.Errorf("got %v, want 1 result", res.Files)
3252 }
3253 })
3254}
3255
3256func TestLineAndFileName(t *testing.T) {
3257 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3258 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3259 Document{Name: "f2", Content: []byte("apple banana\norange")},
3260 Document{Name: "apple banana", Content: []byte("banana grape")},
3261 )
3262 pattern := "(apple)(?-s:.)*?(banana)"
3263 r, _ := syntax.Parse(pattern, syntax.Perl)
3264
3265 q := query.Regexp{
3266 Regexp: r,
3267 FileName: true,
3268 }
3269 t.Run("LineMatches", func(t *testing.T) {
3270 res := searchForTest(t, b, &q)
3271 wantRegexpCount := 1
3272 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3273 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3274 }
3275 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3276 t.Errorf("got %v, want 1 result", res.Files)
3277 }
3278 })
3279
3280 t.Run("ChunkMatches", func(t *testing.T) {
3281 res := searchForTest(t, b, &q, chunkOpts)
3282 wantRegexpCount := 1
3283 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3284 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3285 }
3286 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3287 t.Errorf("got %v, want 1 result", res.Files)
3288 }
3289 })
3290}
3291
3292func TestMultiLineRegex(t *testing.T) {
3293 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3294 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3295 Document{Name: "f2", Content: []byte("apple orange")},
3296 Document{Name: "f3", Content: []byte("grape apple")},
3297 )
3298 pattern := "(apple).*?[[:space:]].*?(grape)"
3299 r, _ := syntax.Parse(pattern, syntax.Perl)
3300
3301 q := query.Regexp{
3302 Regexp: r,
3303 }
3304 t.Run("LineMatches", func(t *testing.T) {
3305 res := searchForTest(t, b, &q)
3306 wantRegexpCount := 2
3307 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3308 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3309 }
3310 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3311 t.Errorf("got %v, want 1 result", res.Files)
3312 }
3313 if l := len(res.Files[0].LineMatches); l != 2 {
3314 t.Errorf("got %v, want 2 line matches", l)
3315 }
3316 })
3317
3318 t.Run("ChunkMatches", func(t *testing.T) {
3319 res := searchForTest(t, b, &q, chunkOpts)
3320 wantRegexpCount := 2
3321 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3322 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3323 }
3324 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3325 t.Errorf("got %v, want 1 result", res.Files)
3326 }
3327 if l := len(res.Files[0].ChunkMatches); l != 1 {
3328 t.Errorf("got %v, want 1 chunk matches", l)
3329 }
3330 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3331 t.Errorf("got %v, want 1 chunk ranges", l)
3332 }
3333 })
3334}
3335
3336func TestSearchTypeFileName(t *testing.T) {
3337 b := testIndexBuilder(t, &Repository{
3338 Name: "reponame",
3339 },
3340 Document{Name: "f1", Content: []byte("bla the needle")},
3341 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3342 // -----------------------------------012345678901234567890-123456
3343 )
3344
3345 t.Run("LineMatches", func(t *testing.T) {
3346 wantSingleMatch := func(res *SearchResult, want string) {
3347 t.Helper()
3348 fmatches := res.Files
3349 if len(fmatches) != 1 {
3350 t.Errorf("got %v, want 1 matches", len(fmatches))
3351 return
3352 }
3353 if len(fmatches[0].LineMatches) != 1 {
3354 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3355 return
3356 }
3357 var got string
3358 if fmatches[0].LineMatches[0].FileName {
3359 got = fmatches[0].FileName
3360 } else {
3361 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3362 }
3363
3364 if got != want {
3365 t.Errorf("got %s, want %s", got, want)
3366 }
3367 }
3368
3369 // Only return the later match in the second file
3370 res := searchForTest(t, b, query.NewAnd(
3371 &query.Type{
3372 Type: query.TypeFileName,
3373 Child: &query.Substring{Pattern: "needle"},
3374 },
3375 &query.Substring{Pattern: "file"}))
3376 wantSingleMatch(res, "f2:8")
3377
3378 // Only return a filename result
3379 res = searchForTest(t, b,
3380 &query.Type{
3381 Type: query.TypeFileName,
3382 Child: &query.Substring{Pattern: "file"},
3383 })
3384 wantSingleMatch(res, "f2")
3385 })
3386
3387 t.Run("ChunkMatches", func(t *testing.T) {
3388 wantSingleMatch := func(res *SearchResult, want string) {
3389 t.Helper()
3390 fmatches := res.Files
3391 if len(fmatches) != 1 {
3392 t.Errorf("got %v, want 1 matches", len(fmatches))
3393 return
3394 }
3395 if len(fmatches[0].ChunkMatches) != 1 {
3396 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3397 return
3398 }
3399 var got string
3400 if fmatches[0].ChunkMatches[0].FileName {
3401 got = fmatches[0].FileName
3402 } else {
3403 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3404 }
3405
3406 if got != want {
3407 t.Errorf("got %s, want %s", got, want)
3408 }
3409 }
3410
3411 // Only return the later match in the second file
3412 res := searchForTest(t, b, query.NewAnd(
3413 &query.Type{
3414 Type: query.TypeFileName,
3415 Child: &query.Substring{Pattern: "needle"},
3416 },
3417 &query.Substring{Pattern: "file"}),
3418 chunkOpts,
3419 )
3420 wantSingleMatch(res, "f2:8")
3421
3422 // Only return a filename result
3423 res = searchForTest(t, b,
3424 &query.Type{
3425 Type: query.TypeFileName,
3426 Child: &query.Substring{Pattern: "file"},
3427 },
3428 chunkOpts,
3429 )
3430 wantSingleMatch(res, "f2")
3431 })
3432}
3433
3434func TestSearchTypeLanguage(t *testing.T) {
3435 b := testIndexBuilder(t, &Repository{
3436 Name: "reponame",
3437 },
3438 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3439 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3440 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3441 )
3442
3443 t.Log(b.languageMap)
3444
3445 t.Run("LineMatches", func(t *testing.T) {
3446 wantSingleMatch := func(res *SearchResult, want string) {
3447 t.Helper()
3448 fmatches := res.Files
3449 if len(fmatches) != 1 {
3450 t.Errorf("got %v, want 1 matches", len(fmatches))
3451 return
3452 }
3453 if len(fmatches[0].LineMatches) != 1 {
3454 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3455 return
3456 }
3457 var got string
3458 if fmatches[0].LineMatches[0].FileName {
3459 got = fmatches[0].FileName
3460 } else {
3461 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3462 }
3463
3464 if got != want {
3465 t.Errorf("got %s, want %s", got, want)
3466 }
3467 }
3468
3469 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3470 wantSingleMatch(res, "apex.cls")
3471
3472 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3473 wantSingleMatch(res, "tex.cls")
3474
3475 res = searchForTest(t, b, &query.Language{Language: "C"})
3476 wantSingleMatch(res, "hello.h")
3477
3478 // test fallback language search by pretending it's an older index version
3479 res = searchForTest(t, b, &query.Language{Language: "C++"})
3480 if len(res.Files) != 0 {
3481 t.Errorf("got %d results for C++, want 0", len(res.Files))
3482 }
3483
3484 b.featureVersion = 11 // force fallback
3485 res = searchForTest(t, b, &query.Language{Language: "C++"})
3486 wantSingleMatch(res, "hello.h")
3487 })
3488
3489 t.Run("ChunkMatches", func(t *testing.T) {
3490 wantSingleMatch := func(res *SearchResult, want string) {
3491 t.Helper()
3492 fmatches := res.Files
3493 if len(fmatches) != 1 {
3494 t.Errorf("got %v, want 1 matches", len(fmatches))
3495 return
3496 }
3497 if len(fmatches[0].ChunkMatches) != 1 {
3498 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3499 return
3500 }
3501 var got string
3502 if fmatches[0].ChunkMatches[0].FileName {
3503 got = fmatches[0].FileName
3504 } else {
3505 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3506 }
3507
3508 if got != want {
3509 t.Errorf("got %s, want %s", got, want)
3510 }
3511 }
3512
3513 b.featureVersion = FeatureVersion // reset feature version
3514 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3515 wantSingleMatch(res, "apex.cls")
3516
3517 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3518 wantSingleMatch(res, "tex.cls")
3519
3520 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3521 wantSingleMatch(res, "hello.h")
3522
3523 // test fallback language search by pretending it's an older index version
3524 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3525 if len(res.Files) != 0 {
3526 t.Errorf("got %d results for C++, want 0", len(res.Files))
3527 }
3528
3529 b.featureVersion = 11 // force fallback
3530 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3531 wantSingleMatch(res, "hello.h")
3532 })
3533}
3534
3535func TestStats(t *testing.T) {
3536 ignored := []cmp.Option{
3537 cmpopts.EquateEmpty(),
3538 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3539 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3540 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3541 }
3542
3543 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3544 searcher := searcherForTest(t, b)
3545 indexdata := searcher.(*indexData)
3546 return indexdata.repoListEntry
3547 }
3548
3549 t.Run("one empty repo", func(t *testing.T) {
3550 b := testIndexBuilder(t, nil)
3551 got := repoListEntries(b)
3552 want := []RepoListEntry{
3553 {
3554 Stats: RepoStats{
3555 Repos: 0,
3556 Shards: 1,
3557 Documents: 0,
3558 IndexBytes: 20,
3559 ContentBytes: 0,
3560 NewLinesCount: 0,
3561 DefaultBranchNewLinesCount: 0,
3562 OtherBranchesNewLinesCount: 0,
3563 },
3564 },
3565 }
3566
3567 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3568 t.Fatalf("mismatch (-want +got):\n%s", diff)
3569 }
3570
3571 })
3572
3573 t.Run("one simple shard", func(t *testing.T) {
3574 b := testIndexBuilder(t, nil,
3575 Document{Name: "doc 0", Content: []byte("content 0")},
3576 Document{Name: "doc 1", Content: []byte("content 1")},
3577 )
3578 got := repoListEntries(b)
3579 want := []RepoListEntry{
3580 {
3581 Stats: RepoStats{
3582 Repos: 0,
3583 Shards: 1,
3584 Documents: 2,
3585 IndexBytes: 224,
3586 ContentBytes: 28,
3587 NewLinesCount: 0,
3588 DefaultBranchNewLinesCount: 0,
3589 OtherBranchesNewLinesCount: 0,
3590 },
3591 },
3592 }
3593
3594 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3595 t.Fatalf("mismatch (-want +got):\n%s", diff)
3596 }
3597
3598 })
3599
3600 t.Run("one compound shard", func(t *testing.T) {
3601 b := testIndexBuilderCompound(t,
3602 []*Repository{
3603 {Name: "repo 0"},
3604 {Name: "repo 1"},
3605 },
3606 [][]Document{
3607 {
3608 {Name: "doc 0", Content: []byte("content 0")},
3609 {Name: "doc 1", Content: []byte("content 1")},
3610 },
3611 {
3612 {Name: "doc 2", Content: []byte("content 2")},
3613 {Name: "doc 3", Content: []byte("content 3")},
3614 },
3615 },
3616 )
3617 got := repoListEntries(b)
3618 want := []RepoListEntry{
3619 {
3620 Stats: RepoStats{
3621 Repos: 0,
3622 Shards: 1,
3623 Documents: 2,
3624 IndexBytes: 180,
3625 ContentBytes: 28,
3626 NewLinesCount: 0,
3627 DefaultBranchNewLinesCount: 0,
3628 OtherBranchesNewLinesCount: 0,
3629 },
3630 },
3631 {
3632 Stats: RepoStats{
3633 Repos: 0,
3634 Shards: 1,
3635 Documents: 2,
3636 IndexBytes: 180,
3637 ContentBytes: 28,
3638 NewLinesCount: 0,
3639 DefaultBranchNewLinesCount: 0,
3640 OtherBranchesNewLinesCount: 0,
3641 },
3642 },
3643 }
3644
3645 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3646 t.Fatalf("mismatch (-want +got):\n%s", diff)
3647 }
3648 })
3649
3650 t.Run("compound shard with empty repos", func(t *testing.T) {
3651 b := testIndexBuilderCompound(t,
3652 []*Repository{
3653 {Name: "repo 0"},
3654 {Name: "repo 1"},
3655 {Name: "repo 2"},
3656 {Name: "repo 3"},
3657 {Name: "repo 4"},
3658 },
3659 [][]Document{
3660 {{Name: "doc 0", Content: []byte("content 0")}},
3661 nil,
3662 {{Name: "doc 1", Content: []byte("content 1")}},
3663 nil,
3664 nil,
3665 },
3666 )
3667 got := repoListEntries(b)
3668
3669 entryEmpty := RepoListEntry{Stats: RepoStats{
3670 Shards: 1,
3671 Documents: 0,
3672 ContentBytes: 0,
3673 }}
3674 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3675 Shards: 1,
3676 Documents: 1,
3677 ContentBytes: 14,
3678 }}
3679
3680 want := []RepoListEntry{
3681 entryNonEmpty,
3682 entryEmpty,
3683 entryNonEmpty,
3684 entryEmpty,
3685 entryEmpty,
3686 }
3687
3688 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3689 t.Fatalf("mismatch (-want +got):\n%s", diff)
3690 }
3691
3692 })
3693}
3694
3695// This tests the frequent pattern "\bLITERAL\b".
3696func TestWordSearch(t *testing.T) {
3697 content := []byte("needle the bla")
3698 // ----------------01234567890123
3699
3700 b := testIndexBuilder(t, nil,
3701 Document{
3702 Name: "f1",
3703 Content: content,
3704 })
3705
3706 t.Run("LineMatches", func(t *testing.T) {
3707 sres := searchForTest(t, b,
3708 &query.Regexp{
3709 Regexp: mustParseRE("\\bthe\\b"),
3710 CaseSensitive: true,
3711 Content: true,
3712 })
3713
3714 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3715 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3716 }
3717
3718 if sres.Stats.RegexpsConsidered != 0 {
3719 t.Fatal("expected regexp to be skipped")
3720 }
3721
3722 got := sres.Files[0].LineMatches[0]
3723 want := LineMatch{
3724 LineFragments: []LineFragmentMatch{{
3725 LineOffset: 7,
3726 Offset: 7,
3727 MatchLength: 3,
3728 }},
3729 Line: content,
3730 FileName: false,
3731 LineNumber: 1,
3732 LineStart: 0,
3733 LineEnd: 14,
3734 }
3735
3736 if !reflect.DeepEqual(got, want) {
3737 t.Errorf("got %#v, want %#v", got, want)
3738 }
3739 })
3740
3741 t.Run("ChunkMatches", func(t *testing.T) {
3742 sres := searchForTest(t, b,
3743 &query.Regexp{
3744 Regexp: mustParseRE("\\bthe\\b"),
3745 CaseSensitive: true,
3746 }, chunkOpts)
3747
3748 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3749 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3750 }
3751
3752 if sres.Stats.RegexpsConsidered != 0 {
3753 t.Fatal("expected regexp to be skipped")
3754 }
3755
3756 got := sres.Files[0].ChunkMatches[0]
3757 want := ChunkMatch{
3758 Content: content,
3759 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3760 Ranges: []Range{{
3761 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3762 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3763 }},
3764 }
3765
3766 if diff := cmp.Diff(want, got); diff != "" {
3767 t.Fatal(diff)
3768 }
3769 })
3770}