fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "reflect"
22 "regexp/syntax"
23 "strings"
24 "testing"
25
26 "github.com/google/go-cmp/cmp"
27 "github.com/google/go-cmp/cmp/cmpopts"
28 "github.com/grafana/regexp"
29
30 "github.com/sourcegraph/zoekt/query"
31)
32
33func clearScores(r *SearchResult) {
34 for i := range r.Files {
35 r.Files[i].Score = 0.0
36 for j := range r.Files[i].LineMatches {
37 r.Files[i].LineMatches[j].Score = 0.0
38 }
39 for j := range r.Files[i].ChunkMatches {
40 r.Files[i].ChunkMatches[j].Score = 0.0
41 }
42 r.Files[i].Checksum = nil
43 r.Files[i].Debug = ""
44 }
45}
46
47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder {
48 t.Helper()
49
50 b, err := NewIndexBuilder(repo)
51 if err != nil {
52 t.Fatalf("NewIndexBuilder: %v", err)
53 }
54
55 for i, d := range docs {
56 if err := b.Add(d); err != nil {
57 t.Fatalf("Add %d: %v", i, err)
58 }
59 }
60
61 return b
62}
63
64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder {
65 t.Helper()
66
67 b := newIndexBuilder()
68 b.indexFormatVersion = NextIndexFormatVersion
69
70 if len(repos) != len(docs) {
71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs))
72 }
73
74 for i, repo := range repos {
75 if err := b.setRepository(repo); err != nil {
76 t.Fatal(err)
77 }
78 for j, d := range docs[i] {
79 if err := b.Add(d); err != nil {
80 t.Fatalf("Add %d %d: %v", i, j, err)
81 }
82 }
83 }
84
85 return b
86}
87
88func TestBoundary(t *testing.T) {
89 b := testIndexBuilder(t, nil,
90 Document{Name: "f1", Content: []byte("x the")},
91 Document{Name: "f1", Content: []byte("reader")})
92 res := searchForTest(t, b, &query.Substring{Pattern: "there"})
93 if len(res.Files) > 0 {
94 t.Fatalf("got %v, want no matches", res.Files)
95 }
96}
97
98func TestDocSectionInvalid(t *testing.T) {
99 b, err := NewIndexBuilder(nil)
100 if err != nil {
101 t.Fatalf("NewIndexBuilder: %v", err)
102 }
103 doc := Document{
104 Name: "f1",
105 Content: []byte("01234567890123"),
106 Symbols: []DocumentSection{{5, 8}, {7, 9}},
107 }
108
109 if err := b.Add(doc); err == nil {
110 t.Errorf("overlapping doc sections should fail")
111 }
112
113 doc = Document{
114 Name: "f1",
115 Content: []byte("01234567890123"),
116 Symbols: []DocumentSection{{0, 20}},
117 }
118
119 if err := b.Add(doc); err == nil {
120 t.Errorf("doc sections beyond EOF should fail")
121 }
122}
123
124func TestBasic(t *testing.T) {
125 b := testIndexBuilder(t, nil,
126 Document{
127 Name: "f2",
128 Content: []byte("to carry water in the no later bla"),
129 // --------------0123456789012345678901234567890123
130 })
131
132 t.Run("LineMatch", func(t *testing.T) {
133 res := searchForTest(t, b, &query.Substring{
134 Pattern: "water",
135 CaseSensitive: true,
136 })
137 fmatches := res.Files
138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 {
139 t.Fatalf("got %v, want 1 matches", fmatches)
140 }
141
142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
143 want := "f2:9"
144 if got != want {
145 t.Errorf("1: got %s, want %s", got, want)
146 }
147 })
148
149 t.Run("ChunkMatch", func(t *testing.T) {
150 res := searchForTest(t, b, &query.Substring{
151 Pattern: "water",
152 CaseSensitive: true,
153 }, chunkOpts)
154 fmatches := res.Files
155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 {
156 t.Fatalf("got %v, want 1 matches", fmatches)
157 }
158
159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
160 want := "f2:9"
161 if got != want {
162 t.Errorf("1: got %s, want %s", got, want)
163 }
164 })
165}
166
167func TestEmptyIndex(t *testing.T) {
168 b := testIndexBuilder(t, nil)
169 searcher := searcherForTest(t, b)
170
171 var opts SearchOptions
172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil {
173 t.Fatalf("Search: %v", err)
174 }
175
176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil {
177 t.Fatalf("List: %v", err)
178 }
179
180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil {
181 t.Fatalf("Search: %v", err)
182 }
183}
184
// memSeeker is an in-memory index file backed by a byte slice. The tests
// write an index into a buffer and hand a memSeeker over it to NewSearcher.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for this in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is nothing to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of data starting at offset off. It returns an
// error, rather than panicking, when the requested range does not lie within
// the data.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in 64 bits so that off+sz cannot wrap around.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("out of bounds: %d, len %d, name %s", end, len(s.data), s.Name())
	}
	return s.data[off:end], nil
}

// Size returns the length of the data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
201
202func TestNewlines(t *testing.T) {
203 b := testIndexBuilder(t, nil,
204 // -----------------------------------------012345-678901-234
205 Document{Name: "filename", Content: []byte("line1\nline2\nbla")})
206
207 t.Run("LineMatches", func(t *testing.T) {
208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"})
209
210 matches := sres.Files
211 want := []FileMatch{{
212 FileName: "filename",
213 LineMatches: []LineMatch{{
214 LineFragments: []LineFragmentMatch{{
215 Offset: 8,
216 LineOffset: 2,
217 MatchLength: 3,
218 }},
219 Line: []byte("line2\n"),
220 LineStart: 6,
221 LineEnd: 12,
222 LineNumber: 2,
223 }},
224 }}
225
226 if diff := cmp.Diff(matches, want); diff != "" {
227 t.Fatal(diff)
228 }
229 })
230
231 t.Run("ChunkMatches", func(t *testing.T) {
232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts)
233
234 matches := sres.Files
235 want := []FileMatch{{
236 FileName: "filename",
237 ChunkMatches: []ChunkMatch{{
238 Content: []byte("line2\n"),
239 ContentStart: Location{
240 ByteOffset: 6,
241 LineNumber: 2,
242 Column: 1,
243 },
244 Ranges: []Range{{
245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3},
246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6},
247 }},
248 }},
249 }}
250
251 if diff := cmp.Diff(want, matches); diff != "" {
252 t.Fatal(diff)
253 }
254 })
255}
256
257// A result spanning multiple lines should have LineMatches that only cover
258// single lines.
259func TestQueryNewlines(t *testing.T) {
260 text := "line1\nline2\nbla"
261 b := testIndexBuilder(t, nil,
262 Document{Name: "filename", Content: []byte(text)})
263
264 t.Run("LineMatches", func(t *testing.T) {
265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"})
266 matches := sres.Files
267 if len(matches) != 1 {
268 t.Fatalf("got %d file matches, want exactly one", len(matches))
269 }
270 m := matches[0]
271 if len(m.LineMatches) != 2 {
272 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches)
273 }
274 })
275
276 t.Run("ChunkMatches", func(t *testing.T) {
277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts)
278 matches := sres.Files
279 if len(matches) != 1 {
280 t.Fatalf("got %d file matches, want exactly one", len(matches))
281 }
282 m := matches[0]
283 if len(m.ChunkMatches) != 1 {
284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches))
285 }
286 })
287}
288
289var chunkOpts = SearchOptions{ChunkMatches: true}
290
291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult {
292 searcher := searcherForTest(t, b)
293 var opts SearchOptions
294 if len(o) > 0 {
295 opts = o[0]
296 }
297 res, err := searcher.Search(context.Background(), q, &opts)
298 if err != nil {
299 t.Fatalf("Search(%s): %v", q, err)
300 }
301 clearScores(res)
302 return res
303}
304
305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher {
306 var buf bytes.Buffer
307 if err := b.Write(&buf); err != nil {
308 t.Fatal(err)
309 }
310 f := &memSeeker{buf.Bytes()}
311
312 searcher, err := NewSearcher(f)
313 if err != nil {
314 t.Fatalf("NewSearcher: %v", err)
315 }
316
317 return searcher
318}
319
320func TestCaseFold(t *testing.T) {
321 b := testIndexBuilder(t, nil,
322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")},
323 // -----------------------------------012345678901234
324 )
325 t.Run("LineMatches", func(t *testing.T) {
326 sres := searchForTest(t, b, &query.Substring{
327 Pattern: "bananas",
328 CaseSensitive: true,
329 })
330 matches := sres.Files
331 if len(matches) != 0 {
332 t.Errorf("foldcase: got %#v, want 0 matches", matches)
333 }
334
335 sres = searchForTest(t, b,
336 &query.Substring{
337 Pattern: "BaNaNAS",
338 CaseSensitive: true,
339 })
340 matches = sres.Files
341 if len(matches) != 1 {
342 t.Errorf("no foldcase: got %v, want 1 matches", matches)
343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
344 t.Errorf("foldcase: got %v, want offsets 7", matches)
345 }
346 })
347
348 t.Run("ChunkMatches", func(t *testing.T) {
349 sres := searchForTest(t, b, &query.Substring{
350 Pattern: "bananas",
351 CaseSensitive: true,
352 }, chunkOpts)
353 matches := sres.Files
354 if len(matches) != 0 {
355 t.Errorf("foldcase: got %#v, want 0 matches", matches)
356 }
357
358 sres = searchForTest(t, b,
359 &query.Substring{
360 Pattern: "BaNaNAS",
361 CaseSensitive: true,
362 })
363 matches = sres.Files
364 if len(matches) != 1 {
365 t.Errorf("no foldcase: got %v, want 1 matches", matches)
366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 {
367 t.Errorf("foldcase: got %v, want offsets 7", matches)
368 }
369 })
370}
371
372// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2
373// chars. Those are then set as symbols.
374func wordsAsSymbols(doc Document) Document {
375 re := regexp.MustCompile(`\b\w{2,}\b`)
376 var symbols []DocumentSection
377 for _, match := range re.FindAllIndex(doc.Content, -1) {
378 symbols = append(symbols, DocumentSection{
379 Start: uint32(match[0]),
380 End: uint32(match[1]),
381 })
382 }
383 doc.Symbols = symbols
384 return doc
385}
386
387func TestSearchStats(t *testing.T) {
388 ctx := context.Background()
389 searcher := searcherForTest(t, testIndexBuilder(t, nil,
390 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}),
391 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}),
392 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}),
393 // --------------------------------------------------0123456789012345
394 ))
395
396 andQuery := query.NewAnd(
397 &query.Substring{
398 Pattern: "banana",
399 },
400 &query.Substring{
401 Pattern: "apple",
402 },
403 )
404
405 t.Run("LineMatches", func(t *testing.T) {
406 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{})
407 if err != nil {
408 t.Fatal(err)
409 }
410 matches := sres.Files
411 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 {
412 t.Fatalf("got %#v, want 1 match with 2 fragments", matches)
413 }
414
415 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 {
416 t.Fatalf("got %#v, want offsets 2,9", matches)
417 }
418 })
419 t.Run("ChunkMatches", func(t *testing.T) {
420 sres, err := searcher.Search(ctx, andQuery, &chunkOpts)
421 if err != nil {
422 t.Fatal(err)
423 }
424 matches := sres.Files
425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 {
426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches)
427 }
428
429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 {
430 t.Fatalf("got %#v, want offsets 2,9", matches)
431 }
432 })
433 t.Run("Stats", func(t *testing.T) {
434 cases := []struct {
435 Name string
436 Q query.Q
437 Want Stats
438 }{{
439 Name: "and-query",
440 Q: andQuery,
441 Want: Stats{
442 FilesLoaded: 1,
443 ContentBytesLoaded: 22,
444 IndexBytesLoaded: 8,
445 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND
446 NgramLookups: 104,
447 MatchCount: 2,
448 FileCount: 1,
449 FilesConsidered: 2,
450 ShardsScanned: 1,
451 },
452 }, {
453 Name: "one-trigram",
454 Q: &query.Substring{
455 Pattern: "a y",
456 Content: true,
457 CaseSensitive: true,
458 },
459 Want: Stats{
460 ContentBytesLoaded: 14,
461 IndexBytesLoaded: 1,
462 FileCount: 1,
463 FilesConsidered: 1,
464 FilesLoaded: 1,
465 ShardsScanned: 1,
466 MatchCount: 1,
467 NgramMatches: 1,
468 NgramLookups: 2, // once to lookup frequency then again to access posting list.
469 },
470 }, {
471 Name: "one-trigram-case-insensitive",
472 Q: &query.Substring{
473 Pattern: "a y",
474 Content: true,
475 },
476 Want: Stats{
477 ContentBytesLoaded: 14,
478 IndexBytesLoaded: 1,
479 FileCount: 1,
480 FilesConsidered: 1,
481 FilesLoaded: 1,
482 ShardsScanned: 1,
483 MatchCount: 1,
484 NgramMatches: 1,
485 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice.
486 },
487 }, {
488 Name: "one-trigram-pruned",
489 Q: &query.Substring{
490 Pattern: "foo",
491 Content: true,
492 CaseSensitive: true,
493 },
494 Want: Stats{
495 ShardsSkippedFilter: 1,
496 NgramLookups: 1, // only had to lookup once
497 },
498 }, {
499 Name: "one-trigram-branch-pruned",
500 Q: query.NewAnd(
501 &query.Substring{
502 Pattern: "foo",
503 Content: true,
504 CaseSensitive: true,
505 },
506 &query.Substring{
507 Pattern: "a y",
508 Content: true,
509 CaseSensitive: true,
510 },
511 ),
512 Want: Stats{
513 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning.
514 ShardsSkippedFilter: 1,
515 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2).
516 },
517 }, {
518 Name: "symbol-substr-nomatch",
519 Q: &query.Symbol{Expr: &query.Substring{
520 Pattern: "banana apple",
521 Content: true,
522 CaseSensitive: true,
523 }},
524 Want: Stats{
525 IndexBytesLoaded: 3,
526 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index
527 MatchCount: 0, // even though there is a match it doesn't align with a symbol
528 ShardsScanned: 1,
529 NgramMatches: 1,
530 NgramLookups: 12,
531 },
532 }, {
533 Name: "symbol-substr",
534 Q: &query.Symbol{Expr: &query.Substring{
535 Pattern: "apple",
536 Content: true,
537 CaseSensitive: true,
538 }},
539 Want: Stats{
540 ContentBytesLoaded: 35,
541 IndexBytesLoaded: 4,
542 FileCount: 2,
543 FilesConsidered: 2, // must be 2 to ensure we used the index
544 FilesLoaded: 2,
545 MatchCount: 2, // apple symbols is in two files
546 ShardsScanned: 1,
547 NgramMatches: 2,
548 NgramLookups: 5,
549 },
550 }, {
551 Name: "symbol-regexp-nomatch",
552 Q: &query.Symbol{Expr: &query.Regexp{
553 Regexp: mustParseRE("^apple.banana$"),
554 Content: true,
555 CaseSensitive: true,
556 }},
557 Want: Stats{
558 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents
559 IndexBytesLoaded: 8,
560 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index
561 FilesLoaded: 2,
562 MatchCount: 0, // even though there is a match it doesn't align with a symbol
563 ShardsScanned: 1,
564 NgramMatches: 3,
565 NgramLookups: 11,
566 },
567 }, {
568 Name: "symbol-regexp",
569 Q: &query.Symbol{Expr: &query.Regexp{
570 Regexp: mustParseRE("^app.e$"),
571 Content: true,
572 CaseSensitive: true,
573 }},
574 Want: Stats{
575 ContentBytesLoaded: 35,
576 IndexBytesLoaded: 2,
577 FileCount: 2,
578 FilesConsidered: 2, // must be 2 to ensure we used the index
579 FilesLoaded: 2,
580 MatchCount: 2, // apple symbols is in two files
581 ShardsScanned: 1,
582 NgramMatches: 2,
583 NgramLookups: 2,
584 },
585 }}
586
587 for _, tc := range cases {
588 t.Run(tc.Name, func(t *testing.T) {
589 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts)
590 if err != nil {
591 t.Fatal(err)
592 }
593 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
594 t.Errorf("unexpected Stats (-want +got):\n%s", diff)
595 }
596 })
597 }
598 })
599}
600
601func TestAndNegateSearch(t *testing.T) {
602 b := testIndexBuilder(t, nil,
603 Document{Name: "f1", Content: []byte("x banana y")},
604 // -----------------------------------0123456789
605 Document{Name: "f4", Content: []byte("x banana apple y")})
606
607 t.Run("LineMatches", func(t *testing.T) {
608 sres := searchForTest(t, b, query.NewAnd(
609 &query.Substring{
610 Pattern: "banana",
611 },
612 &query.Not{Child: &query.Substring{
613 Pattern: "apple",
614 }}))
615
616 matches := sres.Files
617
618 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
619 t.Fatalf("got %v, want 1 match", matches)
620 }
621 if matches[0].FileName != "f1" {
622 t.Fatalf("got match %#v, want FileName: f1", matches[0])
623 }
624 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 {
625 t.Fatalf("got %v, want offset 2", matches)
626 }
627 })
628
629 t.Run("ChunkMatches", func(t *testing.T) {
630 sres := searchForTest(t, b,
631 query.NewAnd(
632 &query.Substring{
633 Pattern: "banana",
634 },
635 &query.Not{Child: &query.Substring{
636 Pattern: "apple",
637 }},
638 ),
639 chunkOpts,
640 )
641
642 matches := sres.Files
643
644 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
645 t.Fatalf("got %v, want 1 match", matches)
646 }
647 if matches[0].FileName != "f1" {
648 t.Fatalf("got match %#v, want FileName: f1", matches[0])
649 }
650 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 {
651 t.Fatalf("got %v, want offset 2", matches)
652 }
653 })
654}
655
656func TestNegativeMatchesOnlyShortcut(t *testing.T) {
657 b := testIndexBuilder(t, nil,
658 Document{Name: "f1", Content: []byte("x banana y")},
659 Document{Name: "f2", Content: []byte("x appelmoes y")},
660 Document{Name: "f3", Content: []byte("x appelmoes y")},
661 Document{Name: "f3", Content: []byte("x appelmoes y")})
662
663 t.Run("LineMatches", func(t *testing.T) {
664 sres := searchForTest(t, b, query.NewAnd(
665 &query.Substring{
666 Pattern: "banana",
667 },
668 &query.Not{Child: &query.Substring{
669 Pattern: "appel",
670 }}))
671
672 if sres.Stats.FilesConsidered != 1 {
673 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
674 }
675 })
676
677 t.Run("ChunkMatches", func(t *testing.T) {
678 sres := searchForTest(t, b, query.NewAnd(
679 &query.Substring{
680 Pattern: "banana",
681 },
682 &query.Not{Child: &query.Substring{
683 Pattern: "appel",
684 }}), chunkOpts)
685
686 if sres.Stats.FilesConsidered != 1 {
687 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats)
688 }
689 })
690}
691
692func TestFileSearch(t *testing.T) {
693 b := testIndexBuilder(t, nil,
694 Document{Name: "banzana", Content: []byte("x orange y")},
695 // -------------0123456
696 Document{Name: "banana", Content: []byte("x apple y")},
697 // -------------012345
698 )
699
700 t.Run("LineMatches", func(t *testing.T) {
701 sres := searchForTest(t, b, &query.Substring{
702 Pattern: "anan",
703 FileName: true,
704 })
705
706 matches := sres.Files
707 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
708 t.Fatalf("got %v, want 1 match", matches)
709 }
710
711 got := matches[0].LineMatches[0]
712 want := LineMatch{
713 Line: []byte("banana"),
714 LineFragments: []LineFragmentMatch{{
715 Offset: 1,
716 LineOffset: 1,
717 MatchLength: 4,
718 }},
719 FileName: true,
720 }
721
722 if !reflect.DeepEqual(got, want) {
723 t.Errorf("got %#v, want %#v", got, want)
724 }
725 })
726
727 t.Run("ChunkMatches", func(t *testing.T) {
728 sres := searchForTest(t, b, &query.Substring{
729 Pattern: "anan",
730 FileName: true,
731 }, chunkOpts)
732
733 matches := sres.Files
734 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
735 t.Fatalf("got %v, want 1 match", matches)
736 }
737
738 got := matches[0].ChunkMatches[0]
739 want := ChunkMatch{
740 Content: []byte("banana"),
741 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
742 Ranges: []Range{{
743 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2},
744 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6},
745 }},
746 FileName: true,
747 }
748
749 if diff := cmp.Diff(want, got); diff != "" {
750 t.Fatal(diff)
751 }
752 })
753
754 t.Run("FileNameSet", func(t *testing.T) {
755 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts)
756
757 matches := sres.Files
758 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
759 t.Fatalf("got %v, want 1 match", matches)
760 }
761
762 got := matches[0].ChunkMatches[0]
763 want := ChunkMatch{
764 Content: []byte("banana"),
765 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
766 Ranges: []Range{{
767 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
768 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7},
769 }},
770 FileName: true,
771 }
772
773 if diff := cmp.Diff(want, got); diff != "" {
774 t.Fatal(diff)
775 }
776 })
777}
778
779func TestFileCase(t *testing.T) {
780 b := testIndexBuilder(t, nil,
781 Document{Name: "BANANA", Content: []byte("x orange y")})
782
783 t.Run("LineMatches", func(t *testing.T) {
784 sres := searchForTest(t, b, &query.Substring{
785 Pattern: "banana",
786 FileName: true,
787 })
788
789 matches := sres.Files
790 if len(matches) != 1 || matches[0].FileName != "BANANA" {
791 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
792 }
793 })
794
795 t.Run("ChunkMatches", func(t *testing.T) {
796 sres := searchForTest(t, b, &query.Substring{
797 Pattern: "banana",
798 FileName: true,
799 }, chunkOpts)
800
801 matches := sres.Files
802 if len(matches) != 1 || matches[0].FileName != "BANANA" {
803 t.Fatalf("got %v, want 1 match 'BANANA'", matches)
804 }
805 })
806}
807
808func TestFileRegexpSearchBruteForce(t *testing.T) {
809 b := testIndexBuilder(t, nil,
810 Document{Name: "banzana", Content: []byte("x orange y")},
811 Document{Name: "banana", Content: []byte("x apple y")},
812 )
813 t.Run("LineMatches", func(t *testing.T) {
814 sres := searchForTest(t, b, &query.Regexp{
815 Regexp: mustParseRE("[qn][zx]"),
816 FileName: true,
817 })
818
819 matches := sres.Files
820 if len(matches) != 1 || matches[0].FileName != "banzana" {
821 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
822 }
823 })
824 t.Run("LineMatches", func(t *testing.T) {
825 sres := searchForTest(t, b, &query.Regexp{
826 Regexp: mustParseRE("[qn][zx]"),
827 FileName: true,
828 }, chunkOpts)
829
830 matches := sres.Files
831 if len(matches) != 1 || matches[0].FileName != "banzana" {
832 t.Fatalf("got %v, want 1 match on 'banzana'", matches)
833 }
834 })
835}
836
837func TestFileRegexpSearchShortString(t *testing.T) {
838 b := testIndexBuilder(t, nil,
839 Document{Name: "banana.py", Content: []byte("x orange y")})
840
841 t.Run("LineMatches", func(t *testing.T) {
842 sres := searchForTest(t, b, &query.Regexp{
843 Regexp: mustParseRE("ana.py"),
844 FileName: true,
845 })
846
847 matches := sres.Files
848 if len(matches) != 1 || matches[0].FileName != "banana.py" {
849 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
850 }
851 })
852
853 t.Run("ChunkMatches", func(t *testing.T) {
854 sres := searchForTest(t, b, &query.Regexp{
855 Regexp: mustParseRE("ana.py"),
856 FileName: true,
857 }, chunkOpts)
858
859 matches := sres.Files
860 if len(matches) != 1 || matches[0].FileName != "banana.py" {
861 t.Fatalf("got %v, want 1 match on 'banana.py'", matches)
862 }
863 })
864}
865
866func TestFileSubstringSearchBruteForce(t *testing.T) {
867 b := testIndexBuilder(t, nil,
868 Document{Name: "BANZANA", Content: []byte("x orange y")},
869 Document{Name: "banana", Content: []byte("x apple y")})
870
871 q := &query.Substring{
872 Pattern: "z",
873 FileName: true,
874 }
875
876 t.Run("LineMatches", func(t *testing.T) {
877 res := searchForTest(t, b, q)
878 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
879 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
880 }
881 })
882
883 t.Run("ChunkMatches", func(t *testing.T) {
884 res := searchForTest(t, b, q, chunkOpts)
885 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" {
886 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files)
887 }
888 })
889}
890
891func TestFileSubstringSearchBruteForceEnd(t *testing.T) {
892 b := testIndexBuilder(t, nil,
893 Document{Name: "BANZANA", Content: []byte("x orange y")},
894 Document{Name: "bananaq", Content: []byte("x apple y")})
895
896 q := &query.Substring{
897 Pattern: "q",
898 FileName: true,
899 }
900 t.Run("LineMatches", func(t *testing.T) {
901 res := searchForTest(t, b, q)
902 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
903 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
904 }
905 })
906
907 t.Run("LineMatches", func(t *testing.T) {
908 res := searchForTest(t, b, q, chunkOpts)
909 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want {
910 t.Fatalf("got %v, want 1 match in %q", res.Files, want)
911 }
912 })
913}
914
915func TestSearchMatchAll(t *testing.T) {
916 b := testIndexBuilder(t, nil,
917 Document{Name: "banzana", Content: []byte("x orange y")},
918 Document{Name: "banana", Content: []byte("x apple y")})
919
920 t.Run("LineMatches", func(t *testing.T) {
921 sres := searchForTest(t, b, &query.Const{Value: true})
922 matches := sres.Files
923 if len(matches) != 2 {
924 t.Fatalf("got %v, want 2 matches", matches)
925 }
926 })
927
928 t.Run("ChunkMatches", func(t *testing.T) {
929 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts)
930 matches := sres.Files
931 if len(matches) != 2 {
932 t.Fatalf("got %v, want 2 matches", matches)
933 }
934 })
935}
936
937func TestSearchNewline(t *testing.T) {
938 b := testIndexBuilder(t, nil,
939 Document{Name: "banzana", Content: []byte("abcd\ndefg")})
940
941 t.Run("LineMatches", func(t *testing.T) {
942 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"})
943
944 // Just check that we don't crash.
945
946 matches := sres.Files
947 if len(matches) != 1 {
948 t.Fatalf("got %v, want 1 matches", matches)
949 }
950 })
951
952 t.Run("ChunkMatches", func(t *testing.T) {
953 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts)
954
955 // Just check that we don't crash.
956
957 matches := sres.Files
958 if len(matches) != 1 {
959 t.Fatalf("got %v, want 1 matches", matches)
960 }
961 })
962}
963
964func TestSearchMatchAllRegexp(t *testing.T) {
965 b := testIndexBuilder(t, nil,
966 Document{Name: "banzana", Content: []byte("abcd")},
967 Document{Name: "banana", Content: []byte("pqrs")})
968
969 t.Run("LineMatches", func(t *testing.T) {
970 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")})
971
972 matches := sres.Files
973 if len(matches) != 2 || sres.Stats.MatchCount != 2 {
974 t.Fatalf("got %v, want 2 matches", matches)
975 }
976 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 {
977 t.Fatalf("want 4 chars in every file, got %#v", matches)
978 }
979 })
980
981 t.Run("ChunkMatches", func(t *testing.T) {
982 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts)
983
984 matches := sres.Files
985 if len(matches) != 2 || sres.Stats.MatchCount != 8 {
986 t.Fatalf("got %v, want 2 matches", matches)
987 }
988 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 {
989 t.Fatalf("want 4 chars in every file, got %#v", matches)
990 }
991 })
992}
993
994func TestFileRestriction(t *testing.T) {
995 b := testIndexBuilder(t, nil,
996 Document{Name: "banana1", Content: []byte("x orange y")},
997 Document{Name: "banana2", Content: []byte("x apple y")},
998 Document{Name: "orange", Content: []byte("x apple z")})
999
1000 t.Run("LineMatches", func(t *testing.T) {
1001 sres := searchForTest(t, b, query.NewAnd(
1002 &query.Substring{
1003 Pattern: "banana",
1004 FileName: true,
1005 },
1006 &query.Substring{
1007 Pattern: "apple",
1008 }))
1009
1010 matches := sres.Files
1011 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1012 t.Fatalf("got %v, want 1 match", matches)
1013 }
1014
1015 match := matches[0].LineMatches[0]
1016 got := string(match.Line)
1017 want := "x apple y"
1018 if got != want {
1019 t.Errorf("got match %#v, want line %q", match, want)
1020 }
1021 })
1022
1023 t.Run("ChunkMatches", func(t *testing.T) {
1024 sres := searchForTest(t, b, query.NewAnd(
1025 &query.Substring{
1026 Pattern: "banana",
1027 FileName: true,
1028 },
1029 &query.Substring{
1030 Pattern: "apple",
1031 }), chunkOpts)
1032
1033 matches := sres.Files
1034 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1035 t.Fatalf("got %v, want 1 match", matches)
1036 }
1037
1038 match := matches[0].ChunkMatches[0]
1039 got := string(match.Content)
1040 want := "x apple y"
1041 if got != want {
1042 t.Errorf("got match %#v, want line %q", match, want)
1043 }
1044 })
1045}
1046
1047func TestFileNameBoundary(t *testing.T) {
1048 b := testIndexBuilder(t, nil,
1049 Document{Name: "banana2", Content: []byte("x apple y")},
1050 Document{Name: "helpers.go", Content: []byte("x apple y")},
1051 Document{Name: "foo", Content: []byte("x apple y")})
1052
1053 t.Run("LineMatches", func(t *testing.T) {
1054 sres := searchForTest(t, b, &query.Substring{
1055 Pattern: "helpers.go",
1056 FileName: true,
1057 })
1058
1059 matches := sres.Files
1060 if len(matches) != 1 || len(matches[0].LineMatches) != 1 {
1061 t.Fatalf("got %v, want 1 match", matches)
1062 }
1063 })
1064
1065 t.Run("ChunkMatches", func(t *testing.T) {
1066 sres := searchForTest(t, b, &query.Substring{
1067 Pattern: "helpers.go",
1068 FileName: true,
1069 }, chunkOpts)
1070
1071 matches := sres.Files
1072 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 {
1073 t.Fatalf("got %v, want 1 match", matches)
1074 }
1075 })
1076}
1077
1078func TestDocumentOrder(t *testing.T) {
1079 var docs []Document
1080 for i := 0; i < 3; i++ {
1081 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")})
1082 }
1083
1084 b := testIndexBuilder(t, nil, docs...)
1085
1086 t.Run("LineMatches", func(t *testing.T) {
1087 sres := searchForTest(t, b, query.NewAnd(
1088 &query.Substring{
1089 Pattern: "needle",
1090 }))
1091
1092 want := []string{"f0", "f1", "f2"}
1093 var got []string
1094 for _, f := range sres.Files {
1095 got = append(got, f.FileName)
1096 }
1097 if !reflect.DeepEqual(got, want) {
1098 t.Fatalf("got %v, want %v", got, want)
1099 }
1100 })
1101
1102 t.Run("ChunkMatches", func(t *testing.T) {
1103 sres := searchForTest(t, b,
1104 query.NewAnd(&query.Substring{
1105 Pattern: "needle",
1106 }),
1107 chunkOpts,
1108 )
1109
1110 want := []string{"f0", "f1", "f2"}
1111 var got []string
1112 for _, f := range sres.Files {
1113 got = append(got, f.FileName)
1114 }
1115 if !reflect.DeepEqual(got, want) {
1116 t.Fatalf("got %v, want %v", got, want)
1117 }
1118 })
1119}
1120
1121func TestBranchMask(t *testing.T) {
1122 b := testIndexBuilder(t, &Repository{
1123 Branches: []RepositoryBranch{
1124 {"master", "v-master"},
1125 {"stable", "v-stable"},
1126 {"bonzai", "v-bonzai"},
1127 },
1128 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}},
1129 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1130 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}},
1131 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}},
1132 )
1133
1134 t.Run("LineMatches", func(t *testing.T) {
1135 sres := searchForTest(t, b, query.NewAnd(
1136 &query.Substring{
1137 Pattern: "needle",
1138 },
1139 &query.Branch{
1140 Pattern: "table",
1141 }))
1142
1143 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1144 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1145 }
1146
1147 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1148 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1149 }
1150 })
1151
1152 t.Run("ChunkMatches", func(t *testing.T) {
1153 sres := searchForTest(t, b, query.NewAnd(
1154 &query.Substring{
1155 Pattern: "needle",
1156 },
1157 &query.Branch{
1158 Pattern: "table",
1159 }),
1160 chunkOpts,
1161 )
1162
1163 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
1164 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
1165 }
1166
1167 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
1168 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
1169 }
1170 })
1171}
1172
1173func TestBranchLimit(t *testing.T) {
1174 for limit := 64; limit <= 65; limit++ {
1175 r := &Repository{}
1176 for i := 0; i < limit; i++ {
1177 s := fmt.Sprintf("b%d", i)
1178 r.Branches = append(r.Branches, RepositoryBranch{
1179 s, "v-" + s,
1180 })
1181 }
1182 _, err := NewIndexBuilder(r)
1183 if limit == 64 && err != nil {
1184 t.Fatalf("NewIndexBuilder: %v", err)
1185 } else if limit == 65 && err == nil {
1186 t.Fatalf("NewIndexBuilder succeeded")
1187 }
1188 }
1189}
1190
1191func TestBranchReport(t *testing.T) {
1192 branches := []string{"stable", "master"}
1193 b := testIndexBuilder(t, &Repository{
1194 Branches: []RepositoryBranch{
1195 {"stable", "vs"},
1196 {"master", "vm"},
1197 },
1198 },
1199 Document{Name: "f2", Content: []byte("needle"), Branches: branches})
1200
1201 t.Run("LineMatches", func(t *testing.T) {
1202 sres := searchForTest(t, b, &query.Substring{
1203 Pattern: "needle",
1204 })
1205 if len(sres.Files) != 1 {
1206 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1207 }
1208
1209 f := sres.Files[0]
1210 if !reflect.DeepEqual(f.Branches, branches) {
1211 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1212 }
1213 })
1214
1215 t.Run("ChunkMatches", func(t *testing.T) {
1216 sres := searchForTest(t, b, &query.Substring{
1217 Pattern: "needle",
1218 }, chunkOpts)
1219 if len(sres.Files) != 1 {
1220 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1221 }
1222
1223 f := sres.Files[0]
1224 if !reflect.DeepEqual(f.Branches, branches) {
1225 t.Fatalf("got branches %q, want %q", f.Branches, branches)
1226 }
1227 })
1228}
1229
1230func TestBranchVersions(t *testing.T) {
1231 b := testIndexBuilder(t, &Repository{
1232 Branches: []RepositoryBranch{
1233 {"stable", "v-stable"},
1234 {"master", "v-master"},
1235 },
1236 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})
1237
1238 t.Run("LineMatches", func(t *testing.T) {
1239 sres := searchForTest(t, b, &query.Substring{
1240 Pattern: "needle",
1241 })
1242 if len(sres.Files) != 1 {
1243 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1244 }
1245
1246 f := sres.Files[0]
1247 if f.Version != "v-master" {
1248 t.Fatalf("got file %#v, want version 'v-master'", f)
1249 }
1250 })
1251
1252 t.Run("ChunkMatches", func(t *testing.T) {
1253 sres := searchForTest(t, b, &query.Substring{
1254 Pattern: "needle",
1255 }, chunkOpts)
1256 if len(sres.Files) != 1 {
1257 t.Fatalf("got %v, want 1 result from f2", sres.Files)
1258 }
1259
1260 f := sres.Files[0]
1261 if f.Version != "v-master" {
1262 t.Fatalf("got file %#v, want version 'v-master'", f)
1263 }
1264 })
1265}
1266
// mustParseRE parses s with Perl syntax flags and returns the resulting syntax
// tree. It panics if s is not a valid regular expression, so it is meant for
// patterns hard-coded in tests.
func mustParseRE(s string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(s, syntax.Perl)
	if parseErr == nil {
		return parsed
	}

	panic(parseErr)
}
1275
1276func TestRegexp(t *testing.T) {
1277 content := []byte("needle the bla")
1278 // ----------------01234567890123
1279
1280 b := testIndexBuilder(t, nil,
1281 Document{
1282 Name: "f1",
1283 Content: content,
1284 })
1285
1286 t.Run("LineMatches", func(t *testing.T) {
1287 sres := searchForTest(t, b,
1288 &query.Regexp{
1289 Regexp: mustParseRE("dle.*bla"),
1290 })
1291
1292 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1293 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1294 }
1295
1296 got := sres.Files[0].LineMatches[0]
1297 want := LineMatch{
1298 LineFragments: []LineFragmentMatch{{
1299 LineOffset: 3,
1300 Offset: 3,
1301 MatchLength: 11,
1302 }},
1303 Line: content,
1304 FileName: false,
1305 LineNumber: 1,
1306 LineStart: 0,
1307 LineEnd: 14,
1308 }
1309
1310 if !reflect.DeepEqual(got, want) {
1311 t.Errorf("got %#v, want %#v", got, want)
1312 }
1313 })
1314
1315 t.Run("ChunkMatches", func(t *testing.T) {
1316 sres := searchForTest(t, b,
1317 &query.Regexp{
1318 Regexp: mustParseRE("dle.*bla"),
1319 }, chunkOpts)
1320
1321 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1322 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1323 }
1324
1325 got := sres.Files[0].ChunkMatches[0]
1326 want := ChunkMatch{
1327 Content: content,
1328 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
1329 Ranges: []Range{{
1330 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4},
1331 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15},
1332 }},
1333 }
1334
1335 if diff := cmp.Diff(want, got); diff != "" {
1336 t.Fatal(diff)
1337 }
1338 })
1339}
1340
1341func TestRegexpFile(t *testing.T) {
1342 content := []byte("needle the bla")
1343
1344 name := "let's play: find the mussel"
1345 b := testIndexBuilder(t, nil,
1346 Document{Name: name, Content: content},
1347 Document{Name: "play.txt", Content: content})
1348
1349 t.Run("LineMatches", func(t *testing.T) {
1350 sres := searchForTest(t, b,
1351 &query.Regexp{
1352 Regexp: mustParseRE("play.*mussel"),
1353 FileName: true,
1354 })
1355
1356 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1357 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1358 }
1359
1360 if sres.Files[0].FileName != name {
1361 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1362 }
1363 })
1364
1365 t.Run("ChunkMatches", func(t *testing.T) {
1366 sres := searchForTest(t, b,
1367 &query.Regexp{
1368 Regexp: mustParseRE("play.*mussel"),
1369 FileName: true,
1370 }, chunkOpts)
1371
1372 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1373 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
1374 }
1375
1376 if sres.Files[0].FileName != name {
1377 t.Errorf("got match %#v, want name %q", sres.Files[0], name)
1378 }
1379 })
1380}
1381
1382func TestRegexpOrder(t *testing.T) {
1383 content := []byte("bla the needle")
1384 // ----------------01234567890123
1385
1386 b := testIndexBuilder(t, nil,
1387 Document{Name: "f1", Content: content})
1388
1389 t.Run("LineMatches", func(t *testing.T) {
1390 sres := searchForTest(t, b,
1391 &query.Regexp{
1392 Regexp: mustParseRE("dle.*bla"),
1393 })
1394
1395 if len(sres.Files) != 0 {
1396 t.Fatalf("got %v, want 0 matches", sres.Files)
1397 }
1398 })
1399
1400 t.Run("ChunkMatches", func(t *testing.T) {
1401 sres := searchForTest(t, b,
1402 &query.Regexp{
1403 Regexp: mustParseRE("dle.*bla"),
1404 })
1405
1406 if len(sres.Files) != 0 {
1407 t.Fatalf("got %v, want 0 matches", sres.Files)
1408 }
1409 })
1410}
1411
1412func TestRepoName(t *testing.T) {
1413 content := []byte("bla the needle")
1414 // ----------------01234567890123
1415
1416 b := testIndexBuilder(t, &Repository{Name: "bla"},
1417 Document{Name: "f1", Content: content})
1418
1419 t.Run("LineMatches", func(t *testing.T) {
1420 sres := searchForTest(t, b,
1421 query.NewAnd(
1422 &query.Substring{Pattern: "needle"},
1423 &query.Repo{Regexp: regexp.MustCompile("foo")},
1424 ))
1425
1426 if len(sres.Files) != 0 {
1427 t.Fatalf("got %v, want 0 matches", sres.Files)
1428 }
1429
1430 if sres.Stats.FilesConsidered > 0 {
1431 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1432 }
1433
1434 sres = searchForTest(t, b,
1435 query.NewAnd(
1436 &query.Substring{Pattern: "needle"},
1437 &query.Repo{Regexp: regexp.MustCompile("bla")},
1438 ))
1439 if len(sres.Files) != 1 {
1440 t.Fatalf("got %v, want 1 match", sres.Files)
1441 }
1442 })
1443
1444 t.Run("ChunkMatches", func(t *testing.T) {
1445 sres := searchForTest(t, b,
1446 query.NewAnd(
1447 &query.Substring{Pattern: "needle"},
1448 &query.Repo{Regexp: regexp.MustCompile("foo")},
1449 ),
1450 chunkOpts,
1451 )
1452
1453 if len(sres.Files) != 0 {
1454 t.Fatalf("got %v, want 0 matches", sres.Files)
1455 }
1456
1457 if sres.Stats.FilesConsidered > 0 {
1458 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
1459 }
1460
1461 sres = searchForTest(t, b,
1462 query.NewAnd(
1463 &query.Substring{Pattern: "needle"},
1464 &query.Repo{Regexp: regexp.MustCompile("bla")},
1465 ))
1466 if len(sres.Files) != 1 {
1467 t.Fatalf("got %v, want 1 match", sres.Files)
1468 }
1469 })
1470}
1471
1472func TestMergeMatches(t *testing.T) {
1473 content := []byte("blablabla")
1474 b := testIndexBuilder(t, nil,
1475 Document{Name: "f1", Content: content})
1476
1477 t.Run("LineMatches", func(t *testing.T) {
1478 sres := searchForTest(t, b,
1479 &query.Substring{Pattern: "bla"})
1480 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
1481 t.Fatalf("got %v, want 1 match", sres.Files)
1482 }
1483 })
1484
1485 t.Run("ChunkMatches", func(t *testing.T) {
1486 sres := searchForTest(t, b,
1487 &query.Substring{Pattern: "bla"},
1488 chunkOpts,
1489 )
1490 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
1491 t.Fatalf("got %v, want 1 match", sres.Files)
1492 }
1493 })
1494}
1495
1496func TestRepoURL(t *testing.T) {
1497 content := []byte("blablabla")
1498 b := testIndexBuilder(t, &Repository{
1499 Name: "name",
1500 URL: "URL",
1501 CommitURLTemplate: "commit",
1502 FileURLTemplate: "file-url",
1503 LineFragmentTemplate: "fragment",
1504 }, Document{Name: "f1", Content: content})
1505
1506 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})
1507
1508 if sres.RepoURLs["name"] != "file-url" {
1509 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs)
1510 }
1511 if sres.LineFragments["name"] != "fragment" {
1512 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments)
1513 }
1514}
1515
1516func TestRegexpCaseSensitive(t *testing.T) {
1517 content := []byte("bla\nfunc unmarshalGitiles\n")
1518 b := testIndexBuilder(t, nil, Document{
1519 Name: "f1",
1520 Content: content,
1521 })
1522
1523 t.Run("LineMatches", func(t *testing.T) {
1524 res := searchForTest(t, b,
1525 &query.Regexp{
1526 Regexp: mustParseRE("func.*Gitiles"),
1527 CaseSensitive: true,
1528 })
1529
1530 if len(res.Files) != 1 {
1531 t.Fatalf("got %v, want one match", res.Files)
1532 }
1533 })
1534
1535 t.Run("ChunkMatches", func(t *testing.T) {
1536 res := searchForTest(t, b,
1537 &query.Regexp{
1538 Regexp: mustParseRE("func.*Gitiles"),
1539 CaseSensitive: true,
1540 },
1541 chunkOpts,
1542 )
1543
1544 if len(res.Files) != 1 {
1545 t.Fatalf("got %v, want one match", res.Files)
1546 }
1547 })
1548}
1549
1550func TestRegexpCaseFolding(t *testing.T) {
1551 content := []byte("bla\nfunc unmarshalGitiles\n")
1552
1553 b := testIndexBuilder(t, nil,
1554 Document{Name: "f1", Content: content})
1555 res := searchForTest(t, b,
1556 &query.Regexp{
1557 Regexp: mustParseRE("func.*GITILES"),
1558 CaseSensitive: false,
1559 })
1560
1561 if len(res.Files) != 1 {
1562 t.Fatalf("got %v, want one match", res.Files)
1563 }
1564}
1565
1566func TestCaseRegexp(t *testing.T) {
1567 content := []byte("BLABLABLA")
1568 b := testIndexBuilder(t, nil,
1569 Document{Name: "f1", Content: content})
1570
1571 t.Run("LineMatches", func(t *testing.T) {
1572 res := searchForTest(t, b,
1573 &query.Regexp{
1574 Regexp: mustParseRE("[xb][xl][xa]"),
1575 CaseSensitive: true,
1576 })
1577
1578 if len(res.Files) > 0 {
1579 t.Fatalf("got %v, want no matches", res.Files)
1580 }
1581 })
1582
1583 t.Run("ChunkMatches", func(t *testing.T) {
1584 res := searchForTest(t, b,
1585 &query.Regexp{
1586 Regexp: mustParseRE("[xb][xl][xa]"),
1587 CaseSensitive: true,
1588 },
1589 chunkOpts,
1590 )
1591
1592 if len(res.Files) > 0 {
1593 t.Fatalf("got %v, want no matches", res.Files)
1594 }
1595 })
1596}
1597
1598func TestNegativeRegexp(t *testing.T) {
1599 content := []byte("BLABLABLA needle bla")
1600 b := testIndexBuilder(t, nil,
1601 Document{Name: "f1", Content: content})
1602
1603 t.Run("LineMatches", func(t *testing.T) {
1604 res := searchForTest(t, b,
1605 query.NewAnd(
1606 &query.Substring{
1607 Pattern: "needle",
1608 },
1609 &query.Not{
1610 Child: &query.Regexp{
1611 Regexp: mustParseRE(".cs"),
1612 },
1613 }))
1614
1615 if len(res.Files) != 1 {
1616 t.Fatalf("got %v, want 1 match", res.Files)
1617 }
1618 })
1619
1620 t.Run("ChunkMatches", func(t *testing.T) {
1621 res := searchForTest(t, b,
1622 query.NewAnd(
1623 &query.Substring{
1624 Pattern: "needle",
1625 },
1626 &query.Not{
1627 Child: &query.Regexp{
1628 Regexp: mustParseRE(".cs"),
1629 },
1630 },
1631 ),
1632 chunkOpts)
1633
1634 if len(res.Files) != 1 {
1635 t.Fatalf("got %v, want 1 match", res.Files)
1636 }
1637 })
1638}
1639
1640func TestSymbolRank(t *testing.T) {
1641 t.Skip()
1642
1643 content := []byte("func bla() blubxxxxx")
1644 // ----------------01234567890123456789
1645 b := testIndexBuilder(t, nil,
1646 Document{
1647 Name: "f1",
1648 Content: content,
1649 }, Document{
1650 Name: "f2",
1651 Content: content,
1652 Symbols: []DocumentSection{{5, 8}},
1653 }, Document{
1654 Name: "f3",
1655 Content: content,
1656 })
1657
1658 t.Run("LineMatches", func(t *testing.T) {
1659 res := searchForTest(t, b,
1660 &query.Substring{
1661 CaseSensitive: false,
1662 Pattern: "bla",
1663 })
1664
1665 if len(res.Files) != 3 {
1666 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1667 }
1668 if res.Files[0].FileName != "f2" {
1669 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1670 }
1671 })
1672
1673 t.Run("ChunkMatches", func(t *testing.T) {
1674 res := searchForTest(t, b,
1675 &query.Substring{
1676 CaseSensitive: false,
1677 Pattern: "bla",
1678 }, chunkOpts)
1679
1680 if len(res.Files) != 3 {
1681 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files)
1682 }
1683 if res.Files[0].FileName != "f2" {
1684 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1685 }
1686 })
1687}
1688
1689func TestSymbolRankRegexpUTF8(t *testing.T) {
1690 t.Skip()
1691
1692 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
1693 content := []byte(prefix +
1694 "func bla() blub")
1695 // ------012345678901234
1696 b := testIndexBuilder(t, nil,
1697 Document{
1698 Name: "f1",
1699 Content: content,
1700 }, Document{
1701 Name: "f2",
1702 Content: content,
1703 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
1704 }, Document{
1705 Name: "f3",
1706 Content: content,
1707 })
1708
1709 t.Run("LineMatches", func(t *testing.T) {
1710 res := searchForTest(t, b,
1711 &query.Regexp{
1712 Regexp: mustParseRE("b.a"),
1713 })
1714
1715 if len(res.Files) != 3 {
1716 t.Fatalf("got %#v, want 3 files", res.Files)
1717 }
1718 if res.Files[0].FileName != "f2" {
1719 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1720 }
1721 })
1722
1723 t.Run("ChunjkMatches", func(t *testing.T) {
1724 res := searchForTest(t, b,
1725 &query.Regexp{
1726 Regexp: mustParseRE("b.a"),
1727 }, chunkOpts)
1728
1729 if len(res.Files) != 3 {
1730 t.Fatalf("got %#v, want 3 files", res.Files)
1731 }
1732 if res.Files[0].FileName != "f2" {
1733 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1734 }
1735 })
1736}
1737
1738func TestPartialSymbolRank(t *testing.T) {
1739 t.Skip()
1740
1741 content := []byte("func bla() blub")
1742 // ----------------012345678901234
1743
1744 b := testIndexBuilder(t, nil,
1745 Document{
1746 Name: "f1",
1747 Content: content,
1748 Symbols: []DocumentSection{{4, 9}},
1749 }, Document{
1750 Name: "f2",
1751 Content: content,
1752 Symbols: []DocumentSection{{4, 8}},
1753 }, Document{
1754 Name: "f3",
1755 Content: content,
1756 Symbols: []DocumentSection{{4, 9}},
1757 })
1758
1759 t.Run("LineMatches", func(t *testing.T) {
1760 res := searchForTest(t, b,
1761 &query.Substring{
1762 Pattern: "bla",
1763 })
1764
1765 if len(res.Files) != 3 {
1766 t.Fatalf("got %#v, want 3 files", res.Files)
1767 }
1768 if res.Files[0].FileName != "f2" {
1769 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1770 }
1771 })
1772
1773 t.Run("ChunkMatches", func(t *testing.T) {
1774 res := searchForTest(t, b,
1775 &query.Substring{
1776 Pattern: "bla",
1777 }, chunkOpts)
1778
1779 if len(res.Files) != 3 {
1780 t.Fatalf("got %#v, want 3 files", res.Files)
1781 }
1782 if res.Files[0].FileName != "f2" {
1783 t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
1784 }
1785 })
1786}
1787
1788func TestNegativeRepo(t *testing.T) {
1789 content := []byte("bla the needle")
1790 // ----------------01234567890123
1791 b := testIndexBuilder(t, &Repository{
1792 Name: "bla",
1793 }, Document{Name: "f1", Content: content})
1794
1795 t.Run("LineMatches", func(t *testing.T) {
1796 sres := searchForTest(t, b,
1797 query.NewAnd(
1798 &query.Substring{Pattern: "needle"},
1799 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1800 ))
1801
1802 if len(sres.Files) != 0 {
1803 t.Fatalf("got %v, want 0 matches", sres.Files)
1804 }
1805 })
1806
1807 t.Run("ChunkMatches", func(t *testing.T) {
1808 sres := searchForTest(t, b,
1809 query.NewAnd(
1810 &query.Substring{Pattern: "needle"},
1811 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
1812 ), chunkOpts)
1813
1814 if len(sres.Files) != 0 {
1815 t.Fatalf("got %v, want 0 matches", sres.Files)
1816 }
1817 })
1818}
1819
1820func TestListRepos(t *testing.T) {
1821 content := []byte("bla the needle\n")
1822 // ----------------012345678901234-
1823
1824 t.Run("default and minimal fallback", func(t *testing.T) {
1825 repo := &Repository{
1826 Name: "reponame",
1827 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1828 }
1829 b := testIndexBuilder(t, repo,
1830 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1831 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1832 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1833 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1834
1835 searcher := searcherForTest(t, b)
1836
1837 for _, opts := range []*ListOptions{
1838 nil,
1839 {},
1840 {Field: RepoListFieldRepos},
1841 {Field: RepoListFieldReposMap},
1842 } {
1843 t.Run(fmt.Sprint(opts), func(t *testing.T) {
1844 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1845
1846 res, err := searcher.List(context.Background(), q, opts)
1847 if err != nil {
1848 t.Fatalf("List(%v): %v", q, err)
1849 }
1850
1851 want := &RepoList{
1852 Repos: []*RepoListEntry{{
1853 Repository: *repo,
1854 Stats: RepoStats{
1855 Documents: 4,
1856 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4
1857 Shards: 1,
1858
1859 NewLinesCount: 4,
1860 DefaultBranchNewLinesCount: 2,
1861 OtherBranchesNewLinesCount: 3,
1862 },
1863 }},
1864 Stats: RepoStats{
1865 Repos: 1,
1866 Documents: 4,
1867 ContentBytes: 68,
1868 Shards: 1,
1869
1870 NewLinesCount: 4,
1871 DefaultBranchNewLinesCount: 2,
1872 OtherBranchesNewLinesCount: 3,
1873 },
1874 }
1875 ignored := []cmp.Option{
1876 cmpopts.EquateEmpty(),
1877 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
1878 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
1879 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"),
1880 cmpopts.IgnoreFields(Repository{}, "priority"),
1881 }
1882 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1883 t.Fatalf("mismatch (-want +got):\n%s", diff)
1884 }
1885
1886 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1887 res, err = searcher.List(context.Background(), q, nil)
1888 if err != nil {
1889 t.Fatalf("List(%v): %v", q, err)
1890 }
1891 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1892 t.Fatalf("got %v, want 0 matches", res)
1893 }
1894 })
1895 }
1896 })
1897
1898 t.Run("minimal", func(t *testing.T) {
1899 repo := &Repository{
1900 ID: 1234,
1901 Name: "reponame",
1902 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
1903 RawConfig: map[string]string{"repoid": "1234"},
1904 }
1905 b := testIndexBuilder(t, repo,
1906 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
1907 Document{Name: "f2", Content: content, Branches: []string{"main"}},
1908 Document{Name: "f2", Content: content, Branches: []string{"dev"}},
1909 Document{Name: "f3", Content: content, Branches: []string{"dev"}})
1910
1911 searcher := searcherForTest(t, b)
1912
1913 q := &query.Repo{Regexp: regexp.MustCompile("epo")}
1914 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1915 if err != nil {
1916 t.Fatalf("List(%v): %v", q, err)
1917 }
1918
1919 want := &RepoList{
1920 ReposMap: ReposMap{
1921 repo.ID: {
1922 HasSymbols: repo.HasSymbols,
1923 Branches: repo.Branches,
1924 },
1925 },
1926 Stats: RepoStats{
1927 Repos: 1,
1928 Shards: 1,
1929 Documents: 4,
1930 IndexBytes: 412,
1931 ContentBytes: 68,
1932 NewLinesCount: 4,
1933 DefaultBranchNewLinesCount: 2,
1934 OtherBranchesNewLinesCount: 3,
1935 },
1936 }
1937
1938 ignored := []cmp.Option{
1939 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"),
1940 }
1941 if diff := cmp.Diff(want, res, ignored...); diff != "" {
1942 t.Fatalf("mismatch (-want +got):\n%s", diff)
1943 }
1944
1945 q = &query.Repo{Regexp: regexp.MustCompile("bla")}
1946 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap})
1947 if err != nil {
1948 t.Fatalf("List(%v): %v", q, err)
1949 }
1950 if len(res.Repos) != 0 || len(res.ReposMap) != 0 {
1951 t.Fatalf("got %v, want 0 matches", res)
1952 }
1953 })
1954}
1955
1956func TestListReposByContent(t *testing.T) {
1957 content := []byte("bla the needle")
1958
1959 b := testIndexBuilder(t, &Repository{
1960 Name: "reponame",
1961 },
1962 Document{Name: "f1", Content: content},
1963 Document{Name: "f2", Content: content})
1964
1965 searcher := searcherForTest(t, b)
1966 q := &query.Substring{Pattern: "needle"}
1967 res, err := searcher.List(context.Background(), q, nil)
1968 if err != nil {
1969 t.Fatalf("List(%v): %v", q, err)
1970 }
1971 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
1972 t.Fatalf("got %v, want 1 matches", res)
1973 }
1974 if got := res.Repos[0].Stats.Shards; got != 1 {
1975 t.Fatalf("got %d, want 1 shard", got)
1976 }
1977 q = &query.Substring{Pattern: "foo"}
1978 res, err = searcher.List(context.Background(), q, nil)
1979 if err != nil {
1980 t.Fatalf("List(%v): %v", q, err)
1981 }
1982 if len(res.Repos) != 0 {
1983 t.Fatalf("got %v, want 0 matches", res)
1984 }
1985}
1986
1987func TestMetadata(t *testing.T) {
1988 content := []byte("bla the needle")
1989
1990 b := testIndexBuilder(t, &Repository{
1991 Name: "reponame",
1992 }, Document{Name: "f1", Content: content},
1993 Document{Name: "f2", Content: content})
1994
1995 var buf bytes.Buffer
1996 if err := b.Write(&buf); err != nil {
1997 t.Fatal(err)
1998 }
1999 f := &memSeeker{buf.Bytes()}
2000
2001 rd, _, err := ReadMetadata(f)
2002 if err != nil {
2003 t.Fatalf("ReadMetadata: %v", err)
2004 }
2005
2006 if got, want := rd[0].Name, "reponame"; got != want {
2007 t.Fatalf("got %q want %q", got, want)
2008 }
2009}
2010
2011func TestOr(t *testing.T) {
2012 b := testIndexBuilder(t, nil,
2013 Document{Name: "f1", Content: []byte("needle")},
2014 Document{Name: "f2", Content: []byte("banana")})
2015 t.Run("LineMatches", func(t *testing.T) {
2016 sres := searchForTest(t, b, query.NewOr(
2017 &query.Substring{Pattern: "needle"},
2018 &query.Substring{Pattern: "banana"}))
2019
2020 if len(sres.Files) != 2 {
2021 t.Fatalf("got %v, want 2 files", sres.Files)
2022 }
2023 })
2024
2025 t.Run("ChunkMatches", func(t *testing.T) {
2026 sres := searchForTest(t, b, query.NewOr(
2027 &query.Substring{Pattern: "needle"},
2028 &query.Substring{Pattern: "banana"}))
2029
2030 if len(sres.Files) != 2 {
2031 t.Fatalf("got %v, want 2 files", sres.Files)
2032 }
2033 })
2034}
2035
2036func TestFrequency(t *testing.T) {
2037 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")
2038
2039 b := testIndexBuilder(t, nil,
2040 Document{
2041 Name: "f1",
2042 Content: content,
2043 })
2044
2045 t.Run("LineMatches", func(t *testing.T) {
2046 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
2047 if len(sres.Files) != 0 {
2048 t.Errorf("got %v, wanted 0 matches", sres.Files)
2049 }
2050 })
2051
2052 t.Run("ChunkMatches", func(t *testing.T) {
2053 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
2054 if len(sres.Files) != 0 {
2055 t.Errorf("got %v, wanted 0 matches", sres.Files)
2056 }
2057 })
2058}
2059
2060func TestMatchNewline(t *testing.T) {
2061 re, err := syntax.Parse("[^a]a", syntax.ClassNL)
2062 if err != nil {
2063 t.Fatalf("syntax.Parse: %v", err)
2064 }
2065
2066 content := []byte("pqr\nalex")
2067
2068 b := testIndexBuilder(t, nil,
2069 Document{
2070 Name: "f1",
2071 Content: content,
2072 })
2073
2074 t.Run("LineMatches", func(t *testing.T) {
2075 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
2076 if len(sres.Files) != 1 {
2077 t.Errorf("got %v, wanted 1 matches", sres.Files)
2078 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) {
2079 t.Errorf("got match line %q, want %q", l, content)
2080 }
2081 })
2082
2083 t.Run("ChunkMatches", func(t *testing.T) {
2084 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
2085 if len(sres.Files) != 1 {
2086 t.Errorf("got %v, wanted 1 matches", sres.Files)
2087 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
2088 t.Errorf("got match line %q, want %q", c, content)
2089 }
2090 })
2091}
2092
2093func TestSubRepo(t *testing.T) {
2094 subRepos := map[string]*Repository{
2095 "sub": {
2096 Name: "sub-name",
2097 LineFragmentTemplate: "sub-line",
2098 },
2099 }
2100
2101 content := []byte("pqr\nalex")
2102
2103 b := testIndexBuilder(t, &Repository{
2104 SubRepoMap: subRepos,
2105 }, Document{
2106 Name: "sub/f1",
2107 Content: content,
2108 SubRepositoryPath: "sub",
2109 })
2110
2111 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
2112 if len(sres.Files) != 1 {
2113 t.Fatalf("got %v, wanted 1 matches", sres.Files)
2114 }
2115
2116 f := sres.Files[0]
2117 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
2118 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
2119 }
2120
2121 if sres.LineFragments["sub-name"] != "sub-line" {
2122 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments)
2123 }
2124}
2125
2126func TestSearchEither(t *testing.T) {
2127 b := testIndexBuilder(t, nil,
2128 Document{Name: "f1", Content: []byte("bla needle bla")},
2129 Document{Name: "needle-file-branch", Content: []byte("bla content")})
2130
2131 t.Run("LineMatches", func(t *testing.T) {
2132 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
2133 if len(sres.Files) != 2 {
2134 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2135 }
2136
2137 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
2138 if len(sres.Files) != 1 {
2139 t.Fatalf("got %v, wanted 1 match", sres.Files)
2140 }
2141
2142 if got, want := sres.Files[0].FileName, "f1"; got != want {
2143 t.Errorf("got %q, want %q", got, want)
2144 }
2145 })
2146
2147 t.Run("ChunkMatches", func(t *testing.T) {
2148 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
2149 if len(sres.Files) != 2 {
2150 t.Fatalf("got %v, wanted 2 matches", sres.Files)
2151 }
2152
2153 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
2154 if len(sres.Files) != 1 {
2155 t.Fatalf("got %v, wanted 1 match", sres.Files)
2156 }
2157
2158 if got, want := sres.Files[0].FileName, "f1"; got != want {
2159 t.Errorf("got %q, want %q", got, want)
2160 }
2161 })
2162}
2163
2164func TestUnicodeExactMatch(t *testing.T) {
2165 needle := "néédlÉ"
2166 content := []byte("blá blá " + needle + " blâ")
2167
2168 b := testIndexBuilder(t, nil,
2169 Document{Name: "f1", Content: content})
2170
2171 t.Run("LineMatches", func(t *testing.T) {
2172 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
2173 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2174 }
2175 })
2176
2177 t.Run("ChunkMatches", func(t *testing.T) {
2178 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
2179 if len(res.Files) != 1 {
2180 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
2181 }
2182 })
2183}
2184
2185func TestUnicodeCoverContent(t *testing.T) {
2186 needle := "néédlÉ"
2187 content := []byte("blá blá " + needle + " blâ")
2188
2189 b := testIndexBuilder(t, nil,
2190 Document{Name: "f1", Content: content})
2191
2192 t.Run("LineMatches", func(t *testing.T) {
2193 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
2194 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2195 }
2196
2197 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
2198 if len(res.Files) != 1 {
2199 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2200 }
2201
2202 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2203 t.Errorf("got %d want %d", got, want)
2204 }
2205 })
2206
2207 t.Run("ChunkMatches", func(t *testing.T) {
2208 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
2209 if len(res.Files) != 0 {
2210 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
2211 }
2212
2213 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
2214 if len(res.Files) != 1 {
2215 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
2216 }
2217
2218 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2219 want := uint32(strings.Index(string(content), needle))
2220 if got != want {
2221 t.Errorf("got %d want %d", got, want)
2222 }
2223 })
2224}
2225
2226func TestUnicodeNonCoverContent(t *testing.T) {
2227 needle := "nééáádlÉ"
2228 content := []byte("blá blá " + needle + " blâ")
2229
2230 b := testIndexBuilder(t, nil,
2231 Document{Name: "f1", Content: content})
2232
2233 t.Run("LineMatches", func(t *testing.T) {
2234 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
2235 if len(res.Files) != 1 {
2236 t.Fatalf("got %v, wanted 1 match", res.Files)
2237 }
2238
2239 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
2240 t.Errorf("got %d want %d", got, want)
2241 }
2242 })
2243
2244 t.Run("ChunkMatches", func(t *testing.T) {
2245 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
2246 if len(res.Files) != 1 {
2247 t.Fatalf("got %v, wanted 1 match", res.Files)
2248 }
2249
2250 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
2251 want := uint32(strings.Index(string(content), needle))
2252 if got != want {
2253 t.Errorf("got %d want %d", got, want)
2254 }
2255 })
2256}
2257
// kelvinCodePoint is the Unicode code point U+212A (KELVIN SIGN), i.e. 8490 in
// decimal. The tests in this file pair it with the ASCII letter 'k' as its
// lower-case counterpart; the two runes have different UTF-8 encoded lengths.
const kelvinCodePoint = 0x212A
2259
2260func TestUnicodeVariableLength(t *testing.T) {
2261 lower := 'k'
2262 upper := rune(kelvinCodePoint)
2263
2264 needle := "nee" + string([]rune{lower}) + "eed"
2265 corpus := []byte("nee" + string([]rune{upper}) + "eed" +
2266 " ee" + string([]rune{lower}) + "ee" +
2267 " ee" + string([]rune{upper}) + "ee")
2268
2269 t.Run("LineMatches", func(t *testing.T) {
2270 b := testIndexBuilder(t, nil,
2271 Document{Name: "f1", Content: []byte(corpus)})
2272
2273 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
2274 if len(res.Files) != 1 {
2275 t.Fatalf("got %v, wanted 1 match", res.Files)
2276 }
2277 })
2278
2279 t.Run("ChunkMatches", func(t *testing.T) {
2280 b := testIndexBuilder(t, nil,
2281 Document{Name: "f1", Content: []byte(corpus)})
2282
2283 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
2284 if len(res.Files) != 1 {
2285 t.Fatalf("got %v, wanted 1 match", res.Files)
2286 }
2287 })
2288}
2289
2290func TestUnicodeFileStartOffsets(t *testing.T) {
2291 unicode := "世界"
2292 wat := "waaaaaat"
2293 b := testIndexBuilder(t, nil,
2294 Document{
2295 Name: "f1",
2296 Content: []byte(unicode),
2297 },
2298 Document{
2299 Name: "f2",
2300 Content: []byte(wat),
2301 },
2302 )
2303 q := &query.Substring{Pattern: wat, Content: true}
2304 res := searchForTest(t, b, q)
2305 if len(res.Files) != 1 {
2306 t.Fatalf("got %v, wanted 1 match", res.Files)
2307 }
2308}
2309
2310func TestLongFileUTF8(t *testing.T) {
2311 needle := "neeedle"
2312
2313 // 6 bytes.
2314 unicode := "世界"
2315 content := []byte(strings.Repeat(unicode, 100) + needle)
2316 b := testIndexBuilder(t, nil,
2317 Document{
2318 Name: "f1",
2319 Content: []byte(strings.Repeat("a", 50)),
2320 },
2321 Document{
2322 Name: "f2",
2323 Content: content,
2324 })
2325
2326 t.Run("LineMatches", func(t *testing.T) {
2327 q := &query.Substring{Pattern: needle, Content: true}
2328 res := searchForTest(t, b, q)
2329 if len(res.Files) != 1 {
2330 t.Errorf("got %v, want 1 result", res)
2331 }
2332 })
2333
2334 t.Run("ChunkMatches", func(t *testing.T) {
2335 q := &query.Substring{Pattern: needle, Content: true}
2336 res := searchForTest(t, b, q, chunkOpts)
2337 if len(res.Files) != 1 {
2338 t.Errorf("got %v, want 1 result", res)
2339 }
2340 })
2341}
2342
2343func TestEstimateDocCount(t *testing.T) {
2344 content := []byte("bla needle bla")
2345 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2346 Document{Name: "f1", Content: content},
2347 Document{Name: "f2", Content: content},
2348 )
2349
2350 t.Run("LineMatches", func(t *testing.T) {
2351 if sres := searchForTest(t, b,
2352 query.NewAnd(
2353 &query.Substring{Pattern: "needle"},
2354 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2355 ), SearchOptions{
2356 EstimateDocCount: true,
2357 }); sres.Stats.ShardFilesConsidered != 2 {
2358 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2359 }
2360 if sres := searchForTest(t, b,
2361 query.NewAnd(
2362 &query.Substring{Pattern: "needle"},
2363 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2364 ), SearchOptions{
2365 EstimateDocCount: true,
2366 }); sres.Stats.ShardFilesConsidered != 0 {
2367 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2368 }
2369 })
2370
2371 t.Run("ChunkMatches", func(t *testing.T) {
2372 if sres := searchForTest(t, b,
2373 query.NewAnd(
2374 &query.Substring{Pattern: "needle"},
2375 &query.Repo{Regexp: regexp.MustCompile("reponame")},
2376 ), SearchOptions{
2377 EstimateDocCount: true,
2378 ChunkMatches: true,
2379 }); sres.Stats.ShardFilesConsidered != 2 {
2380 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered)
2381 }
2382 if sres := searchForTest(t, b,
2383 query.NewAnd(
2384 &query.Substring{Pattern: "needle"},
2385 &query.Repo{Regexp: regexp.MustCompile("nomatch")},
2386 ), SearchOptions{
2387 EstimateDocCount: true,
2388 ChunkMatches: true,
2389 }); sres.Stats.ShardFilesConsidered != 0 {
2390 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered)
2391 }
2392 })
2393}
2394
2395func TestUTF8CorrectCorpus(t *testing.T) {
2396 needle := "neeedle"
2397
2398 // 6 bytes.
2399 unicode := "世界"
2400 b := testIndexBuilder(t, nil,
2401 Document{
2402 Name: "f1",
2403 Content: []byte(strings.Repeat(unicode, 100)),
2404 },
2405 Document{
2406 Name: "xxxxxneeedle",
2407 Content: []byte("hello"),
2408 })
2409
2410 t.Run("LineMatches", func(t *testing.T) {
2411 q := &query.Substring{Pattern: needle, FileName: true}
2412 res := searchForTest(t, b, q)
2413 if len(res.Files) != 1 {
2414 t.Errorf("got %v, want 1 result", res)
2415 }
2416 })
2417
2418 t.Run("ChunkMatches", func(t *testing.T) {
2419 q := &query.Substring{Pattern: needle, FileName: true}
2420 res := searchForTest(t, b, q, chunkOpts)
2421 if len(res.Files) != 1 {
2422 t.Errorf("got %v, want 1 result", res)
2423 }
2424 })
2425}
2426
2427func TestBuilderStats(t *testing.T) {
2428 b := testIndexBuilder(t, nil,
2429 Document{
2430 Name: "f1",
2431 Content: []byte(strings.Repeat("abcd", 1024)),
2432 })
2433 var buf bytes.Buffer
2434 if err := b.Write(&buf); err != nil {
2435 t.Fatal(err)
2436 }
2437
2438 if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
2439 t.Errorf("got %d, want %d", got, want)
2440 }
2441}
2442
2443func TestIOStats(t *testing.T) {
2444 b := testIndexBuilder(t, nil,
2445 Document{
2446 Name: "f1",
2447 Content: []byte(strings.Repeat("abcd", 1024)),
2448 })
2449
2450 t.Run("LineMatches", func(t *testing.T) {
2451 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2452 res := searchForTest(t, b, q)
2453
2454 // 4096 (content) + 2 (overhead: newlines or doc sections)
2455 if got, want := res.Stats.ContentBytesLoaded, int64(4100); got != want {
2456 t.Errorf("got content I/O %d, want %d", got, want)
2457 }
2458
2459 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2460 // delta encoded.
2461 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2462 t.Errorf("got index I/O %d, want %d", got, want)
2463 }
2464 })
2465
2466 t.Run("ChunkMatches", func(t *testing.T) {
2467 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
2468 res := searchForTest(t, b, q, chunkOpts)
2469
2470 // 4096 (content) + 2 (overhead: newlines or doc sections)
2471 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
2472 t.Errorf("got content I/O %d, want %d", got, want)
2473 }
2474
2475 // 1024 entries, each 4 bytes apart. 4 fits into single byte
2476 // delta encoded.
2477 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
2478 t.Errorf("got index I/O %d, want %d", got, want)
2479 }
2480 })
2481}
2482
2483func TestStartLineAnchor(t *testing.T) {
2484 b := testIndexBuilder(t, nil,
2485 Document{
2486 Name: "f1",
2487 Content: []byte(
2488 `hello
2489start of middle of line
2490`),
2491 })
2492
2493 t.Run("LineMatches", func(t *testing.T) {
2494 q, err := query.Parse("^start")
2495 if err != nil {
2496 t.Errorf("parse: %v", err)
2497 }
2498
2499 res := searchForTest(t, b, q)
2500 if len(res.Files) != 1 {
2501 t.Errorf("got %v, want 1 file", res.Files)
2502 }
2503
2504 q, err = query.Parse("^middle")
2505 if err != nil {
2506 t.Errorf("parse: %v", err)
2507 }
2508 res = searchForTest(t, b, q)
2509 if len(res.Files) != 0 {
2510 t.Errorf("got %v, want 0 files", res.Files)
2511 }
2512 })
2513
2514 t.Run("ChunkMatches", func(t *testing.T) {
2515 q, err := query.Parse("^start")
2516 if err != nil {
2517 t.Errorf("parse: %v", err)
2518 }
2519
2520 res := searchForTest(t, b, q, chunkOpts)
2521 if len(res.Files) != 1 {
2522 t.Errorf("got %v, want 1 file", res.Files)
2523 }
2524
2525 q, err = query.Parse("^middle")
2526 if err != nil {
2527 t.Errorf("parse: %v", err)
2528 }
2529 res = searchForTest(t, b, q, chunkOpts)
2530 if len(res.Files) != 0 {
2531 t.Errorf("got %v, want 0 files", res.Files)
2532 }
2533 })
2534}
2535
2536func TestAndOrUnicode(t *testing.T) {
2537 q, err := query.Parse("orange.*apple")
2538 if err != nil {
2539 t.Errorf("parse: %v", err)
2540 }
2541 finalQ := query.NewAnd(q,
2542 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
2543 query.NewOr(&query.Branch{Pattern: "master"}))))
2544
2545 b := testIndexBuilder(t, &Repository{
2546 Name: "name",
2547 Branches: []RepositoryBranch{{"master", "master-version"}},
2548 }, Document{
2549 Name: "f2",
2550 Content: []byte("orange\u2318apple"),
2551 // --------------0123456 78901
2552 Branches: []string{"master"},
2553 })
2554
2555 t.Run("LineMatches", func(t *testing.T) {
2556 res := searchForTest(t, b, finalQ)
2557 if len(res.Files) != 1 {
2558 t.Errorf("got %v, want 1 result", res.Files)
2559 }
2560 })
2561
2562 t.Run("ChunkMatches", func(t *testing.T) {
2563 res := searchForTest(t, b, finalQ, chunkOpts)
2564 if len(res.Files) != 1 {
2565 t.Errorf("got %v, want 1 result", res.Files)
2566 }
2567 })
2568}
2569
2570func TestAndShort(t *testing.T) {
2571 content := []byte("bla needle at orange bla")
2572 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2573 Document{Name: "f1", Content: content},
2574 Document{Name: "f2", Content: []byte("xx at xx")},
2575 Document{Name: "f3", Content: []byte("yy orange xx")},
2576 )
2577
2578 q := query.NewAnd(&query.Substring{Pattern: "at"},
2579 &query.Substring{Pattern: "orange"})
2580
2581 t.Run("LineMatches", func(t *testing.T) {
2582 res := searchForTest(t, b, q)
2583 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2584 t.Errorf("got %v, want 1 result", res.Files)
2585 }
2586 })
2587
2588 t.Run("ChunkMatches", func(t *testing.T) {
2589 res := searchForTest(t, b, q, chunkOpts)
2590 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
2591 t.Errorf("got %v, want 1 result", res.Files)
2592 }
2593 })
2594}
2595
2596func TestNoCollectRegexpSubstring(t *testing.T) {
2597 content := []byte("bla final bla\nfoo final, foo")
2598 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2599 Document{Name: "f1", Content: content},
2600 )
2601
2602 q := &query.Regexp{
2603 Regexp: mustParseRE("final[,.]"),
2604 }
2605
2606 t.Run("LineMatches", func(t *testing.T) {
2607 res := searchForTest(t, b, q)
2608 if len(res.Files) != 1 {
2609 t.Fatalf("got %v, want 1 result", res.Files)
2610 }
2611 if f := res.Files[0]; len(f.LineMatches) != 1 {
2612 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2613 }
2614 })
2615
2616 t.Run("ChunkMatches", func(t *testing.T) {
2617 res := searchForTest(t, b, q, chunkOpts)
2618 if len(res.Files) != 1 {
2619 t.Fatalf("got %v, want 1 result", res.Files)
2620 }
2621 if f := res.Files[0]; len(f.ChunkMatches) != 1 {
2622 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
2623 }
2624 })
2625}
2626
2627func printLineMatches(ms []LineMatch) string {
2628 var ss []string
2629 for _, m := range ms {
2630 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
2631 }
2632
2633 return strings.Join(ss, ", ")
2634}
2635
2636func TestLang(t *testing.T) {
2637 content := []byte("bla needle bla")
2638 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2639 Document{Name: "f1", Content: content},
2640 Document{Name: "f2", Language: "java", Content: content},
2641 Document{Name: "f3", Language: "cpp", Content: content},
2642 )
2643
2644 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2645 &query.Language{Language: "cpp"})
2646
2647 t.Run("LineMatches", func(t *testing.T) {
2648 res := searchForTest(t, b, q)
2649 if len(res.Files) != 1 {
2650 t.Fatalf("got %v, want 1 result in f3", res.Files)
2651 }
2652 f := res.Files[0]
2653 if f.FileName != "f3" || f.Language != "cpp" {
2654 t.Fatalf("got %v, want 1 match with language cpp", f)
2655 }
2656 })
2657
2658 t.Run("ChunkMatches", func(t *testing.T) {
2659 res := searchForTest(t, b, q, chunkOpts)
2660 if len(res.Files) != 1 {
2661 t.Fatalf("got %v, want 1 result in f3", res.Files)
2662 }
2663 f := res.Files[0]
2664 if f.FileName != "f3" || f.Language != "cpp" {
2665 t.Fatalf("got %v, want 1 match with language cpp", f)
2666 }
2667 })
2668}
2669
2670func TestLangShortcut(t *testing.T) {
2671 content := []byte("bla needle bla")
2672 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2673 Document{Name: "f2", Language: "java", Content: content},
2674 Document{Name: "f3", Language: "cpp", Content: content},
2675 )
2676
2677 q := query.NewAnd(&query.Substring{Pattern: "needle"},
2678 &query.Language{Language: "fortran"})
2679
2680 t.Run("LineMatches", func(t *testing.T) {
2681 res := searchForTest(t, b, q)
2682 if len(res.Files) != 0 {
2683 t.Fatalf("got %v, want 0 results", res.Files)
2684 }
2685 if res.Stats.IndexBytesLoaded > 0 {
2686 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2687 }
2688 })
2689
2690 t.Run("ChunkMatches", func(t *testing.T) {
2691 res := searchForTest(t, b, q, chunkOpts)
2692 if len(res.Files) != 0 {
2693 t.Fatalf("got %v, want 0 results", res.Files)
2694 }
2695 if res.Stats.IndexBytesLoaded > 0 {
2696 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
2697 }
2698 })
2699}
2700
2701func TestNoTextMatchAtoms(t *testing.T) {
2702 content := []byte("bla needle bla")
2703 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2704 Document{Name: "f1", Content: content},
2705 Document{Name: "f2", Language: "java", Content: content},
2706 Document{Name: "f3", Language: "cpp", Content: content},
2707 )
2708 q := query.NewAnd(&query.Language{Language: "java"})
2709 t.Run("LineMatches", func(t *testing.T) {
2710 res := searchForTest(t, b, q)
2711 if len(res.Files) != 1 {
2712 t.Fatalf("got %v, want 1 result in f3", res.Files)
2713 }
2714 })
2715
2716 t.Run("ChunkMatches", func(t *testing.T) {
2717 res := searchForTest(t, b, q, chunkOpts)
2718 if len(res.Files) != 1 {
2719 t.Fatalf("got %v, want 1 result in f3", res.Files)
2720 }
2721 })
2722}
2723
2724func TestNoPositiveAtoms(t *testing.T) {
2725 content := []byte("bla needle bla")
2726 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2727 Document{Name: "f1", Content: content},
2728 Document{Name: "f2", Content: content},
2729 )
2730
2731 q := query.NewAnd(
2732 &query.Not{Child: &query.Substring{Pattern: "xyz"}},
2733 &query.Repo{Regexp: regexp.MustCompile("reponame")})
2734 t.Run("LineMatches", func(t *testing.T) {
2735 res := searchForTest(t, b, q)
2736 if len(res.Files) != 2 {
2737 t.Fatalf("got %v, want 2 results in f3", res.Files)
2738 }
2739 })
2740 t.Run("ChunkMatches", func(t *testing.T) {
2741 res := searchForTest(t, b, q, chunkOpts)
2742 if len(res.Files) != 2 {
2743 t.Fatalf("got %v, want 2 results in f3", res.Files)
2744 }
2745 })
2746}
2747
2748func TestSymbolBoundaryStart(t *testing.T) {
2749 content := []byte("start\nbla bla\nend")
2750 // ----------------012345-67890123-456
2751
2752 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2753 Document{
2754 Name: "f1",
2755 Content: content,
2756 Symbols: []DocumentSection{{0, 5}, {14, 17}},
2757 },
2758 )
2759 q := &query.Symbol{
2760 Expr: &query.Substring{Pattern: "start"},
2761 }
2762 t.Run("LineMatches", func(t *testing.T) {
2763 res := searchForTest(t, b, q)
2764 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2765 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2766 }
2767 m := res.Files[0].LineMatches[0].LineFragments[0]
2768 if m.Offset != 0 {
2769 t.Fatalf("got offset %d want 0", m.Offset)
2770 }
2771 })
2772
2773 t.Run("ChunkMatches", func(t *testing.T) {
2774 res := searchForTest(t, b, q, chunkOpts)
2775 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2776 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2777 }
2778 m := res.Files[0].ChunkMatches[0].Ranges[0]
2779 if m.Start.ByteOffset != 0 {
2780 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2781 }
2782 })
2783}
2784
2785func TestSymbolBoundaryEnd(t *testing.T) {
2786 content := []byte("start\nbla bla\nend")
2787 // ----------------012345-67890123-456
2788
2789 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2790 Document{
2791 Name: "f1",
2792 Content: content,
2793 Symbols: []DocumentSection{{14, 17}},
2794 },
2795 )
2796 q := &query.Symbol{
2797 Expr: &query.Substring{Pattern: "end"},
2798 }
2799 t.Run("LineMatches", func(t *testing.T) {
2800 res := searchForTest(t, b, q)
2801 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2802 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2803 }
2804 m := res.Files[0].LineMatches[0].LineFragments[0]
2805 if m.Offset != 14 {
2806 t.Fatalf("got offset %d want 0", m.Offset)
2807 }
2808 })
2809
2810 t.Run("ChunkMatches", func(t *testing.T) {
2811 res := searchForTest(t, b, q, chunkOpts)
2812 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2813 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2814 }
2815 m := res.Files[0].ChunkMatches[0].Ranges[0]
2816 if m.Start.ByteOffset != 14 {
2817 t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
2818 }
2819 })
2820}
2821
2822func TestSymbolSubstring(t *testing.T) {
2823 content := []byte("bla\nsymblabla\nbla")
2824 // ----------------0123-4567890123-456
2825
2826 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2827 Document{
2828 Name: "f1",
2829 Content: content,
2830 Symbols: []DocumentSection{{4, 12}},
2831 },
2832 )
2833 q := &query.Symbol{
2834 Expr: &query.Substring{Pattern: "bla"},
2835 }
2836 t.Run("LineMatches", func(t *testing.T) {
2837 res := searchForTest(t, b, q)
2838 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2839 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2840 }
2841 m := res.Files[0].LineMatches[0].LineFragments[0]
2842 if m.Offset != 7 || m.MatchLength != 3 {
2843 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
2844 }
2845 })
2846
2847 t.Run("ChunkMatches", func(t *testing.T) {
2848 res := searchForTest(t, b, q, chunkOpts)
2849 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2850 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2851 }
2852 m := res.Files[0].ChunkMatches[0].Ranges[0]
2853 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
2854 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
2855 }
2856 })
2857}
2858
2859func TestSymbolSubstringExact(t *testing.T) {
2860 content := []byte("bla\nsym\nbla\nsym\nasymb")
2861 // ----------------0123-4567-890123456-78901
2862
2863 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2864 Document{
2865 Name: "f1",
2866 Content: content,
2867 Symbols: []DocumentSection{{4, 7}},
2868 },
2869 )
2870 q := &query.Symbol{
2871 Expr: &query.Substring{Pattern: "sym"},
2872 }
2873 t.Run("LineMatches", func(t *testing.T) {
2874 res := searchForTest(t, b, q)
2875 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2876 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2877 }
2878 m := res.Files[0].LineMatches[0].LineFragments[0]
2879 if m.Offset != 4 {
2880 t.Fatalf("got offset %d, want 7", m.Offset)
2881 }
2882 })
2883
2884 t.Run("ChunkMatches", func(t *testing.T) {
2885 res := searchForTest(t, b, q, chunkOpts)
2886 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2887 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2888 }
2889 m := res.Files[0].ChunkMatches[0].Ranges[0]
2890 if m.Start.ByteOffset != 4 {
2891 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset)
2892 }
2893 })
2894}
2895
2896func TestSymbolRegexpExact(t *testing.T) {
2897 content := []byte("blah\nbla\nbl")
2898 // ----------------01234-5678-90
2899
2900 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2901 Document{
2902 Name: "f1",
2903 Content: content,
2904 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
2905 },
2906 )
2907 q := &query.Symbol{
2908 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
2909 }
2910 t.Run("LineMatches", func(t *testing.T) {
2911 res := searchForTest(t, b, q)
2912 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2913 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2914 }
2915 m := res.Files[0].LineMatches[0].LineFragments[0]
2916 if m.Offset != 5 {
2917 t.Fatalf("got offset %d, want 5", m.Offset)
2918 }
2919 })
2920
2921 t.Run("ChunkMatches", func(t *testing.T) {
2922 res := searchForTest(t, b, q, chunkOpts)
2923 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2924 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2925 }
2926 m := res.Files[0].ChunkMatches[0].Ranges[0]
2927 if m.Start.ByteOffset != 5 {
2928 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
2929 }
2930 })
2931}
2932
2933func TestSymbolRegexpPartial(t *testing.T) {
2934 content := []byte("abcdef")
2935 // ----------------012345
2936
2937 b := testIndexBuilder(t, &Repository{Name: "reponame"},
2938 Document{
2939 Name: "f1",
2940 Content: content,
2941 Symbols: []DocumentSection{{0, 6}},
2942 },
2943 )
2944 q := &query.Symbol{
2945 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
2946 }
2947 t.Run("LineMatches", func(t *testing.T) {
2948 res := searchForTest(t, b, q)
2949 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
2950 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2951 }
2952 m := res.Files[0].LineMatches[0].LineFragments[0]
2953 if m.Offset != 1 {
2954 t.Fatalf("got offset %d, want 1", m.Offset)
2955 }
2956 if m.MatchLength != 3 {
2957 t.Fatalf("got match length %d, want 3", m.MatchLength)
2958 }
2959 })
2960
2961 t.Run("ChunkMatches", func(t *testing.T) {
2962 res := searchForTest(t, b, q, chunkOpts)
2963 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
2964 t.Fatalf("got %v, want 1 line in 1 file", res.Files)
2965 }
2966 m := res.Files[0].ChunkMatches[0].Ranges[0]
2967 if m.Start.ByteOffset != 1 {
2968 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
2969 }
2970 if m.End.ByteOffset != 4 {
2971 t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
2972 }
2973 })
2974}
2975
2976func TestSymbolRegexpAll(t *testing.T) {
2977 docs := []Document{
2978 {
2979 Name: "f1",
2980 Content: []byte("Hello Zoekt"),
2981 // --------------01234567890
2982 Symbols: []DocumentSection{{0, 5}, {6, 11}},
2983 },
2984 {
2985 Name: "f2",
2986 Content: []byte("Second Zoekt Third"),
2987 // --------------012345678901234567
2988 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
2989 },
2990 }
2991
2992 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2993 q := &query.Symbol{
2994 Expr: &query.Regexp{Regexp: mustParseRE(".*")},
2995 }
2996 t.Run("LineMatches", func(t *testing.T) {
2997 res := searchForTest(t, b, q)
2998 if len(res.Files) != len(docs) {
2999 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3000 }
3001 for i, want := range docs {
3002 got := res.Files[i].LineMatches[0].LineFragments
3003 if len(got) != len(want.Symbols) {
3004 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3005 }
3006
3007 for j, sec := range want.Symbols {
3008 if sec.Start != got[j].Offset {
3009 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
3010 }
3011 }
3012 }
3013 })
3014
3015 t.Run("ChunkMatches", func(t *testing.T) {
3016 res := searchForTest(t, b, q, chunkOpts)
3017 if len(res.Files) != len(docs) {
3018 t.Fatalf("got %v, want %d file", res.Files, len(docs))
3019 }
3020 for i, want := range docs {
3021 got := res.Files[i].ChunkMatches[0].Ranges
3022 if len(got) != len(want.Symbols) {
3023 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
3024 }
3025
3026 for j, sec := range want.Symbols {
3027 if sec.Start != uint32(got[j].Start.ByteOffset) {
3028 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
3029 }
3030 }
3031 }
3032 })
3033}
3034
3035func TestHitIterTerminate(t *testing.T) {
3036 // contrived input: trigram frequencies forces selecting abc +
3037 // def for the distance iteration. There is no match, so this
3038 // will advance the compressedPostingIterator to beyond the
3039 // end.
3040 content := []byte("abc bcdbcd cdecde abcabc def efg")
3041 b := testIndexBuilder(t, nil,
3042 Document{
3043 Name: "f1",
3044 Content: content,
3045 },
3046 )
3047
3048 t.Run("LineMatches", func(t *testing.T) {
3049 searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
3050 })
3051
3052 t.Run("ChunkMatches", func(t *testing.T) {
3053 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
3054 })
3055}
3056
3057func TestDistanceHitIterBailLast(t *testing.T) {
3058 content := []byte("AST AST AST UASH")
3059 b := testIndexBuilder(t, nil,
3060 Document{
3061 Name: "f1",
3062 Content: content,
3063 },
3064 )
3065 t.Run("LineMatches", func(t *testing.T) {
3066 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
3067 if len(res.Files) != 0 {
3068 t.Fatalf("got %v, want no results", res.Files)
3069 }
3070 })
3071
3072 t.Run("LineMatches", func(t *testing.T) {
3073 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
3074 if len(res.Files) != 0 {
3075 t.Fatalf("got %v, want no results", res.Files)
3076 }
3077 })
3078}
3079
3080func TestDocumentSectionRuneBoundary(t *testing.T) {
3081 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3082 b, err := NewIndexBuilder(nil)
3083 if err != nil {
3084 t.Fatalf("NewIndexBuilder: %v", err)
3085 }
3086
3087 for i, sec := range []DocumentSection{
3088 {2, 6},
3089 {3, 7},
3090 } {
3091 if err := b.Add(Document{
3092 Name: "f1",
3093 Content: []byte(content),
3094 Symbols: []DocumentSection{sec},
3095 }); err == nil {
3096 t.Errorf("%d: Add succeeded", i)
3097 }
3098 }
3099}
3100
3101func TestUnicodeQuery(t *testing.T) {
3102 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
3103 b := testIndexBuilder(t, nil,
3104 Document{
3105 Name: "f1",
3106 Content: []byte(content),
3107 },
3108 )
3109
3110 q := &query.Substring{Pattern: content}
3111
3112 t.Run("LineMatches", func(t *testing.T) {
3113 res := searchForTest(t, b, q)
3114 if len(res.Files) != 1 {
3115 t.Fatalf("want 1 match, got %v", res.Files)
3116 }
3117
3118 f := res.Files[0]
3119 if len(f.LineMatches) != 1 {
3120 t.Fatalf("want 1 line, got %v", f.LineMatches)
3121 }
3122 l := f.LineMatches[0]
3123
3124 if len(l.LineFragments) != 1 {
3125 t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
3126 }
3127 fr := l.LineFragments[0]
3128 if fr.MatchLength != len(content) {
3129 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
3130 }
3131 })
3132
3133 t.Run("ChunkMatches", func(t *testing.T) {
3134 res := searchForTest(t, b, q, chunkOpts)
3135 if len(res.Files) != 1 {
3136 t.Fatalf("want 1 match, got %v", res.Files)
3137 }
3138
3139 f := res.Files[0]
3140 if len(f.ChunkMatches) != 1 {
3141 t.Fatalf("want 1 line, got %v", f.LineMatches)
3142 }
3143 cm := f.ChunkMatches[0]
3144
3145 if len(cm.Ranges) != 1 {
3146 t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
3147 }
3148 rr := cm.Ranges[0]
3149 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
3150 t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
3151 }
3152 })
3153}
3154
3155func TestSkipInvalidContent(t *testing.T) {
3156 for _, content := range []string{
3157 // Binary
3158 "abc def \x00 abc",
3159 } {
3160
3161 b, err := NewIndexBuilder(nil)
3162 if err != nil {
3163 t.Fatalf("NewIndexBuilder: %v", err)
3164 }
3165
3166 if err := b.Add(Document{
3167 Name: "f1",
3168 Content: []byte(content),
3169 }); err != nil {
3170 t.Fatal(err)
3171 }
3172
3173 t.Run("LineMatches", func(t *testing.T) {
3174 q := &query.Substring{Pattern: "abc def"}
3175 res := searchForTest(t, b, q)
3176 if len(res.Files) != 0 {
3177 t.Fatalf("got %v, want no results", res.Files)
3178 }
3179
3180 q = &query.Substring{Pattern: "NOT-INDEXED"}
3181 res = searchForTest(t, b, q)
3182 if len(res.Files) != 1 {
3183 t.Fatalf("got %v, want 1 result", res.Files)
3184 }
3185 })
3186
3187 t.Run("ChunkMatches", func(t *testing.T) {
3188 q := &query.Substring{Pattern: "abc def"}
3189 res := searchForTest(t, b, q, chunkOpts)
3190 if len(res.Files) != 0 {
3191 t.Fatalf("got %v, want no results", res.Files)
3192 }
3193
3194 q = &query.Substring{Pattern: "NOT-INDEXED"}
3195 res = searchForTest(t, b, q, chunkOpts)
3196 if len(res.Files) != 1 {
3197 t.Fatalf("got %v, want 1 result", res.Files)
3198 }
3199 })
3200 }
3201}
3202
3203func TestDocChecker(t *testing.T) {
3204 docChecker := DocChecker{}
3205
3206 // Test valid and invalid text
3207 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
3208 if err := docChecker.Check([]byte(text), 20000, false); err != nil {
3209 t.Errorf("Check(%q): %v", text, err)
3210 }
3211 }
3212 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
3213 if err := docChecker.Check([]byte(text), 15, false); err == nil {
3214 t.Errorf("Check(%q) succeeded", text)
3215 }
3216 }
3217
3218 // Test valid and invalid text with an allowed large file
3219 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
3220 if err := docChecker.Check([]byte(text), 15, true); err != nil {
3221 t.Errorf("Check(%q): %v", text, err)
3222 }
3223 }
3224 for _, text := range []string{"zero\x00byte", "xx"} {
3225 if err := docChecker.Check([]byte(text), 15, true); err == nil {
3226 t.Errorf("Check(%q) succeeded", text)
3227 }
3228 }
3229}
3230
3231func TestLineAnd(t *testing.T) {
3232 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3233 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
3234 Document{Name: "f2", Content: []byte("apple orange\nbanana")},
3235 Document{Name: "f3", Content: []byte("banana grape")},
3236 )
3237 pattern := "(apple)(?-s:.)*?(banana)"
3238 r, _ := syntax.Parse(pattern, syntax.Perl)
3239
3240 q := query.Regexp{
3241 Regexp: r,
3242 Content: true,
3243 }
3244 t.Run("LineMatches", func(t *testing.T) {
3245 res := searchForTest(t, b, &q)
3246 wantRegexpCount := 1
3247 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3248 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3249 }
3250 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3251 t.Errorf("got %v, want 1 result", res.Files)
3252 }
3253 })
3254
3255 t.Run("ChunkMatches", func(t *testing.T) {
3256 res := searchForTest(t, b, &q, chunkOpts)
3257 wantRegexpCount := 1
3258 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3259 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3260 }
3261 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3262 t.Errorf("got %v, want 1 result", res.Files)
3263 }
3264 })
3265}
3266
3267func TestLineAndFileName(t *testing.T) {
3268 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3269 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3270 Document{Name: "f2", Content: []byte("apple banana\norange")},
3271 Document{Name: "apple banana", Content: []byte("banana grape")},
3272 )
3273 pattern := "(apple)(?-s:.)*?(banana)"
3274 r, _ := syntax.Parse(pattern, syntax.Perl)
3275
3276 q := query.Regexp{
3277 Regexp: r,
3278 FileName: true,
3279 }
3280 t.Run("LineMatches", func(t *testing.T) {
3281 res := searchForTest(t, b, &q)
3282 wantRegexpCount := 1
3283 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3284 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3285 }
3286 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3287 t.Errorf("got %v, want 1 result", res.Files)
3288 }
3289 })
3290
3291 t.Run("ChunkMatches", func(t *testing.T) {
3292 res := searchForTest(t, b, &q, chunkOpts)
3293 wantRegexpCount := 1
3294 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3295 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3296 }
3297 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
3298 t.Errorf("got %v, want 1 result", res.Files)
3299 }
3300 })
3301}
3302
3303func TestMultiLineRegex(t *testing.T) {
3304 b := testIndexBuilder(t, &Repository{Name: "reponame"},
3305 Document{Name: "f1", Content: []byte("apple banana\ngrape")},
3306 Document{Name: "f2", Content: []byte("apple orange")},
3307 Document{Name: "f3", Content: []byte("grape apple")},
3308 )
3309 pattern := "(apple).*?[[:space:]].*?(grape)"
3310 r, _ := syntax.Parse(pattern, syntax.Perl)
3311
3312 q := query.Regexp{
3313 Regexp: r,
3314 }
3315 t.Run("LineMatches", func(t *testing.T) {
3316 res := searchForTest(t, b, &q)
3317 wantRegexpCount := 2
3318 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3319 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3320 }
3321 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3322 t.Errorf("got %v, want 1 result", res.Files)
3323 }
3324 if l := len(res.Files[0].LineMatches); l != 2 {
3325 t.Errorf("got %v, want 2 line matches", l)
3326 }
3327 })
3328
3329 t.Run("ChunkMatches", func(t *testing.T) {
3330 res := searchForTest(t, b, &q, chunkOpts)
3331 wantRegexpCount := 2
3332 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
3333 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
3334 }
3335 if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
3336 t.Errorf("got %v, want 1 result", res.Files)
3337 }
3338 if l := len(res.Files[0].ChunkMatches); l != 1 {
3339 t.Errorf("got %v, want 1 chunk matches", l)
3340 }
3341 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
3342 t.Errorf("got %v, want 1 chunk ranges", l)
3343 }
3344 })
3345}
3346
3347func TestSearchTypeFileName(t *testing.T) {
3348 b := testIndexBuilder(t, &Repository{
3349 Name: "reponame",
3350 },
3351 Document{Name: "f1", Content: []byte("bla the needle")},
3352 Document{Name: "f2", Content: []byte("another file another\nneedle")},
3353 // -----------------------------------012345678901234567890-123456
3354 )
3355
3356 t.Run("LineMatches", func(t *testing.T) {
3357 wantSingleMatch := func(res *SearchResult, want string) {
3358 t.Helper()
3359 fmatches := res.Files
3360 if len(fmatches) != 1 {
3361 t.Errorf("got %v, want 1 matches", len(fmatches))
3362 return
3363 }
3364 if len(fmatches[0].LineMatches) != 1 {
3365 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3366 return
3367 }
3368 var got string
3369 if fmatches[0].LineMatches[0].FileName {
3370 got = fmatches[0].FileName
3371 } else {
3372 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3373 }
3374
3375 if got != want {
3376 t.Errorf("got %s, want %s", got, want)
3377 }
3378 }
3379
3380 // Only return the later match in the second file
3381 res := searchForTest(t, b, query.NewAnd(
3382 &query.Type{
3383 Type: query.TypeFileName,
3384 Child: &query.Substring{Pattern: "needle"},
3385 },
3386 &query.Substring{Pattern: "file"}))
3387 wantSingleMatch(res, "f2:8")
3388
3389 // Only return a filename result
3390 res = searchForTest(t, b,
3391 &query.Type{
3392 Type: query.TypeFileName,
3393 Child: &query.Substring{Pattern: "file"},
3394 })
3395 wantSingleMatch(res, "f2")
3396 })
3397
3398 t.Run("ChunkMatches", func(t *testing.T) {
3399 wantSingleMatch := func(res *SearchResult, want string) {
3400 t.Helper()
3401 fmatches := res.Files
3402 if len(fmatches) != 1 {
3403 t.Errorf("got %v, want 1 matches", len(fmatches))
3404 return
3405 }
3406 if len(fmatches[0].ChunkMatches) != 1 {
3407 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3408 return
3409 }
3410 var got string
3411 if fmatches[0].ChunkMatches[0].FileName {
3412 got = fmatches[0].FileName
3413 } else {
3414 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3415 }
3416
3417 if got != want {
3418 t.Errorf("got %s, want %s", got, want)
3419 }
3420 }
3421
3422 // Only return the later match in the second file
3423 res := searchForTest(t, b, query.NewAnd(
3424 &query.Type{
3425 Type: query.TypeFileName,
3426 Child: &query.Substring{Pattern: "needle"},
3427 },
3428 &query.Substring{Pattern: "file"}),
3429 chunkOpts,
3430 )
3431 wantSingleMatch(res, "f2:8")
3432
3433 // Only return a filename result
3434 res = searchForTest(t, b,
3435 &query.Type{
3436 Type: query.TypeFileName,
3437 Child: &query.Substring{Pattern: "file"},
3438 },
3439 chunkOpts,
3440 )
3441 wantSingleMatch(res, "f2")
3442 })
3443}
3444
3445func TestSearchTypeLanguage(t *testing.T) {
3446 b := testIndexBuilder(t, &Repository{
3447 Name: "reponame",
3448 },
3449 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
3450 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
3451 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
3452 )
3453
3454 t.Log(b.languageMap)
3455
3456 t.Run("LineMatches", func(t *testing.T) {
3457 wantSingleMatch := func(res *SearchResult, want string) {
3458 t.Helper()
3459 fmatches := res.Files
3460 if len(fmatches) != 1 {
3461 t.Errorf("got %v, want 1 matches", len(fmatches))
3462 return
3463 }
3464 if len(fmatches[0].LineMatches) != 1 {
3465 t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
3466 return
3467 }
3468 var got string
3469 if fmatches[0].LineMatches[0].FileName {
3470 got = fmatches[0].FileName
3471 } else {
3472 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
3473 }
3474
3475 if got != want {
3476 t.Errorf("got %s, want %s", got, want)
3477 }
3478 }
3479
3480 res := searchForTest(t, b, &query.Language{Language: "Apex"})
3481 wantSingleMatch(res, "apex.cls")
3482
3483 res = searchForTest(t, b, &query.Language{Language: "TeX"})
3484 wantSingleMatch(res, "tex.cls")
3485
3486 res = searchForTest(t, b, &query.Language{Language: "C"})
3487 wantSingleMatch(res, "hello.h")
3488
3489 // test fallback language search by pretending it's an older index version
3490 res = searchForTest(t, b, &query.Language{Language: "C++"})
3491 if len(res.Files) != 0 {
3492 t.Errorf("got %d results for C++, want 0", len(res.Files))
3493 }
3494
3495 b.featureVersion = 11 // force fallback
3496 res = searchForTest(t, b, &query.Language{Language: "C++"})
3497 wantSingleMatch(res, "hello.h")
3498 })
3499
3500 t.Run("ChunkMatches", func(t *testing.T) {
3501 wantSingleMatch := func(res *SearchResult, want string) {
3502 t.Helper()
3503 fmatches := res.Files
3504 if len(fmatches) != 1 {
3505 t.Errorf("got %v, want 1 matches", len(fmatches))
3506 return
3507 }
3508 if len(fmatches[0].ChunkMatches) != 1 {
3509 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
3510 return
3511 }
3512 var got string
3513 if fmatches[0].ChunkMatches[0].FileName {
3514 got = fmatches[0].FileName
3515 } else {
3516 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
3517 }
3518
3519 if got != want {
3520 t.Errorf("got %s, want %s", got, want)
3521 }
3522 }
3523
3524 b.featureVersion = FeatureVersion // reset feature version
3525 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
3526 wantSingleMatch(res, "apex.cls")
3527
3528 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
3529 wantSingleMatch(res, "tex.cls")
3530
3531 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
3532 wantSingleMatch(res, "hello.h")
3533
3534 // test fallback language search by pretending it's an older index version
3535 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3536 if len(res.Files) != 0 {
3537 t.Errorf("got %d results for C++, want 0", len(res.Files))
3538 }
3539
3540 b.featureVersion = 11 // force fallback
3541 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
3542 wantSingleMatch(res, "hello.h")
3543 })
3544}
3545
3546func TestStats(t *testing.T) {
3547 ignored := []cmp.Option{
3548 cmpopts.EquateEmpty(),
3549 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
3550 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
3551 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
3552 }
3553
3554 repoListEntries := func(b *IndexBuilder) []RepoListEntry {
3555 searcher := searcherForTest(t, b)
3556 indexdata := searcher.(*indexData)
3557 return indexdata.repoListEntry
3558 }
3559
3560 t.Run("one empty repo", func(t *testing.T) {
3561 b := testIndexBuilder(t, nil)
3562 got := repoListEntries(b)
3563 want := []RepoListEntry{
3564 {
3565 Stats: RepoStats{
3566 Repos: 0,
3567 Shards: 1,
3568 Documents: 0,
3569 IndexBytes: 20,
3570 ContentBytes: 0,
3571 NewLinesCount: 0,
3572 DefaultBranchNewLinesCount: 0,
3573 OtherBranchesNewLinesCount: 0,
3574 },
3575 },
3576 }
3577
3578 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3579 t.Fatalf("mismatch (-want +got):\n%s", diff)
3580 }
3581 })
3582
3583 t.Run("one simple shard", func(t *testing.T) {
3584 b := testIndexBuilder(t, nil,
3585 Document{Name: "doc 0", Content: []byte("content 0")},
3586 Document{Name: "doc 1", Content: []byte("content 1")},
3587 )
3588 got := repoListEntries(b)
3589 want := []RepoListEntry{
3590 {
3591 Stats: RepoStats{
3592 Repos: 0,
3593 Shards: 1,
3594 Documents: 2,
3595 IndexBytes: 224,
3596 ContentBytes: 28,
3597 NewLinesCount: 0,
3598 DefaultBranchNewLinesCount: 0,
3599 OtherBranchesNewLinesCount: 0,
3600 },
3601 },
3602 }
3603
3604 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3605 t.Fatalf("mismatch (-want +got):\n%s", diff)
3606 }
3607 })
3608
3609 t.Run("one compound shard", func(t *testing.T) {
3610 b := testIndexBuilderCompound(t,
3611 []*Repository{
3612 {Name: "repo 0"},
3613 {Name: "repo 1"},
3614 },
3615 [][]Document{
3616 {
3617 {Name: "doc 0", Content: []byte("content 0")},
3618 {Name: "doc 1", Content: []byte("content 1")},
3619 },
3620 {
3621 {Name: "doc 2", Content: []byte("content 2")},
3622 {Name: "doc 3", Content: []byte("content 3")},
3623 },
3624 },
3625 )
3626 got := repoListEntries(b)
3627 want := []RepoListEntry{
3628 {
3629 Stats: RepoStats{
3630 Repos: 0,
3631 Shards: 1,
3632 Documents: 2,
3633 IndexBytes: 180,
3634 ContentBytes: 28,
3635 NewLinesCount: 0,
3636 DefaultBranchNewLinesCount: 0,
3637 OtherBranchesNewLinesCount: 0,
3638 },
3639 },
3640 {
3641 Stats: RepoStats{
3642 Repos: 0,
3643 Shards: 1,
3644 Documents: 2,
3645 IndexBytes: 180,
3646 ContentBytes: 28,
3647 NewLinesCount: 0,
3648 DefaultBranchNewLinesCount: 0,
3649 OtherBranchesNewLinesCount: 0,
3650 },
3651 },
3652 }
3653
3654 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3655 t.Fatalf("mismatch (-want +got):\n%s", diff)
3656 }
3657 })
3658
3659 t.Run("compound shard with empty repos", func(t *testing.T) {
3660 b := testIndexBuilderCompound(t,
3661 []*Repository{
3662 {Name: "repo 0"},
3663 {Name: "repo 1"},
3664 {Name: "repo 2"},
3665 {Name: "repo 3"},
3666 {Name: "repo 4"},
3667 },
3668 [][]Document{
3669 {{Name: "doc 0", Content: []byte("content 0")}},
3670 nil,
3671 {{Name: "doc 1", Content: []byte("content 1")}},
3672 nil,
3673 nil,
3674 },
3675 )
3676 got := repoListEntries(b)
3677
3678 entryEmpty := RepoListEntry{Stats: RepoStats{
3679 Shards: 1,
3680 Documents: 0,
3681 ContentBytes: 0,
3682 }}
3683 entryNonEmpty := RepoListEntry{Stats: RepoStats{
3684 Shards: 1,
3685 Documents: 1,
3686 ContentBytes: 14,
3687 }}
3688
3689 want := []RepoListEntry{
3690 entryNonEmpty,
3691 entryEmpty,
3692 entryNonEmpty,
3693 entryEmpty,
3694 entryEmpty,
3695 }
3696
3697 if diff := cmp.Diff(want, got, ignored...); diff != "" {
3698 t.Fatalf("mismatch (-want +got):\n%s", diff)
3699 }
3700 })
3701}
3702
3703// This tests the frequent pattern "\bLITERAL\b".
3704func TestWordSearch(t *testing.T) {
3705 content := []byte("needle the bla")
3706 // ----------------01234567890123
3707
3708 b := testIndexBuilder(t, nil,
3709 Document{
3710 Name: "f1",
3711 Content: content,
3712 })
3713
3714 t.Run("LineMatches", func(t *testing.T) {
3715 sres := searchForTest(t, b,
3716 &query.Regexp{
3717 Regexp: mustParseRE("\\bthe\\b"),
3718 CaseSensitive: true,
3719 Content: true,
3720 })
3721
3722 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
3723 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3724 }
3725
3726 if sres.Stats.RegexpsConsidered != 0 {
3727 t.Fatal("expected regexp to be skipped")
3728 }
3729
3730 got := sres.Files[0].LineMatches[0]
3731 want := LineMatch{
3732 LineFragments: []LineFragmentMatch{{
3733 LineOffset: 7,
3734 Offset: 7,
3735 MatchLength: 3,
3736 }},
3737 Line: content,
3738 FileName: false,
3739 LineNumber: 1,
3740 LineStart: 0,
3741 LineEnd: 14,
3742 }
3743
3744 if !reflect.DeepEqual(got, want) {
3745 t.Errorf("got %#v, want %#v", got, want)
3746 }
3747 })
3748
3749 t.Run("ChunkMatches", func(t *testing.T) {
3750 sres := searchForTest(t, b,
3751 &query.Regexp{
3752 Regexp: mustParseRE("\\bthe\\b"),
3753 CaseSensitive: true,
3754 }, chunkOpts)
3755
3756 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
3757 t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
3758 }
3759
3760 if sres.Stats.RegexpsConsidered != 0 {
3761 t.Fatal("expected regexp to be skipped")
3762 }
3763
3764 got := sres.Files[0].ChunkMatches[0]
3765 want := ChunkMatch{
3766 Content: content,
3767 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1},
3768 Ranges: []Range{{
3769 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8},
3770 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11},
3771 }},
3772 }
3773
3774 if diff := cmp.Diff(want, got); diff != "" {
3775 t.Fatal(diff)
3776 }
3777 })
3778}