fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 r.Files[i].ChunkMatches[j].BestLineMatch = 0 42 } 43 r.Files[i].Checksum = nil 44 r.Files[i].Debug = "" 45 } 46} 47 48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 49 t.Helper() 50 51 b, err := NewIndexBuilder(repo) 52 if err != nil { 53 t.Fatalf("NewIndexBuilder: %v", err) 54 } 55 56 for i, d := range docs { 57 if err := b.Add(d); err != nil { 58 t.Fatalf("Add %d: %v", i, err) 59 } 60 } 61 62 return b 63} 64 65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 66 t.Helper() 67 68 b := newIndexBuilder() 69 b.indexFormatVersion = NextIndexFormatVersion 70 71 if len(repos) != len(docs) { 72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 73 } 
74 75 for i, repo := range repos { 76 if err := b.setRepository(repo); err != nil { 77 t.Fatal(err) 78 } 79 for j, d := range docs[i] { 80 if err := b.Add(d); err != nil { 81 t.Fatalf("Add %d %d: %v", i, j, err) 82 } 83 } 84 } 85 86 return b 87} 88 89func TestBoundary(t *testing.T) { 90 b := testIndexBuilder(t, nil, 91 Document{Name: "f1", Content: []byte("x the")}, 92 Document{Name: "f1", Content: []byte("reader")}) 93 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 94 if len(res.Files) > 0 { 95 t.Fatalf("got %v, want no matches", res.Files) 96 } 97} 98 99func TestDocSectionInvalid(t *testing.T) { 100 b, err := NewIndexBuilder(nil) 101 if err != nil { 102 t.Fatalf("NewIndexBuilder: %v", err) 103 } 104 doc := Document{ 105 Name: "f1", 106 Content: []byte("01234567890123"), 107 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 108 } 109 110 if err := b.Add(doc); err == nil { 111 t.Errorf("overlapping doc sections should fail") 112 } 113 114 doc = Document{ 115 Name: "f1", 116 Content: []byte("01234567890123"), 117 Symbols: []DocumentSection{{0, 20}}, 118 } 119 120 if err := b.Add(doc); err == nil { 121 t.Errorf("doc sections beyond EOF should fail") 122 } 123} 124 125func TestBasic(t *testing.T) { 126 b := testIndexBuilder(t, nil, 127 Document{ 128 Name: "f2", 129 Content: []byte("to carry water in the no later bla"), 130 // --------------0123456789012345678901234567890123 131 }) 132 133 t.Run("LineMatch", func(t *testing.T) { 134 res := searchForTest(t, b, &query.Substring{ 135 Pattern: "water", 136 CaseSensitive: true, 137 }) 138 fmatches := res.Files 139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 140 t.Fatalf("got %v, want 1 matches", fmatches) 141 } 142 143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 144 want := "f2:9" 145 if got != want { 146 t.Errorf("1: got %s, want %s", got, want) 147 } 148 }) 149 150 t.Run("ChunkMatch", func(t *testing.T) { 151 res := searchForTest(t, b, 
&query.Substring{ 152 Pattern: "water", 153 CaseSensitive: true, 154 }, chunkOpts) 155 fmatches := res.Files 156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 157 t.Fatalf("got %v, want 1 matches", fmatches) 158 } 159 160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 161 want := "f2:9" 162 if got != want { 163 t.Errorf("1: got %s, want %s", got, want) 164 } 165 }) 166} 167 168func TestEmptyIndex(t *testing.T) { 169 b := testIndexBuilder(t, nil) 170 searcher := searcherForTest(t, b) 171 172 var opts SearchOptions 173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 174 t.Fatalf("Search: %v", err) 175 } 176 177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 178 t.Fatalf("List: %v", err) 179 } 180 181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 182 t.Fatalf("Search: %v", err) 183 } 184} 185 186type memSeeker struct { 187 data []byte 188} 189 190func (s *memSeeker) Name() string { 191 return "memseeker" 192} 193 194func (s *memSeeker) Close() {} 195func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 196 return s.data[off : off+sz], nil 197} 198 199func (s *memSeeker) Size() (uint32, error) { 200 return uint32(len(s.data)), nil 201} 202 203func TestNewlines(t *testing.T) { 204 b := testIndexBuilder(t, nil, 205 // -----------------------------------------012345-678901-234 206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 207 208 t.Run("LineMatches", func(t *testing.T) { 209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 210 211 matches := sres.Files 212 want := []FileMatch{{ 213 FileName: "filename", 214 LineMatches: []LineMatch{{ 215 LineFragments: []LineFragmentMatch{{ 216 Offset: 8, 217 LineOffset: 2, 218 MatchLength: 3, 219 }}, 220 Line: []byte("line2\n"), 221 
LineStart: 6, 222 LineEnd: 12, 223 LineNumber: 2, 224 }}, 225 }} 226 227 if diff := cmp.Diff(matches, want); diff != "" { 228 t.Fatal(diff) 229 } 230 }) 231 232 t.Run("ChunkMatches", func(t *testing.T) { 233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 234 235 matches := sres.Files 236 want := []FileMatch{{ 237 FileName: "filename", 238 ChunkMatches: []ChunkMatch{{ 239 Content: []byte("line2\n"), 240 ContentStart: Location{ 241 ByteOffset: 6, 242 LineNumber: 2, 243 Column: 1, 244 }, 245 Ranges: []Range{{ 246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 248 }}, 249 }}, 250 }} 251 252 if diff := cmp.Diff(want, matches); diff != "" { 253 t.Fatal(diff) 254 } 255 }) 256} 257 258// A result spanning multiple lines should have LineMatches that only cover 259// single lines. 260func TestQueryNewlines(t *testing.T) { 261 text := "line1\nline2\nbla" 262 b := testIndexBuilder(t, nil, 263 Document{Name: "filename", Content: []byte(text)}) 264 265 t.Run("LineMatches", func(t *testing.T) { 266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 267 matches := sres.Files 268 if len(matches) != 1 { 269 t.Fatalf("got %d file matches, want exactly one", len(matches)) 270 } 271 m := matches[0] 272 if len(m.LineMatches) != 2 { 273 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches) 274 } 275 }) 276 277 t.Run("ChunkMatches", func(t *testing.T) { 278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 279 matches := sres.Files 280 if len(matches) != 1 { 281 t.Fatalf("got %d file matches, want exactly one", len(matches)) 282 } 283 m := matches[0] 284 if len(m.ChunkMatches) != 1 { 285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 286 } 287 }) 288} 289 290var chunkOpts = SearchOptions{ChunkMatches: true} 291 292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o 
...SearchOptions) *SearchResult { 293 searcher := searcherForTest(t, b) 294 var opts SearchOptions 295 if len(o) > 0 { 296 opts = o[0] 297 } 298 res, err := searcher.Search(context.Background(), q, &opts) 299 if err != nil { 300 t.Fatalf("Search(%s): %v", q, err) 301 } 302 clearScores(res) 303 return res 304} 305 306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 307 var buf bytes.Buffer 308 if err := b.Write(&buf); err != nil { 309 t.Fatal(err) 310 } 311 f := &memSeeker{buf.Bytes()} 312 313 searcher, err := NewSearcher(f) 314 if err != nil { 315 t.Fatalf("NewSearcher: %v", err) 316 } 317 318 return searcher 319} 320 321func TestCaseFold(t *testing.T) { 322 b := testIndexBuilder(t, nil, 323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 324 // -----------------------------------012345678901234 325 ) 326 t.Run("LineMatches", func(t *testing.T) { 327 sres := searchForTest(t, b, &query.Substring{ 328 Pattern: "bananas", 329 CaseSensitive: true, 330 }) 331 matches := sres.Files 332 if len(matches) != 0 { 333 t.Errorf("foldcase: got %#v, want 0 matches", matches) 334 } 335 336 sres = searchForTest(t, b, 337 &query.Substring{ 338 Pattern: "BaNaNAS", 339 CaseSensitive: true, 340 }) 341 matches = sres.Files 342 if len(matches) != 1 { 343 t.Errorf("no foldcase: got %v, want 1 matches", matches) 344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 345 t.Errorf("foldcase: got %v, want offsets 7", matches) 346 } 347 }) 348 349 t.Run("ChunkMatches", func(t *testing.T) { 350 sres := searchForTest(t, b, &query.Substring{ 351 Pattern: "bananas", 352 CaseSensitive: true, 353 }, chunkOpts) 354 matches := sres.Files 355 if len(matches) != 0 { 356 t.Errorf("foldcase: got %#v, want 0 matches", matches) 357 } 358 359 sres = searchForTest(t, b, 360 &query.Substring{ 361 Pattern: "BaNaNAS", 362 CaseSensitive: true, 363 }) 364 matches = sres.Files 365 if len(matches) != 1 { 366 t.Errorf("no foldcase: got %v, want 1 matches", matches) 367 } else 
if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 368 t.Errorf("foldcase: got %v, want offsets 7", matches) 369 } 370 }) 371} 372 373// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 374// chars. Those are then set as symbols. 375func wordsAsSymbols(doc Document) Document { 376 re := regexp.MustCompile(`\b\w{2,}\b`) 377 var symbols []DocumentSection 378 for _, match := range re.FindAllIndex(doc.Content, -1) { 379 symbols = append(symbols, DocumentSection{ 380 Start: uint32(match[0]), 381 End: uint32(match[1]), 382 }) 383 } 384 doc.Symbols = symbols 385 return doc 386} 387 388func TestSearchStats(t *testing.T) { 389 ctx := context.Background() 390 searcher := searcherForTest(t, testIndexBuilder(t, nil, 391 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 392 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 393 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 394 // --------------------------------------------------0123456789012345 395 )) 396 397 andQuery := query.NewAnd( 398 &query.Substring{ 399 Pattern: "banana", 400 }, 401 &query.Substring{ 402 Pattern: "apple", 403 }, 404 ) 405 406 t.Run("LineMatches", func(t *testing.T) { 407 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 408 if err != nil { 409 t.Fatal(err) 410 } 411 matches := sres.Files 412 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 413 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 414 } 415 416 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 417 t.Fatalf("got %#v, want offsets 2,9", matches) 418 } 419 }) 420 t.Run("ChunkMatches", func(t *testing.T) { 421 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 422 if err != nil { 423 t.Fatal(err) 424 } 425 matches := sres.Files 426 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || 
len(matches[0].ChunkMatches[0].Ranges) != 2 { 427 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 428 } 429 430 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 431 t.Fatalf("got %#v, want offsets 2,9", matches) 432 } 433 }) 434 t.Run("Stats", func(t *testing.T) { 435 cases := []struct { 436 Name string 437 Q query.Q 438 Want Stats 439 }{{ 440 Name: "and-query", 441 Q: andQuery, 442 Want: Stats{ 443 FilesLoaded: 1, 444 ContentBytesLoaded: 22, 445 IndexBytesLoaded: 10, 446 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 447 NgramLookups: 104, 448 MatchCount: 2, 449 FileCount: 1, 450 FilesConsidered: 2, 451 ShardsScanned: 1, 452 }, 453 }, { 454 Name: "one-trigram", 455 Q: &query.Substring{ 456 Pattern: "a y", 457 Content: true, 458 CaseSensitive: true, 459 }, 460 Want: Stats{ 461 ContentBytesLoaded: 14, 462 IndexBytesLoaded: 1, 463 FileCount: 1, 464 FilesConsidered: 1, 465 FilesLoaded: 1, 466 ShardsScanned: 1, 467 MatchCount: 1, 468 NgramMatches: 1, 469 NgramLookups: 2, // once to lookup frequency then again to access posting list. 470 }, 471 }, { 472 Name: "one-trigram-case-insensitive", 473 Q: &query.Substring{ 474 Pattern: "a y", 475 Content: true, 476 }, 477 Want: Stats{ 478 ContentBytesLoaded: 14, 479 IndexBytesLoaded: 1, 480 FileCount: 1, 481 FilesConsidered: 1, 482 FilesLoaded: 1, 483 ShardsScanned: 1, 484 MatchCount: 1, 485 NgramMatches: 1, 486 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
487 }, 488 }, { 489 Name: "one-trigram-pruned", 490 Q: &query.Substring{ 491 Pattern: "foo", 492 Content: true, 493 CaseSensitive: true, 494 }, 495 Want: Stats{ 496 ShardsSkippedFilter: 1, 497 NgramLookups: 1, // only had to lookup once 498 }, 499 }, { 500 Name: "one-trigram-branch-pruned", 501 Q: query.NewAnd( 502 &query.Substring{ 503 Pattern: "foo", 504 Content: true, 505 CaseSensitive: true, 506 }, 507 &query.Substring{ 508 Pattern: "a y", 509 Content: true, 510 CaseSensitive: true, 511 }, 512 ), 513 Want: Stats{ 514 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 515 ShardsSkippedFilter: 1, 516 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 517 }, 518 }, { 519 Name: "symbol-substr-nomatch", 520 Q: &query.Symbol{Expr: &query.Substring{ 521 Pattern: "banana apple", 522 Content: true, 523 CaseSensitive: true, 524 }}, 525 Want: Stats{ 526 IndexBytesLoaded: 3, 527 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 528 MatchCount: 0, // even though there is a match it doesn't align with a symbol 529 ShardsScanned: 1, 530 NgramMatches: 1, 531 NgramLookups: 12, 532 }, 533 }, { 534 Name: "symbol-substr", 535 Q: &query.Symbol{Expr: &query.Substring{ 536 Pattern: "apple", 537 Content: true, 538 CaseSensitive: true, 539 }}, 540 Want: Stats{ 541 ContentBytesLoaded: 35, 542 IndexBytesLoaded: 4, 543 FileCount: 2, 544 FilesConsidered: 2, // must be 2 to ensure we used the index 545 FilesLoaded: 2, 546 MatchCount: 2, // apple symbols is in two files 547 ShardsScanned: 1, 548 NgramMatches: 2, 549 NgramLookups: 5, 550 }, 551 }, { 552 Name: "symbol-regexp-nomatch", 553 Q: &query.Symbol{Expr: &query.Regexp{ 554 Regexp: mustParseRE("^apple.banana$"), 555 Content: true, 556 CaseSensitive: true, 557 }}, 558 Want: Stats{ 559 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 560 IndexBytesLoaded: 10, 561 FilesConsidered: 2, // important that we 
don't check 3 to ensure we are using the index 562 FilesLoaded: 2, 563 MatchCount: 0, // even though there is a match it doesn't align with a symbol 564 ShardsScanned: 1, 565 NgramMatches: 3, 566 NgramLookups: 11, 567 }, 568 }, { 569 Name: "symbol-regexp", 570 Q: &query.Symbol{Expr: &query.Regexp{ 571 Regexp: mustParseRE("^app.e$"), 572 Content: true, 573 CaseSensitive: true, 574 }}, 575 Want: Stats{ 576 ContentBytesLoaded: 35, 577 IndexBytesLoaded: 2, 578 FileCount: 2, 579 FilesConsidered: 2, // must be 2 to ensure we used the index 580 FilesLoaded: 2, 581 MatchCount: 2, // apple symbols is in two files 582 ShardsScanned: 1, 583 NgramMatches: 2, 584 NgramLookups: 2, 585 }, 586 }} 587 588 for _, tc := range cases { 589 t.Run(tc.Name, func(t *testing.T) { 590 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 591 if err != nil { 592 t.Fatal(err) 593 } 594 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 595 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 596 } 597 }) 598 } 599 }) 600} 601 602func TestAndNegateSearch(t *testing.T) { 603 b := testIndexBuilder(t, nil, 604 Document{Name: "f1", Content: []byte("x banana y")}, 605 // -----------------------------------0123456789 606 Document{Name: "f4", Content: []byte("x banana apple y")}) 607 608 t.Run("LineMatches", func(t *testing.T) { 609 sres := searchForTest(t, b, query.NewAnd( 610 &query.Substring{ 611 Pattern: "banana", 612 }, 613 &query.Not{Child: &query.Substring{ 614 Pattern: "apple", 615 }})) 616 617 matches := sres.Files 618 619 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 620 t.Fatalf("got %v, want 1 match", matches) 621 } 622 if matches[0].FileName != "f1" { 623 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 624 } 625 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 626 t.Fatalf("got %v, want offset 2", matches) 627 } 628 }) 629 630 t.Run("ChunkMatches", func(t *testing.T) { 631 sres := 
searchForTest(t, b, 632 query.NewAnd( 633 &query.Substring{ 634 Pattern: "banana", 635 }, 636 &query.Not{Child: &query.Substring{ 637 Pattern: "apple", 638 }}, 639 ), 640 chunkOpts, 641 ) 642 643 matches := sres.Files 644 645 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 646 t.Fatalf("got %v, want 1 match", matches) 647 } 648 if matches[0].FileName != "f1" { 649 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 650 } 651 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 652 t.Fatalf("got %v, want offset 2", matches) 653 } 654 }) 655} 656 657func TestNegativeMatchesOnlyShortcut(t *testing.T) { 658 b := testIndexBuilder(t, nil, 659 Document{Name: "f1", Content: []byte("x banana y")}, 660 Document{Name: "f2", Content: []byte("x appelmoes y")}, 661 Document{Name: "f3", Content: []byte("x appelmoes y")}, 662 Document{Name: "f3", Content: []byte("x appelmoes y")}) 663 664 t.Run("LineMatches", func(t *testing.T) { 665 sres := searchForTest(t, b, query.NewAnd( 666 &query.Substring{ 667 Pattern: "banana", 668 }, 669 &query.Not{Child: &query.Substring{ 670 Pattern: "appel", 671 }})) 672 673 if sres.Stats.FilesConsidered != 1 { 674 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 675 } 676 }) 677 678 t.Run("ChunkMatches", func(t *testing.T) { 679 sres := searchForTest(t, b, query.NewAnd( 680 &query.Substring{ 681 Pattern: "banana", 682 }, 683 &query.Not{Child: &query.Substring{ 684 Pattern: "appel", 685 }}), chunkOpts) 686 687 if sres.Stats.FilesConsidered != 1 { 688 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 689 } 690 }) 691} 692 693func TestFileSearch(t *testing.T) { 694 b := testIndexBuilder(t, nil, 695 Document{Name: "banzana", Content: []byte("x orange y")}, 696 // -------------0123456 697 Document{Name: "banana", Content: []byte("x apple y")}, 698 // -------------012345 699 ) 700 701 t.Run("LineMatches", func(t *testing.T) { 702 sres := searchForTest(t, b, &query.Substring{ 703 Pattern: "anan", 704 FileName: true, 
705 }) 706 707 matches := sres.Files 708 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 709 t.Fatalf("got %v, want 1 match", matches) 710 } 711 712 got := matches[0].LineMatches[0] 713 want := LineMatch{ 714 Line: []byte("banana"), 715 LineFragments: []LineFragmentMatch{{ 716 Offset: 1, 717 LineOffset: 1, 718 MatchLength: 4, 719 }}, 720 FileName: true, 721 } 722 723 if !reflect.DeepEqual(got, want) { 724 t.Errorf("got %#v, want %#v", got, want) 725 } 726 }) 727 728 t.Run("ChunkMatches", func(t *testing.T) { 729 sres := searchForTest(t, b, &query.Substring{ 730 Pattern: "anan", 731 FileName: true, 732 }, chunkOpts) 733 734 matches := sres.Files 735 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 736 t.Fatalf("got %v, want 1 match", matches) 737 } 738 739 got := matches[0].ChunkMatches[0] 740 want := ChunkMatch{ 741 Content: []byte("banana"), 742 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 743 Ranges: []Range{{ 744 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 745 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 746 }}, 747 FileName: true, 748 } 749 750 if diff := cmp.Diff(want, got); diff != "" { 751 t.Fatal(diff) 752 } 753 }) 754 755 t.Run("FileNameSet", func(t *testing.T) { 756 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 757 758 matches := sres.Files 759 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 760 t.Fatalf("got %v, want 1 match", matches) 761 } 762 763 got := matches[0].ChunkMatches[0] 764 want := ChunkMatch{ 765 Content: []byte("banana"), 766 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 767 Ranges: []Range{{ 768 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 769 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 770 }}, 771 FileName: true, 772 } 773 774 if diff := cmp.Diff(want, got); diff != "" { 775 t.Fatal(diff) 776 } 777 }) 778} 779 780func TestFileCase(t *testing.T) { 781 b := testIndexBuilder(t, nil, 782 
Document{Name: "BANANA", Content: []byte("x orange y")}) 783 784 t.Run("LineMatches", func(t *testing.T) { 785 sres := searchForTest(t, b, &query.Substring{ 786 Pattern: "banana", 787 FileName: true, 788 }) 789 790 matches := sres.Files 791 if len(matches) != 1 || matches[0].FileName != "BANANA" { 792 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 793 } 794 }) 795 796 t.Run("ChunkMatches", func(t *testing.T) { 797 sres := searchForTest(t, b, &query.Substring{ 798 Pattern: "banana", 799 FileName: true, 800 }, chunkOpts) 801 802 matches := sres.Files 803 if len(matches) != 1 || matches[0].FileName != "BANANA" { 804 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 805 } 806 }) 807} 808 809func TestFileRegexpSearchBruteForce(t *testing.T) { 810 b := testIndexBuilder(t, nil, 811 Document{Name: "banzana", Content: []byte("x orange y")}, 812 Document{Name: "banana", Content: []byte("x apple y")}, 813 ) 814 t.Run("LineMatches", func(t *testing.T) { 815 sres := searchForTest(t, b, &query.Regexp{ 816 Regexp: mustParseRE("[qn][zx]"), 817 FileName: true, 818 }) 819 820 matches := sres.Files 821 if len(matches) != 1 || matches[0].FileName != "banzana" { 822 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 823 } 824 }) 825 t.Run("LineMatches", func(t *testing.T) { 826 sres := searchForTest(t, b, &query.Regexp{ 827 Regexp: mustParseRE("[qn][zx]"), 828 FileName: true, 829 }, chunkOpts) 830 831 matches := sres.Files 832 if len(matches) != 1 || matches[0].FileName != "banzana" { 833 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 834 } 835 }) 836} 837 838func TestFileRegexpSearchShortString(t *testing.T) { 839 b := testIndexBuilder(t, nil, 840 Document{Name: "banana.py", Content: []byte("x orange y")}) 841 842 t.Run("LineMatches", func(t *testing.T) { 843 sres := searchForTest(t, b, &query.Regexp{ 844 Regexp: mustParseRE("ana.py"), 845 FileName: true, 846 }) 847 848 matches := sres.Files 849 if len(matches) != 1 || matches[0].FileName != "banana.py" { 850 
t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 851 } 852 }) 853 854 t.Run("ChunkMatches", func(t *testing.T) { 855 sres := searchForTest(t, b, &query.Regexp{ 856 Regexp: mustParseRE("ana.py"), 857 FileName: true, 858 }, chunkOpts) 859 860 matches := sres.Files 861 if len(matches) != 1 || matches[0].FileName != "banana.py" { 862 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 863 } 864 }) 865} 866 867func TestFileSubstringSearchBruteForce(t *testing.T) { 868 b := testIndexBuilder(t, nil, 869 Document{Name: "BANZANA", Content: []byte("x orange y")}, 870 Document{Name: "banana", Content: []byte("x apple y")}) 871 872 q := &query.Substring{ 873 Pattern: "z", 874 FileName: true, 875 } 876 877 t.Run("LineMatches", func(t *testing.T) { 878 res := searchForTest(t, b, q) 879 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 880 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 881 } 882 }) 883 884 t.Run("ChunkMatches", func(t *testing.T) { 885 res := searchForTest(t, b, q, chunkOpts) 886 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 887 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 888 } 889 }) 890} 891 892func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 893 b := testIndexBuilder(t, nil, 894 Document{Name: "BANZANA", Content: []byte("x orange y")}, 895 Document{Name: "bananaq", Content: []byte("x apple y")}) 896 897 q := &query.Substring{ 898 Pattern: "q", 899 FileName: true, 900 } 901 t.Run("LineMatches", func(t *testing.T) { 902 res := searchForTest(t, b, q) 903 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 904 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 905 } 906 }) 907 908 t.Run("LineMatches", func(t *testing.T) { 909 res := searchForTest(t, b, q, chunkOpts) 910 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 911 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 912 } 913 }) 914} 915 916func TestSearchMatchAll(t 
*testing.T) { 917 b := testIndexBuilder(t, nil, 918 Document{Name: "banzana", Content: []byte("x orange y")}, 919 Document{Name: "banana", Content: []byte("x apple y")}) 920 921 t.Run("LineMatches", func(t *testing.T) { 922 sres := searchForTest(t, b, &query.Const{Value: true}) 923 matches := sres.Files 924 if len(matches) != 2 { 925 t.Fatalf("got %v, want 2 matches", matches) 926 } 927 }) 928 929 t.Run("ChunkMatches", func(t *testing.T) { 930 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 931 matches := sres.Files 932 if len(matches) != 2 { 933 t.Fatalf("got %v, want 2 matches", matches) 934 } 935 }) 936} 937 938func TestSearchNewline(t *testing.T) { 939 b := testIndexBuilder(t, nil, 940 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 941 942 t.Run("LineMatches", func(t *testing.T) { 943 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 944 945 // Just check that we don't crash. 946 947 matches := sres.Files 948 if len(matches) != 1 { 949 t.Fatalf("got %v, want 1 matches", matches) 950 } 951 }) 952 953 t.Run("ChunkMatches", func(t *testing.T) { 954 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 955 956 // Just check that we don't crash. 
957 958 matches := sres.Files 959 if len(matches) != 1 { 960 t.Fatalf("got %v, want 1 matches", matches) 961 } 962 }) 963} 964 965func TestSearchMatchAllRegexp(t *testing.T) { 966 b := testIndexBuilder(t, nil, 967 Document{Name: "banzana", Content: []byte("abcd")}, 968 Document{Name: "banana", Content: []byte("pqrs")}) 969 970 t.Run("LineMatches", func(t *testing.T) { 971 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 972 973 matches := sres.Files 974 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 975 t.Fatalf("got %v, want 2 matches", matches) 976 } 977 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 978 t.Fatalf("want 4 chars in every file, got %#v", matches) 979 } 980 }) 981 982 t.Run("ChunkMatches", func(t *testing.T) { 983 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 984 985 matches := sres.Files 986 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 987 t.Fatalf("got %v, want 2 matches", matches) 988 } 989 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 990 t.Fatalf("want 4 chars in every file, got %#v", matches) 991 } 992 }) 993} 994 995func TestFileRestriction(t *testing.T) { 996 b := testIndexBuilder(t, nil, 997 Document{Name: "banana1", Content: []byte("x orange y")}, 998 Document{Name: "banana2", Content: []byte("x apple y")}, 999 Document{Name: "orange", Content: []byte("x apple z")}) 1000 1001 t.Run("LineMatches", func(t *testing.T) { 1002 sres := searchForTest(t, b, query.NewAnd( 1003 &query.Substring{ 1004 Pattern: "banana", 1005 FileName: true, 1006 }, 1007 &query.Substring{ 1008 Pattern: "apple", 1009 })) 1010 1011 matches := sres.Files 1012 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1013 t.Fatalf("got %v, want 1 match", matches) 1014 } 1015 1016 match := matches[0].LineMatches[0] 1017 got := string(match.Line) 1018 want := "x apple y" 1019 if got != want { 1020 t.Errorf("got 
match %#v, want line %q", match, want) 1021 } 1022 }) 1023 1024 t.Run("ChunkMatches", func(t *testing.T) { 1025 sres := searchForTest(t, b, query.NewAnd( 1026 &query.Substring{ 1027 Pattern: "banana", 1028 FileName: true, 1029 }, 1030 &query.Substring{ 1031 Pattern: "apple", 1032 }), chunkOpts) 1033 1034 matches := sres.Files 1035 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1036 t.Fatalf("got %v, want 1 match", matches) 1037 } 1038 1039 match := matches[0].ChunkMatches[0] 1040 got := string(match.Content) 1041 want := "x apple y" 1042 if got != want { 1043 t.Errorf("got match %#v, want line %q", match, want) 1044 } 1045 }) 1046} 1047 1048func TestFileNameBoundary(t *testing.T) { 1049 b := testIndexBuilder(t, nil, 1050 Document{Name: "banana2", Content: []byte("x apple y")}, 1051 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1052 Document{Name: "foo", Content: []byte("x apple y")}) 1053 1054 t.Run("LineMatches", func(t *testing.T) { 1055 sres := searchForTest(t, b, &query.Substring{ 1056 Pattern: "helpers.go", 1057 FileName: true, 1058 }) 1059 1060 matches := sres.Files 1061 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1062 t.Fatalf("got %v, want 1 match", matches) 1063 } 1064 }) 1065 1066 t.Run("ChunkMatches", func(t *testing.T) { 1067 sres := searchForTest(t, b, &query.Substring{ 1068 Pattern: "helpers.go", 1069 FileName: true, 1070 }, chunkOpts) 1071 1072 matches := sres.Files 1073 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1074 t.Fatalf("got %v, want 1 match", matches) 1075 } 1076 }) 1077} 1078 1079func TestDocumentOrder(t *testing.T) { 1080 var docs []Document 1081 for i := 0; i < 3; i++ { 1082 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1083 } 1084 1085 b := testIndexBuilder(t, nil, docs...) 
1086 1087 t.Run("LineMatches", func(t *testing.T) { 1088 sres := searchForTest(t, b, query.NewAnd( 1089 &query.Substring{ 1090 Pattern: "needle", 1091 })) 1092 1093 want := []string{"f0", "f1", "f2"} 1094 var got []string 1095 for _, f := range sres.Files { 1096 got = append(got, f.FileName) 1097 } 1098 if !reflect.DeepEqual(got, want) { 1099 t.Fatalf("got %v, want %v", got, want) 1100 } 1101 }) 1102 1103 t.Run("ChunkMatches", func(t *testing.T) { 1104 sres := searchForTest(t, b, 1105 query.NewAnd(&query.Substring{ 1106 Pattern: "needle", 1107 }), 1108 chunkOpts, 1109 ) 1110 1111 want := []string{"f0", "f1", "f2"} 1112 var got []string 1113 for _, f := range sres.Files { 1114 got = append(got, f.FileName) 1115 } 1116 if !reflect.DeepEqual(got, want) { 1117 t.Fatalf("got %v, want %v", got, want) 1118 } 1119 }) 1120} 1121 1122func TestBranchMask(t *testing.T) { 1123 b := testIndexBuilder(t, &Repository{ 1124 Branches: []RepositoryBranch{ 1125 {"master", "v-master"}, 1126 {"stable", "v-stable"}, 1127 {"bonzai", "v-bonzai"}, 1128 }, 1129 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1130 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1131 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1132 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1133 ) 1134 1135 t.Run("LineMatches", func(t *testing.T) { 1136 sres := searchForTest(t, b, query.NewAnd( 1137 &query.Substring{ 1138 Pattern: "needle", 1139 }, 1140 &query.Branch{ 1141 Pattern: "table", 1142 })) 1143 1144 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1145 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1146 } 1147 1148 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1149 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1150 } 1151 }) 1152 1153 t.Run("ChunkMatches", 
func(t *testing.T) { 1154 sres := searchForTest(t, b, query.NewAnd( 1155 &query.Substring{ 1156 Pattern: "needle", 1157 }, 1158 &query.Branch{ 1159 Pattern: "table", 1160 }), 1161 chunkOpts, 1162 ) 1163 1164 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1165 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1166 } 1167 1168 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1169 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1170 } 1171 }) 1172} 1173 1174func TestBranchLimit(t *testing.T) { 1175 for limit := 64; limit <= 65; limit++ { 1176 r := &Repository{} 1177 for i := 0; i < limit; i++ { 1178 s := fmt.Sprintf("b%d", i) 1179 r.Branches = append(r.Branches, RepositoryBranch{ 1180 s, "v-" + s, 1181 }) 1182 } 1183 _, err := NewIndexBuilder(r) 1184 if limit == 64 && err != nil { 1185 t.Fatalf("NewIndexBuilder: %v", err) 1186 } else if limit == 65 && err == nil { 1187 t.Fatalf("NewIndexBuilder succeeded") 1188 } 1189 } 1190} 1191 1192func TestBranchReport(t *testing.T) { 1193 branches := []string{"stable", "master"} 1194 b := testIndexBuilder(t, &Repository{ 1195 Branches: []RepositoryBranch{ 1196 {"stable", "vs"}, 1197 {"master", "vm"}, 1198 }, 1199 }, 1200 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1201 1202 t.Run("LineMatches", func(t *testing.T) { 1203 sres := searchForTest(t, b, &query.Substring{ 1204 Pattern: "needle", 1205 }) 1206 if len(sres.Files) != 1 { 1207 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1208 } 1209 1210 f := sres.Files[0] 1211 if !reflect.DeepEqual(f.Branches, branches) { 1212 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1213 } 1214 }) 1215 1216 t.Run("ChunkMatches", func(t *testing.T) { 1217 sres := searchForTest(t, b, &query.Substring{ 1218 Pattern: "needle", 1219 }, chunkOpts) 1220 if len(sres.Files) != 1 { 1221 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1222 } 1223 1224 f := 
sres.Files[0] 1225 if !reflect.DeepEqual(f.Branches, branches) { 1226 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1227 } 1228 }) 1229} 1230 1231func TestBranchVersions(t *testing.T) { 1232 b := testIndexBuilder(t, &Repository{ 1233 Branches: []RepositoryBranch{ 1234 {"stable", "v-stable"}, 1235 {"master", "v-master"}, 1236 }, 1237 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1238 1239 t.Run("LineMatches", func(t *testing.T) { 1240 sres := searchForTest(t, b, &query.Substring{ 1241 Pattern: "needle", 1242 }) 1243 if len(sres.Files) != 1 { 1244 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1245 } 1246 1247 f := sres.Files[0] 1248 if f.Version != "v-master" { 1249 t.Fatalf("got file %#v, want version 'v-master'", f) 1250 } 1251 }) 1252 1253 t.Run("ChunkMatches", func(t *testing.T) { 1254 sres := searchForTest(t, b, &query.Substring{ 1255 Pattern: "needle", 1256 }, chunkOpts) 1257 if len(sres.Files) != 1 { 1258 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1259 } 1260 1261 f := sres.Files[0] 1262 if f.Version != "v-master" { 1263 t.Fatalf("got file %#v, want version 'v-master'", f) 1264 } 1265 }) 1266} 1267 1268func mustParseRE(s string) *syntax.Regexp { 1269 r, err := syntax.Parse(s, syntax.Perl) 1270 if err != nil { 1271 panic(err) 1272 } 1273 1274 return r 1275} 1276 1277func TestRegexp(t *testing.T) { 1278 content := []byte("needle the bla") 1279 // ----------------01234567890123 1280 1281 b := testIndexBuilder(t, nil, 1282 Document{ 1283 Name: "f1", 1284 Content: content, 1285 }) 1286 1287 t.Run("LineMatches", func(t *testing.T) { 1288 sres := searchForTest(t, b, 1289 &query.Regexp{ 1290 Regexp: mustParseRE("dle.*bla"), 1291 }) 1292 1293 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1295 } 1296 1297 got := sres.Files[0].LineMatches[0] 1298 want := LineMatch{ 1299 LineFragments: []LineFragmentMatch{{ 1300 LineOffset: 3, 
1301 Offset: 3, 1302 MatchLength: 11, 1303 }}, 1304 Line: content, 1305 FileName: false, 1306 LineNumber: 1, 1307 LineStart: 0, 1308 LineEnd: 14, 1309 } 1310 1311 if !reflect.DeepEqual(got, want) { 1312 t.Errorf("got %#v, want %#v", got, want) 1313 } 1314 }) 1315 1316 t.Run("ChunkMatches", func(t *testing.T) { 1317 sres := searchForTest(t, b, 1318 &query.Regexp{ 1319 Regexp: mustParseRE("dle.*bla"), 1320 }, chunkOpts) 1321 1322 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1323 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1324 } 1325 1326 got := sres.Files[0].ChunkMatches[0] 1327 want := ChunkMatch{ 1328 Content: content, 1329 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1330 Ranges: []Range{{ 1331 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1332 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1333 }}, 1334 } 1335 1336 if diff := cmp.Diff(want, got); diff != "" { 1337 t.Fatal(diff) 1338 } 1339 }) 1340} 1341 1342func TestRegexpFile(t *testing.T) { 1343 content := []byte("needle the bla") 1344 1345 name := "let's play: find the mussel" 1346 b := testIndexBuilder(t, nil, 1347 Document{Name: name, Content: content}, 1348 Document{Name: "play.txt", Content: content}) 1349 1350 t.Run("LineMatches", func(t *testing.T) { 1351 sres := searchForTest(t, b, 1352 &query.Regexp{ 1353 Regexp: mustParseRE("play.*mussel"), 1354 FileName: true, 1355 }) 1356 1357 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1358 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1359 } 1360 1361 if sres.Files[0].FileName != name { 1362 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1363 } 1364 }) 1365 1366 t.Run("ChunkMatches", func(t *testing.T) { 1367 sres := searchForTest(t, b, 1368 &query.Regexp{ 1369 Regexp: mustParseRE("play.*mussel"), 1370 FileName: true, 1371 }, chunkOpts) 1372 1373 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1374 t.Fatalf("got %v, want 1 match 
in 1 file", sres.Files) 1375 } 1376 1377 if sres.Files[0].FileName != name { 1378 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1379 } 1380 }) 1381} 1382 1383func TestRegexpOrder(t *testing.T) { 1384 content := []byte("bla the needle") 1385 // ----------------01234567890123 1386 1387 b := testIndexBuilder(t, nil, 1388 Document{Name: "f1", Content: content}) 1389 1390 t.Run("LineMatches", func(t *testing.T) { 1391 sres := searchForTest(t, b, 1392 &query.Regexp{ 1393 Regexp: mustParseRE("dle.*bla"), 1394 }) 1395 1396 if len(sres.Files) != 0 { 1397 t.Fatalf("got %v, want 0 matches", sres.Files) 1398 } 1399 }) 1400 1401 t.Run("ChunkMatches", func(t *testing.T) { 1402 sres := searchForTest(t, b, 1403 &query.Regexp{ 1404 Regexp: mustParseRE("dle.*bla"), 1405 }) 1406 1407 if len(sres.Files) != 0 { 1408 t.Fatalf("got %v, want 0 matches", sres.Files) 1409 } 1410 }) 1411} 1412 1413func TestRepoName(t *testing.T) { 1414 content := []byte("bla the needle") 1415 // ----------------01234567890123 1416 1417 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1418 Document{Name: "f1", Content: content}) 1419 1420 t.Run("LineMatches", func(t *testing.T) { 1421 sres := searchForTest(t, b, 1422 query.NewAnd( 1423 &query.Substring{Pattern: "needle"}, 1424 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1425 )) 1426 1427 if len(sres.Files) != 0 { 1428 t.Fatalf("got %v, want 0 matches", sres.Files) 1429 } 1430 1431 if sres.Stats.FilesConsidered > 0 { 1432 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1433 } 1434 1435 sres = searchForTest(t, b, 1436 query.NewAnd( 1437 &query.Substring{Pattern: "needle"}, 1438 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1439 )) 1440 if len(sres.Files) != 1 { 1441 t.Fatalf("got %v, want 1 match", sres.Files) 1442 } 1443 }) 1444 1445 t.Run("ChunkMatches", func(t *testing.T) { 1446 sres := searchForTest(t, b, 1447 query.NewAnd( 1448 &query.Substring{Pattern: "needle"}, 1449 &query.Repo{Regexp: 
regexp.MustCompile("foo")}, 1450 ), 1451 chunkOpts, 1452 ) 1453 1454 if len(sres.Files) != 0 { 1455 t.Fatalf("got %v, want 0 matches", sres.Files) 1456 } 1457 1458 if sres.Stats.FilesConsidered > 0 { 1459 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1460 } 1461 1462 sres = searchForTest(t, b, 1463 query.NewAnd( 1464 &query.Substring{Pattern: "needle"}, 1465 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1466 )) 1467 if len(sres.Files) != 1 { 1468 t.Fatalf("got %v, want 1 match", sres.Files) 1469 } 1470 }) 1471} 1472 1473func TestMergeMatches(t *testing.T) { 1474 content := []byte("blablabla") 1475 b := testIndexBuilder(t, nil, 1476 Document{Name: "f1", Content: content}) 1477 1478 t.Run("LineMatches", func(t *testing.T) { 1479 sres := searchForTest(t, b, 1480 &query.Substring{Pattern: "bla"}) 1481 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1482 t.Fatalf("got %v, want 1 match", sres.Files) 1483 } 1484 }) 1485 1486 t.Run("ChunkMatches", func(t *testing.T) { 1487 sres := searchForTest(t, b, 1488 &query.Substring{Pattern: "bla"}, 1489 chunkOpts, 1490 ) 1491 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1492 t.Fatalf("got %v, want 1 match", sres.Files) 1493 } 1494 }) 1495} 1496 1497func TestRepoURL(t *testing.T) { 1498 content := []byte("blablabla") 1499 b := testIndexBuilder(t, &Repository{ 1500 Name: "name", 1501 URL: "URL", 1502 CommitURLTemplate: "commit", 1503 FileURLTemplate: "file-url", 1504 LineFragmentTemplate: "fragment", 1505 }, Document{Name: "f1", Content: content}) 1506 1507 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1508 1509 if sres.RepoURLs["name"] != "file-url" { 1510 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1511 } 1512 if sres.LineFragments["name"] != "fragment" { 1513 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1514 } 1515} 1516 1517func TestRegexpCaseSensitive(t *testing.T) { 1518 content := 
[]byte("bla\nfunc unmarshalGitiles\n") 1519 b := testIndexBuilder(t, nil, Document{ 1520 Name: "f1", 1521 Content: content, 1522 }) 1523 1524 t.Run("LineMatches", func(t *testing.T) { 1525 res := searchForTest(t, b, 1526 &query.Regexp{ 1527 Regexp: mustParseRE("func.*Gitiles"), 1528 CaseSensitive: true, 1529 }) 1530 1531 if len(res.Files) != 1 { 1532 t.Fatalf("got %v, want one match", res.Files) 1533 } 1534 }) 1535 1536 t.Run("ChunkMatches", func(t *testing.T) { 1537 res := searchForTest(t, b, 1538 &query.Regexp{ 1539 Regexp: mustParseRE("func.*Gitiles"), 1540 CaseSensitive: true, 1541 }, 1542 chunkOpts, 1543 ) 1544 1545 if len(res.Files) != 1 { 1546 t.Fatalf("got %v, want one match", res.Files) 1547 } 1548 }) 1549} 1550 1551func TestRegexpCaseFolding(t *testing.T) { 1552 content := []byte("bla\nfunc unmarshalGitiles\n") 1553 1554 b := testIndexBuilder(t, nil, 1555 Document{Name: "f1", Content: content}) 1556 res := searchForTest(t, b, 1557 &query.Regexp{ 1558 Regexp: mustParseRE("func.*GITILES"), 1559 CaseSensitive: false, 1560 }) 1561 1562 if len(res.Files) != 1 { 1563 t.Fatalf("got %v, want one match", res.Files) 1564 } 1565} 1566 1567func TestCaseRegexp(t *testing.T) { 1568 content := []byte("BLABLABLA") 1569 b := testIndexBuilder(t, nil, 1570 Document{Name: "f1", Content: content}) 1571 1572 t.Run("LineMatches", func(t *testing.T) { 1573 res := searchForTest(t, b, 1574 &query.Regexp{ 1575 Regexp: mustParseRE("[xb][xl][xa]"), 1576 CaseSensitive: true, 1577 }) 1578 1579 if len(res.Files) > 0 { 1580 t.Fatalf("got %v, want no matches", res.Files) 1581 } 1582 }) 1583 1584 t.Run("ChunkMatches", func(t *testing.T) { 1585 res := searchForTest(t, b, 1586 &query.Regexp{ 1587 Regexp: mustParseRE("[xb][xl][xa]"), 1588 CaseSensitive: true, 1589 }, 1590 chunkOpts, 1591 ) 1592 1593 if len(res.Files) > 0 { 1594 t.Fatalf("got %v, want no matches", res.Files) 1595 } 1596 }) 1597} 1598 1599func TestNegativeRegexp(t *testing.T) { 1600 content := []byte("BLABLABLA needle bla") 
1601 b := testIndexBuilder(t, nil, 1602 Document{Name: "f1", Content: content}) 1603 1604 t.Run("LineMatches", func(t *testing.T) { 1605 res := searchForTest(t, b, 1606 query.NewAnd( 1607 &query.Substring{ 1608 Pattern: "needle", 1609 }, 1610 &query.Not{ 1611 Child: &query.Regexp{ 1612 Regexp: mustParseRE(".cs"), 1613 }, 1614 })) 1615 1616 if len(res.Files) != 1 { 1617 t.Fatalf("got %v, want 1 match", res.Files) 1618 } 1619 }) 1620 1621 t.Run("ChunkMatches", func(t *testing.T) { 1622 res := searchForTest(t, b, 1623 query.NewAnd( 1624 &query.Substring{ 1625 Pattern: "needle", 1626 }, 1627 &query.Not{ 1628 Child: &query.Regexp{ 1629 Regexp: mustParseRE(".cs"), 1630 }, 1631 }, 1632 ), 1633 chunkOpts) 1634 1635 if len(res.Files) != 1 { 1636 t.Fatalf("got %v, want 1 match", res.Files) 1637 } 1638 }) 1639} 1640 1641func TestSymbolRank(t *testing.T) { 1642 t.Skip() 1643 1644 content := []byte("func bla() blubxxxxx") 1645 // ----------------01234567890123456789 1646 b := testIndexBuilder(t, nil, 1647 Document{ 1648 Name: "f1", 1649 Content: content, 1650 }, Document{ 1651 Name: "f2", 1652 Content: content, 1653 Symbols: []DocumentSection{{5, 8}}, 1654 }, Document{ 1655 Name: "f3", 1656 Content: content, 1657 }) 1658 1659 t.Run("LineMatches", func(t *testing.T) { 1660 res := searchForTest(t, b, 1661 &query.Substring{ 1662 CaseSensitive: false, 1663 Pattern: "bla", 1664 }) 1665 1666 if len(res.Files) != 3 { 1667 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1668 } 1669 if res.Files[0].FileName != "f2" { 1670 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1671 } 1672 }) 1673 1674 t.Run("ChunkMatches", func(t *testing.T) { 1675 res := searchForTest(t, b, 1676 &query.Substring{ 1677 CaseSensitive: false, 1678 Pattern: "bla", 1679 }, chunkOpts) 1680 1681 if len(res.Files) != 3 { 1682 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1683 } 1684 if res.Files[0].FileName != "f2" { 1685 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1686 } 1687 }) 1688} 1689 1690func TestSymbolRankRegexpUTF8(t *testing.T) { 1691 t.Skip() 1692 1693 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1694 content := []byte(prefix + 1695 "func bla() blub") 1696 // ------012345678901234 1697 b := testIndexBuilder(t, nil, 1698 Document{ 1699 Name: "f1", 1700 Content: content, 1701 }, Document{ 1702 Name: "f2", 1703 Content: content, 1704 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1705 }, Document{ 1706 Name: "f3", 1707 Content: content, 1708 }) 1709 1710 t.Run("LineMatches", func(t *testing.T) { 1711 res := searchForTest(t, b, 1712 &query.Regexp{ 1713 Regexp: mustParseRE("b.a"), 1714 }) 1715 1716 if len(res.Files) != 3 { 1717 t.Fatalf("got %#v, want 3 files", res.Files) 1718 } 1719 if res.Files[0].FileName != "f2" { 1720 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1721 } 1722 }) 1723 1724 t.Run("ChunjkMatches", func(t *testing.T) { 1725 res := searchForTest(t, b, 1726 &query.Regexp{ 1727 Regexp: mustParseRE("b.a"), 1728 }, chunkOpts) 1729 1730 if len(res.Files) != 3 { 1731 t.Fatalf("got %#v, want 3 files", res.Files) 1732 } 1733 if res.Files[0].FileName != "f2" { 1734 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1735 } 1736 }) 1737} 1738 1739func TestPartialSymbolRank(t *testing.T) { 1740 t.Skip() 1741 1742 content := []byte("func bla() blub") 1743 // ----------------012345678901234 1744 1745 b := testIndexBuilder(t, nil, 1746 Document{ 1747 Name: "f1", 1748 Content: content, 1749 Symbols: []DocumentSection{{4, 9}}, 1750 }, Document{ 1751 Name: "f2", 1752 Content: content, 1753 Symbols: []DocumentSection{{4, 8}}, 1754 }, Document{ 1755 Name: "f3", 1756 Content: content, 1757 Symbols: []DocumentSection{{4, 9}}, 1758 }) 1759 1760 t.Run("LineMatches", func(t *testing.T) { 1761 res := 
searchForTest(t, b, 1762 &query.Substring{ 1763 Pattern: "bla", 1764 }) 1765 1766 if len(res.Files) != 3 { 1767 t.Fatalf("got %#v, want 3 files", res.Files) 1768 } 1769 if res.Files[0].FileName != "f2" { 1770 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1771 } 1772 }) 1773 1774 t.Run("ChunkMatches", func(t *testing.T) { 1775 res := searchForTest(t, b, 1776 &query.Substring{ 1777 Pattern: "bla", 1778 }, chunkOpts) 1779 1780 if len(res.Files) != 3 { 1781 t.Fatalf("got %#v, want 3 files", res.Files) 1782 } 1783 if res.Files[0].FileName != "f2" { 1784 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1785 } 1786 }) 1787} 1788 1789func TestNegativeRepo(t *testing.T) { 1790 content := []byte("bla the needle") 1791 // ----------------01234567890123 1792 b := testIndexBuilder(t, &Repository{ 1793 Name: "bla", 1794 }, Document{Name: "f1", Content: content}) 1795 1796 t.Run("LineMatches", func(t *testing.T) { 1797 sres := searchForTest(t, b, 1798 query.NewAnd( 1799 &query.Substring{Pattern: "needle"}, 1800 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1801 )) 1802 1803 if len(sres.Files) != 0 { 1804 t.Fatalf("got %v, want 0 matches", sres.Files) 1805 } 1806 }) 1807 1808 t.Run("ChunkMatches", func(t *testing.T) { 1809 sres := searchForTest(t, b, 1810 query.NewAnd( 1811 &query.Substring{Pattern: "needle"}, 1812 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1813 ), chunkOpts) 1814 1815 if len(sres.Files) != 0 { 1816 t.Fatalf("got %v, want 0 matches", sres.Files) 1817 } 1818 }) 1819} 1820 1821func TestListRepos(t *testing.T) { 1822 content := []byte("bla the needle\n") 1823 // ----------------012345678901234- 1824 1825 t.Run("default and minimal fallback", func(t *testing.T) { 1826 repo := &Repository{ 1827 Name: "reponame", 1828 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1829 } 1830 b := testIndexBuilder(t, repo, 1831 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1832 
Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1833 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1834 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1835 1836 searcher := searcherForTest(t, b) 1837 1838 for _, opts := range []*ListOptions{ 1839 nil, 1840 {}, 1841 {Field: RepoListFieldRepos}, 1842 {Field: RepoListFieldReposMap}, 1843 } { 1844 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1845 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1846 1847 res, err := searcher.List(context.Background(), q, opts) 1848 if err != nil { 1849 t.Fatalf("List(%v): %v", q, err) 1850 } 1851 1852 want := &RepoList{ 1853 Repos: []*RepoListEntry{{ 1854 Repository: *repo, 1855 Stats: RepoStats{ 1856 Documents: 4, 1857 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1858 Shards: 1, 1859 1860 NewLinesCount: 4, 1861 DefaultBranchNewLinesCount: 2, 1862 OtherBranchesNewLinesCount: 3, 1863 }, 1864 }}, 1865 Stats: RepoStats{ 1866 Repos: 1, 1867 Documents: 4, 1868 ContentBytes: 68, 1869 Shards: 1, 1870 1871 NewLinesCount: 4, 1872 DefaultBranchNewLinesCount: 2, 1873 OtherBranchesNewLinesCount: 3, 1874 }, 1875 } 1876 ignored := []cmp.Option{ 1877 cmpopts.EquateEmpty(), 1878 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1879 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1880 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1881 cmpopts.IgnoreFields(Repository{}, "priority"), 1882 } 1883 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1884 t.Fatalf("mismatch (-want +got):\n%s", diff) 1885 } 1886 1887 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1888 res, err = searcher.List(context.Background(), q, nil) 1889 if err != nil { 1890 t.Fatalf("List(%v): %v", q, err) 1891 } 1892 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1893 t.Fatalf("got %v, want 0 matches", res) 1894 } 1895 }) 1896 } 1897 }) 1898 1899 t.Run("minimal", func(t *testing.T) { 1900 repo := &Repository{ 1901 ID: 
1234, 1902 Name: "reponame", 1903 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1904 RawConfig: map[string]string{"repoid": "1234"}, 1905 } 1906 b := testIndexBuilder(t, repo, 1907 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1908 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1909 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1910 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1911 1912 searcher := searcherForTest(t, b) 1913 1914 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1915 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1916 if err != nil { 1917 t.Fatalf("List(%v): %v", q, err) 1918 } 1919 1920 want := &RepoList{ 1921 ReposMap: ReposMap{ 1922 repo.ID: { 1923 HasSymbols: repo.HasSymbols, 1924 Branches: repo.Branches, 1925 }, 1926 }, 1927 Stats: RepoStats{ 1928 Repos: 1, 1929 Shards: 1, 1930 Documents: 4, 1931 IndexBytes: 412, 1932 ContentBytes: 68, 1933 NewLinesCount: 4, 1934 DefaultBranchNewLinesCount: 2, 1935 OtherBranchesNewLinesCount: 3, 1936 }, 1937 } 1938 1939 ignored := []cmp.Option{ 1940 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1941 } 1942 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1943 t.Fatalf("mismatch (-want +got):\n%s", diff) 1944 } 1945 1946 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1947 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1948 if err != nil { 1949 t.Fatalf("List(%v): %v", q, err) 1950 } 1951 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1952 t.Fatalf("got %v, want 0 matches", res) 1953 } 1954 }) 1955} 1956 1957func TestListReposByContent(t *testing.T) { 1958 content := []byte("bla the needle") 1959 1960 b := testIndexBuilder(t, &Repository{ 1961 Name: "reponame", 1962 }, 1963 Document{Name: "f1", Content: content}, 1964 Document{Name: "f2", Content: content}) 1965 1966 searcher := 
searcherForTest(t, b) 1967 q := &query.Substring{Pattern: "needle"} 1968 res, err := searcher.List(context.Background(), q, nil) 1969 if err != nil { 1970 t.Fatalf("List(%v): %v", q, err) 1971 } 1972 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1973 t.Fatalf("got %v, want 1 matches", res) 1974 } 1975 if got := res.Repos[0].Stats.Shards; got != 1 { 1976 t.Fatalf("got %d, want 1 shard", got) 1977 } 1978 q = &query.Substring{Pattern: "foo"} 1979 res, err = searcher.List(context.Background(), q, nil) 1980 if err != nil { 1981 t.Fatalf("List(%v): %v", q, err) 1982 } 1983 if len(res.Repos) != 0 { 1984 t.Fatalf("got %v, want 0 matches", res) 1985 } 1986} 1987 1988func TestMetadata(t *testing.T) { 1989 content := []byte("bla the needle") 1990 1991 b := testIndexBuilder(t, &Repository{ 1992 Name: "reponame", 1993 }, Document{Name: "f1", Content: content}, 1994 Document{Name: "f2", Content: content}) 1995 1996 var buf bytes.Buffer 1997 if err := b.Write(&buf); err != nil { 1998 t.Fatal(err) 1999 } 2000 f := &memSeeker{buf.Bytes()} 2001 2002 rd, _, err := ReadMetadata(f) 2003 if err != nil { 2004 t.Fatalf("ReadMetadata: %v", err) 2005 } 2006 2007 if got, want := rd[0].Name, "reponame"; got != want { 2008 t.Fatalf("got %q want %q", got, want) 2009 } 2010} 2011 2012func TestOr(t *testing.T) { 2013 b := testIndexBuilder(t, nil, 2014 Document{Name: "f1", Content: []byte("needle")}, 2015 Document{Name: "f2", Content: []byte("banana")}) 2016 t.Run("LineMatches", func(t *testing.T) { 2017 sres := searchForTest(t, b, query.NewOr( 2018 &query.Substring{Pattern: "needle"}, 2019 &query.Substring{Pattern: "banana"})) 2020 2021 if len(sres.Files) != 2 { 2022 t.Fatalf("got %v, want 2 files", sres.Files) 2023 } 2024 }) 2025 2026 t.Run("ChunkMatches", func(t *testing.T) { 2027 sres := searchForTest(t, b, query.NewOr( 2028 &query.Substring{Pattern: "needle"}, 2029 &query.Substring{Pattern: "banana"})) 2030 2031 if len(sres.Files) != 2 { 2032 t.Fatalf("got %v, want 2 
files", sres.Files) 2033 } 2034 }) 2035} 2036 2037func TestFrequency(t *testing.T) { 2038 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2039 2040 b := testIndexBuilder(t, nil, 2041 Document{ 2042 Name: "f1", 2043 Content: content, 2044 }) 2045 2046 t.Run("LineMatches", func(t *testing.T) { 2047 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 2048 if len(sres.Files) != 0 { 2049 t.Errorf("got %v, wanted 0 matches", sres.Files) 2050 } 2051 }) 2052 2053 t.Run("ChunkMatches", func(t *testing.T) { 2054 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 2055 if len(sres.Files) != 0 { 2056 t.Errorf("got %v, wanted 0 matches", sres.Files) 2057 } 2058 }) 2059} 2060 2061func TestMatchNewline(t *testing.T) { 2062 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 2063 if err != nil { 2064 t.Fatalf("syntax.Parse: %v", err) 2065 } 2066 2067 content := []byte("pqr\nalex") 2068 2069 b := testIndexBuilder(t, nil, 2070 Document{ 2071 Name: "f1", 2072 Content: content, 2073 }) 2074 2075 t.Run("LineMatches", func(t *testing.T) { 2076 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 2077 if len(sres.Files) != 1 { 2078 t.Errorf("got %v, wanted 1 matches", sres.Files) 2079 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 2080 t.Errorf("got match line %q, want %q", l, content) 2081 } 2082 }) 2083 2084 t.Run("ChunkMatches", func(t *testing.T) { 2085 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 2086 if len(sres.Files) != 1 { 2087 t.Errorf("got %v, wanted 1 matches", sres.Files) 2088 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 2089 t.Errorf("got match line %q, want %q", c, content) 2090 } 2091 }) 2092} 2093 2094func TestSubRepo(t *testing.T) { 2095 subRepos := map[string]*Repository{ 2096 "sub": { 2097 Name: "sub-name", 2098 LineFragmentTemplate: "sub-line", 2099 }, 2100 } 
2101 2102 content := []byte("pqr\nalex") 2103 2104 b := testIndexBuilder(t, &Repository{ 2105 SubRepoMap: subRepos, 2106 }, Document{ 2107 Name: "sub/f1", 2108 Content: content, 2109 SubRepositoryPath: "sub", 2110 }) 2111 2112 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 2113 if len(sres.Files) != 1 { 2114 t.Fatalf("got %v, wanted 1 matches", sres.Files) 2115 } 2116 2117 f := sres.Files[0] 2118 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 2119 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 2120 } 2121 2122 if sres.LineFragments["sub-name"] != "sub-line" { 2123 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 2124 } 2125} 2126 2127func TestSearchEither(t *testing.T) { 2128 b := testIndexBuilder(t, nil, 2129 Document{Name: "f1", Content: []byte("bla needle bla")}, 2130 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 2131 2132 t.Run("LineMatches", func(t *testing.T) { 2133 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 2134 if len(sres.Files) != 2 { 2135 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2136 } 2137 2138 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2139 if len(sres.Files) != 1 { 2140 t.Fatalf("got %v, wanted 1 match", sres.Files) 2141 } 2142 2143 if got, want := sres.Files[0].FileName, "f1"; got != want { 2144 t.Errorf("got %q, want %q", got, want) 2145 } 2146 }) 2147 2148 t.Run("ChunkMatches", func(t *testing.T) { 2149 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 2150 if len(sres.Files) != 2 { 2151 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2152 } 2153 2154 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2155 if len(sres.Files) != 1 { 2156 t.Fatalf("got %v, wanted 1 match", sres.Files) 2157 } 2158 2159 if got, want := sres.Files[0].FileName, "f1"; got != want { 2160 t.Errorf("got %q, want %q", 
got, want) 2161 } 2162 }) 2163} 2164 2165func TestUnicodeExactMatch(t *testing.T) { 2166 needle := "néédlÉ" 2167 content := []byte("blá blá " + needle + " blâ") 2168 2169 b := testIndexBuilder(t, nil, 2170 Document{Name: "f1", Content: content}) 2171 2172 t.Run("LineMatches", func(t *testing.T) { 2173 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2174 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2175 } 2176 }) 2177 2178 t.Run("ChunkMatches", func(t *testing.T) { 2179 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2180 if len(res.Files) != 1 { 2181 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2182 } 2183 }) 2184} 2185 2186func TestUnicodeCoverContent(t *testing.T) { 2187 needle := "néédlÉ" 2188 content := []byte("blá blá " + needle + " blâ") 2189 2190 b := testIndexBuilder(t, nil, 2191 Document{Name: "f1", Content: content}) 2192 2193 t.Run("LineMatches", func(t *testing.T) { 2194 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2195 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2196 } 2197 2198 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2199 if len(res.Files) != 1 { 2200 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2201 } 2202 2203 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2204 t.Errorf("got %d want %d", got, want) 2205 } 2206 }) 2207 2208 t.Run("ChunkMatches", func(t *testing.T) { 2209 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2210 if len(res.Files) != 0 { 2211 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2212 } 2213 2214 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2215 if len(res.Files) != 1 { 2216 t.Fatalf("case 
insensitive: got %v, wanted 1 match", res.Files) 2217 } 2218 2219 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2220 want := uint32(strings.Index(string(content), needle)) 2221 if got != want { 2222 t.Errorf("got %d want %d", got, want) 2223 } 2224 }) 2225} 2226 2227func TestUnicodeNonCoverContent(t *testing.T) { 2228 needle := "nééáádlÉ" 2229 content := []byte("blá blá " + needle + " blâ") 2230 2231 b := testIndexBuilder(t, nil, 2232 Document{Name: "f1", Content: content}) 2233 2234 t.Run("LineMatches", func(t *testing.T) { 2235 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2236 if len(res.Files) != 1 { 2237 t.Fatalf("got %v, wanted 1 match", res.Files) 2238 } 2239 2240 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2241 t.Errorf("got %d want %d", got, want) 2242 } 2243 }) 2244 2245 t.Run("ChunkMatches", func(t *testing.T) { 2246 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2247 if len(res.Files) != 1 { 2248 t.Fatalf("got %v, wanted 1 match", res.Files) 2249 } 2250 2251 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2252 want := uint32(strings.Index(string(content), needle)) 2253 if got != want { 2254 t.Errorf("got %d want %d", got, want) 2255 } 2256 }) 2257} 2258 2259const kelvinCodePoint = 8490 2260 2261func TestUnicodeVariableLength(t *testing.T) { 2262 lower := 'k' 2263 upper := rune(kelvinCodePoint) 2264 2265 needle := "nee" + string([]rune{lower}) + "eed" 2266 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2267 " ee" + string([]rune{lower}) + "ee" + 2268 " ee" + string([]rune{upper}) + "ee") 2269 2270 t.Run("LineMatches", func(t *testing.T) { 2271 b := testIndexBuilder(t, nil, 2272 Document{Name: "f1", Content: []byte(corpus)}) 2273 2274 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2275 if len(res.Files) != 1 { 2276 
t.Fatalf("got %v, wanted 1 match", res.Files) 2277 } 2278 }) 2279 2280 t.Run("ChunkMatches", func(t *testing.T) { 2281 b := testIndexBuilder(t, nil, 2282 Document{Name: "f1", Content: []byte(corpus)}) 2283 2284 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2285 if len(res.Files) != 1 { 2286 t.Fatalf("got %v, wanted 1 match", res.Files) 2287 } 2288 }) 2289} 2290 2291func TestUnicodeFileStartOffsets(t *testing.T) { 2292 unicode := "世界" 2293 wat := "waaaaaat" 2294 b := testIndexBuilder(t, nil, 2295 Document{ 2296 Name: "f1", 2297 Content: []byte(unicode), 2298 }, 2299 Document{ 2300 Name: "f2", 2301 Content: []byte(wat), 2302 }, 2303 ) 2304 q := &query.Substring{Pattern: wat, Content: true} 2305 res := searchForTest(t, b, q) 2306 if len(res.Files) != 1 { 2307 t.Fatalf("got %v, wanted 1 match", res.Files) 2308 } 2309} 2310 2311func TestLongFileUTF8(t *testing.T) { 2312 needle := "neeedle" 2313 2314 // 6 bytes. 2315 unicode := "世界" 2316 content := []byte(strings.Repeat(unicode, 100) + needle) 2317 b := testIndexBuilder(t, nil, 2318 Document{ 2319 Name: "f1", 2320 Content: []byte(strings.Repeat("a", 50)), 2321 }, 2322 Document{ 2323 Name: "f2", 2324 Content: content, 2325 }) 2326 2327 t.Run("LineMatches", func(t *testing.T) { 2328 q := &query.Substring{Pattern: needle, Content: true} 2329 res := searchForTest(t, b, q) 2330 if len(res.Files) != 1 { 2331 t.Errorf("got %v, want 1 result", res) 2332 } 2333 }) 2334 2335 t.Run("ChunkMatches", func(t *testing.T) { 2336 q := &query.Substring{Pattern: needle, Content: true} 2337 res := searchForTest(t, b, q, chunkOpts) 2338 if len(res.Files) != 1 { 2339 t.Errorf("got %v, want 1 result", res) 2340 } 2341 }) 2342} 2343 2344func TestEstimateDocCount(t *testing.T) { 2345 content := []byte("bla needle bla") 2346 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2347 Document{Name: "f1", Content: content}, 2348 Document{Name: "f2", Content: content}, 2349 ) 2350 2351 t.Run("LineMatches", 
func(t *testing.T) { 2352 if sres := searchForTest(t, b, 2353 query.NewAnd( 2354 &query.Substring{Pattern: "needle"}, 2355 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2356 ), SearchOptions{ 2357 EstimateDocCount: true, 2358 }); sres.Stats.ShardFilesConsidered != 2 { 2359 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2360 } 2361 if sres := searchForTest(t, b, 2362 query.NewAnd( 2363 &query.Substring{Pattern: "needle"}, 2364 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2365 ), SearchOptions{ 2366 EstimateDocCount: true, 2367 }); sres.Stats.ShardFilesConsidered != 0 { 2368 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2369 } 2370 }) 2371 2372 t.Run("ChunkMatches", func(t *testing.T) { 2373 if sres := searchForTest(t, b, 2374 query.NewAnd( 2375 &query.Substring{Pattern: "needle"}, 2376 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2377 ), SearchOptions{ 2378 EstimateDocCount: true, 2379 ChunkMatches: true, 2380 }); sres.Stats.ShardFilesConsidered != 2 { 2381 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2382 } 2383 if sres := searchForTest(t, b, 2384 query.NewAnd( 2385 &query.Substring{Pattern: "needle"}, 2386 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2387 ), SearchOptions{ 2388 EstimateDocCount: true, 2389 ChunkMatches: true, 2390 }); sres.Stats.ShardFilesConsidered != 0 { 2391 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2392 } 2393 }) 2394} 2395 2396func TestUTF8CorrectCorpus(t *testing.T) { 2397 needle := "neeedle" 2398 2399 // 6 bytes. 
2400 unicode := "世界" 2401 b := testIndexBuilder(t, nil, 2402 Document{ 2403 Name: "f1", 2404 Content: []byte(strings.Repeat(unicode, 100)), 2405 }, 2406 Document{ 2407 Name: "xxxxxneeedle", 2408 Content: []byte("hello"), 2409 }) 2410 2411 t.Run("LineMatches", func(t *testing.T) { 2412 q := &query.Substring{Pattern: needle, FileName: true} 2413 res := searchForTest(t, b, q) 2414 if len(res.Files) != 1 { 2415 t.Errorf("got %v, want 1 result", res) 2416 } 2417 }) 2418 2419 t.Run("ChunkMatches", func(t *testing.T) { 2420 q := &query.Substring{Pattern: needle, FileName: true} 2421 res := searchForTest(t, b, q, chunkOpts) 2422 if len(res.Files) != 1 { 2423 t.Errorf("got %v, want 1 result", res) 2424 } 2425 }) 2426} 2427 2428func TestBuilderStats(t *testing.T) { 2429 b := testIndexBuilder(t, nil, 2430 Document{ 2431 Name: "f1", 2432 Content: []byte(strings.Repeat("abcd", 1024)), 2433 }) 2434 var buf bytes.Buffer 2435 if err := b.Write(&buf); err != nil { 2436 t.Fatal(err) 2437 } 2438 2439 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2440 t.Errorf("got %d, want %d", got, want) 2441 } 2442} 2443 2444func TestIOStats(t *testing.T) { 2445 b := testIndexBuilder(t, nil, 2446 Document{ 2447 Name: "f1", 2448 Content: []byte(strings.Repeat("abcd", 1024)), 2449 }) 2450 2451 t.Run("LineMatches", func(t *testing.T) { 2452 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2453 res := searchForTest(t, b, q) 2454 2455 // 4096 (content) + 2 (overhead: newlines or doc sections) 2456 if got, want := res.Stats.ContentBytesLoaded, int64(4100); got != want { 2457 t.Errorf("got content I/O %d, want %d", got, want) 2458 } 2459 2460 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2461 // delta encoded. 
2462 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2463 t.Errorf("got index I/O %d, want %d", got, want) 2464 } 2465 }) 2466 2467 t.Run("ChunkMatches", func(t *testing.T) { 2468 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2469 res := searchForTest(t, b, q, chunkOpts) 2470 2471 // 4096 (content) + 2 (overhead: newlines or doc sections) 2472 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2473 t.Errorf("got content I/O %d, want %d", got, want) 2474 } 2475 2476 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2477 // delta encoded. 2478 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2479 t.Errorf("got index I/O %d, want %d", got, want) 2480 } 2481 }) 2482} 2483 2484func TestStartLineAnchor(t *testing.T) { 2485 b := testIndexBuilder(t, nil, 2486 Document{ 2487 Name: "f1", 2488 Content: []byte( 2489 `hello 2490start of middle of line 2491`), 2492 }) 2493 2494 t.Run("LineMatches", func(t *testing.T) { 2495 q, err := query.Parse("^start") 2496 if err != nil { 2497 t.Errorf("parse: %v", err) 2498 } 2499 2500 res := searchForTest(t, b, q) 2501 if len(res.Files) != 1 { 2502 t.Errorf("got %v, want 1 file", res.Files) 2503 } 2504 2505 q, err = query.Parse("^middle") 2506 if err != nil { 2507 t.Errorf("parse: %v", err) 2508 } 2509 res = searchForTest(t, b, q) 2510 if len(res.Files) != 0 { 2511 t.Errorf("got %v, want 0 files", res.Files) 2512 } 2513 }) 2514 2515 t.Run("ChunkMatches", func(t *testing.T) { 2516 q, err := query.Parse("^start") 2517 if err != nil { 2518 t.Errorf("parse: %v", err) 2519 } 2520 2521 res := searchForTest(t, b, q, chunkOpts) 2522 if len(res.Files) != 1 { 2523 t.Errorf("got %v, want 1 file", res.Files) 2524 } 2525 2526 q, err = query.Parse("^middle") 2527 if err != nil { 2528 t.Errorf("parse: %v", err) 2529 } 2530 res = searchForTest(t, b, q, chunkOpts) 2531 if len(res.Files) != 0 { 2532 t.Errorf("got %v, want 0 files", res.Files) 2533 } 
2534 }) 2535} 2536 2537func TestAndOrUnicode(t *testing.T) { 2538 q, err := query.Parse("orange.*apple") 2539 if err != nil { 2540 t.Errorf("parse: %v", err) 2541 } 2542 finalQ := query.NewAnd(q, 2543 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2544 query.NewOr(&query.Branch{Pattern: "master"})))) 2545 2546 b := testIndexBuilder(t, &Repository{ 2547 Name: "name", 2548 Branches: []RepositoryBranch{{"master", "master-version"}}, 2549 }, Document{ 2550 Name: "f2", 2551 Content: []byte("orange\u2318apple"), 2552 // --------------0123456 78901 2553 Branches: []string{"master"}, 2554 }) 2555 2556 t.Run("LineMatches", func(t *testing.T) { 2557 res := searchForTest(t, b, finalQ) 2558 if len(res.Files) != 1 { 2559 t.Errorf("got %v, want 1 result", res.Files) 2560 } 2561 }) 2562 2563 t.Run("ChunkMatches", func(t *testing.T) { 2564 res := searchForTest(t, b, finalQ, chunkOpts) 2565 if len(res.Files) != 1 { 2566 t.Errorf("got %v, want 1 result", res.Files) 2567 } 2568 }) 2569} 2570 2571func TestAndShort(t *testing.T) { 2572 content := []byte("bla needle at orange bla") 2573 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2574 Document{Name: "f1", Content: content}, 2575 Document{Name: "f2", Content: []byte("xx at xx")}, 2576 Document{Name: "f3", Content: []byte("yy orange xx")}, 2577 ) 2578 2579 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2580 &query.Substring{Pattern: "orange"}) 2581 2582 t.Run("LineMatches", func(t *testing.T) { 2583 res := searchForTest(t, b, q) 2584 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2585 t.Errorf("got %v, want 1 result", res.Files) 2586 } 2587 }) 2588 2589 t.Run("ChunkMatches", func(t *testing.T) { 2590 res := searchForTest(t, b, q, chunkOpts) 2591 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2592 t.Errorf("got %v, want 1 result", res.Files) 2593 } 2594 }) 2595} 2596 2597func TestNoCollectRegexpSubstring(t *testing.T) { 2598 content := []byte("bla final bla\nfoo final, foo") 
2599 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2600 Document{Name: "f1", Content: content}, 2601 ) 2602 2603 q := &query.Regexp{ 2604 Regexp: mustParseRE("final[,.]"), 2605 } 2606 2607 t.Run("LineMatches", func(t *testing.T) { 2608 res := searchForTest(t, b, q) 2609 if len(res.Files) != 1 { 2610 t.Fatalf("got %v, want 1 result", res.Files) 2611 } 2612 if f := res.Files[0]; len(f.LineMatches) != 1 { 2613 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2614 } 2615 }) 2616 2617 t.Run("ChunkMatches", func(t *testing.T) { 2618 res := searchForTest(t, b, q, chunkOpts) 2619 if len(res.Files) != 1 { 2620 t.Fatalf("got %v, want 1 result", res.Files) 2621 } 2622 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2623 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2624 } 2625 }) 2626} 2627 2628func printLineMatches(ms []LineMatch) string { 2629 var ss []string 2630 for _, m := range ms { 2631 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2632 } 2633 2634 return strings.Join(ss, ", ") 2635} 2636 2637func TestLang(t *testing.T) { 2638 content := []byte("bla needle bla") 2639 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2640 Document{Name: "f1", Content: content}, 2641 Document{Name: "f2", Language: "java", Content: content}, 2642 Document{Name: "f3", Language: "cpp", Content: content}, 2643 ) 2644 2645 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2646 &query.Language{Language: "cpp"}) 2647 2648 t.Run("LineMatches", func(t *testing.T) { 2649 res := searchForTest(t, b, q) 2650 if len(res.Files) != 1 { 2651 t.Fatalf("got %v, want 1 result in f3", res.Files) 2652 } 2653 f := res.Files[0] 2654 if f.FileName != "f3" || f.Language != "cpp" { 2655 t.Fatalf("got %v, want 1 match with language cpp", f) 2656 } 2657 }) 2658 2659 t.Run("ChunkMatches", func(t *testing.T) { 2660 res := searchForTest(t, b, q, chunkOpts) 2661 if len(res.Files) != 1 { 2662 
t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut checks that searching for a language no document uses
// returns no files and loads no index bytes.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms checks that a query made only of a Language atom
// (no text atom) returns the single java document, f2.
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
2722 }) 2723} 2724 2725func TestNoPositiveAtoms(t *testing.T) { 2726 content := []byte("bla needle bla") 2727 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2728 Document{Name: "f1", Content: content}, 2729 Document{Name: "f2", Content: content}, 2730 ) 2731 2732 q := query.NewAnd( 2733 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2734 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2735 t.Run("LineMatches", func(t *testing.T) { 2736 res := searchForTest(t, b, q) 2737 if len(res.Files) != 2 { 2738 t.Fatalf("got %v, want 2 results in f3", res.Files) 2739 } 2740 }) 2741 t.Run("ChunkMatches", func(t *testing.T) { 2742 res := searchForTest(t, b, q, chunkOpts) 2743 if len(res.Files) != 2 { 2744 t.Fatalf("got %v, want 2 results in f3", res.Files) 2745 } 2746 }) 2747} 2748 2749func TestSymbolBoundaryStart(t *testing.T) { 2750 content := []byte("start\nbla bla\nend") 2751 // ----------------012345-67890123-456 2752 2753 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2754 Document{ 2755 Name: "f1", 2756 Content: content, 2757 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2758 }, 2759 ) 2760 q := &query.Symbol{ 2761 Expr: &query.Substring{Pattern: "start"}, 2762 } 2763 t.Run("LineMatches", func(t *testing.T) { 2764 res := searchForTest(t, b, q) 2765 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2766 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2767 } 2768 m := res.Files[0].LineMatches[0].LineFragments[0] 2769 if m.Offset != 0 { 2770 t.Fatalf("got offset %d want 0", m.Offset) 2771 } 2772 }) 2773 2774 t.Run("ChunkMatches", func(t *testing.T) { 2775 res := searchForTest(t, b, q, chunkOpts) 2776 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2777 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2778 } 2779 m := res.Files[0].ChunkMatches[0].Ranges[0] 2780 if m.Start.ByteOffset != 0 { 2781 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2782 } 2783 }) 2784} 2785 2786func TestSymbolBoundaryEnd(t 
*testing.T) { 2787 content := []byte("start\nbla bla\nend") 2788 // ----------------012345-67890123-456 2789 2790 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2791 Document{ 2792 Name: "f1", 2793 Content: content, 2794 Symbols: []DocumentSection{{14, 17}}, 2795 }, 2796 ) 2797 q := &query.Symbol{ 2798 Expr: &query.Substring{Pattern: "end"}, 2799 } 2800 t.Run("LineMatches", func(t *testing.T) { 2801 res := searchForTest(t, b, q) 2802 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2803 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2804 } 2805 m := res.Files[0].LineMatches[0].LineFragments[0] 2806 if m.Offset != 14 { 2807 t.Fatalf("got offset %d want 0", m.Offset) 2808 } 2809 }) 2810 2811 t.Run("ChunkMatches", func(t *testing.T) { 2812 res := searchForTest(t, b, q, chunkOpts) 2813 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2814 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2815 } 2816 m := res.Files[0].ChunkMatches[0].Ranges[0] 2817 if m.Start.ByteOffset != 14 { 2818 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2819 } 2820 }) 2821} 2822 2823func TestSymbolSubstring(t *testing.T) { 2824 content := []byte("bla\nsymblabla\nbla") 2825 // ----------------0123-4567890123-456 2826 2827 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2828 Document{ 2829 Name: "f1", 2830 Content: content, 2831 Symbols: []DocumentSection{{4, 12}}, 2832 }, 2833 ) 2834 q := &query.Symbol{ 2835 Expr: &query.Substring{Pattern: "bla"}, 2836 } 2837 t.Run("LineMatches", func(t *testing.T) { 2838 res := searchForTest(t, b, q) 2839 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2840 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2841 } 2842 m := res.Files[0].LineMatches[0].LineFragments[0] 2843 if m.Offset != 7 || m.MatchLength != 3 { 2844 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 2845 } 2846 }) 2847 2848 t.Run("ChunkMatches", func(t *testing.T) { 2849 res := 
searchForTest(t, b, q, chunkOpts) 2850 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2851 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2852 } 2853 m := res.Files[0].ChunkMatches[0].Ranges[0] 2854 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 2855 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 2856 } 2857 }) 2858} 2859 2860func TestSymbolSubstringExact(t *testing.T) { 2861 content := []byte("bla\nsym\nbla\nsym\nasymb") 2862 // ----------------0123-4567-890123456-78901 2863 2864 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2865 Document{ 2866 Name: "f1", 2867 Content: content, 2868 Symbols: []DocumentSection{{4, 7}}, 2869 }, 2870 ) 2871 q := &query.Symbol{ 2872 Expr: &query.Substring{Pattern: "sym"}, 2873 } 2874 t.Run("LineMatches", func(t *testing.T) { 2875 res := searchForTest(t, b, q) 2876 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2877 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2878 } 2879 m := res.Files[0].LineMatches[0].LineFragments[0] 2880 if m.Offset != 4 { 2881 t.Fatalf("got offset %d, want 7", m.Offset) 2882 } 2883 }) 2884 2885 t.Run("ChunkMatches", func(t *testing.T) { 2886 res := searchForTest(t, b, q, chunkOpts) 2887 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2888 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2889 } 2890 m := res.Files[0].ChunkMatches[0].Ranges[0] 2891 if m.Start.ByteOffset != 4 { 2892 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 2893 } 2894 }) 2895} 2896 2897func TestSymbolRegexpExact(t *testing.T) { 2898 content := []byte("blah\nbla\nbl") 2899 // ----------------01234-5678-90 2900 2901 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2902 Document{ 2903 Name: "f1", 2904 Content: content, 2905 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 2906 }, 2907 ) 2908 q := &query.Symbol{ 2909 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 2910 } 2911 t.Run("LineMatches", func(t 
*testing.T) { 2912 res := searchForTest(t, b, q) 2913 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2914 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2915 } 2916 m := res.Files[0].LineMatches[0].LineFragments[0] 2917 if m.Offset != 5 { 2918 t.Fatalf("got offset %d, want 5", m.Offset) 2919 } 2920 }) 2921 2922 t.Run("ChunkMatches", func(t *testing.T) { 2923 res := searchForTest(t, b, q, chunkOpts) 2924 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2925 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2926 } 2927 m := res.Files[0].ChunkMatches[0].Ranges[0] 2928 if m.Start.ByteOffset != 5 { 2929 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 2930 } 2931 }) 2932} 2933 2934func TestSymbolRegexpPartial(t *testing.T) { 2935 content := []byte("abcdef") 2936 // ----------------012345 2937 2938 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2939 Document{ 2940 Name: "f1", 2941 Content: content, 2942 Symbols: []DocumentSection{{0, 6}}, 2943 }, 2944 ) 2945 q := &query.Symbol{ 2946 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 2947 } 2948 t.Run("LineMatches", func(t *testing.T) { 2949 res := searchForTest(t, b, q) 2950 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2951 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2952 } 2953 m := res.Files[0].LineMatches[0].LineFragments[0] 2954 if m.Offset != 1 { 2955 t.Fatalf("got offset %d, want 1", m.Offset) 2956 } 2957 if m.MatchLength != 3 { 2958 t.Fatalf("got match length %d, want 3", m.MatchLength) 2959 } 2960 }) 2961 2962 t.Run("ChunkMatches", func(t *testing.T) { 2963 res := searchForTest(t, b, q, chunkOpts) 2964 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2965 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2966 } 2967 m := res.Files[0].ChunkMatches[0].Ranges[0] 2968 if m.Start.ByteOffset != 1 { 2969 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 2970 } 2971 if m.End.ByteOffset != 4 { 2972 t.Fatalf("got 
match end %d, want 4", m.End.ByteOffset) 2973 } 2974 }) 2975} 2976 2977func TestSymbolRegexpAll(t *testing.T) { 2978 docs := []Document{ 2979 { 2980 Name: "f1", 2981 Content: []byte("Hello Zoekt"), 2982 // --------------01234567890 2983 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 2984 }, 2985 { 2986 Name: "f2", 2987 Content: []byte("Second Zoekt Third"), 2988 // --------------012345678901234567 2989 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 2990 }, 2991 } 2992 2993 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) 2994 q := &query.Symbol{ 2995 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2996 } 2997 t.Run("LineMatches", func(t *testing.T) { 2998 res := searchForTest(t, b, q) 2999 if len(res.Files) != len(docs) { 3000 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3001 } 3002 for i, want := range docs { 3003 got := res.Files[i].LineMatches[0].LineFragments 3004 if len(got) != len(want.Symbols) { 3005 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3006 } 3007 3008 for j, sec := range want.Symbols { 3009 if sec.Start != got[j].Offset { 3010 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 3011 } 3012 } 3013 } 3014 }) 3015 3016 t.Run("ChunkMatches", func(t *testing.T) { 3017 res := searchForTest(t, b, q, chunkOpts) 3018 if len(res.Files) != len(docs) { 3019 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3020 } 3021 for i, want := range docs { 3022 got := res.Files[i].ChunkMatches[0].Ranges 3023 if len(got) != len(want.Symbols) { 3024 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3025 } 3026 3027 for j, sec := range want.Symbols { 3028 if sec.Start != uint32(got[j].Start.ByteOffset) { 3029 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 3030 } 3031 } 3032 } 3033 }) 3034} 3035 3036func TestHitIterTerminate(t *testing.T) { 3037 // contrived input: trigram 
frequencies forces selecting abc + 3038 // def for the distance iteration. There is no match, so this 3039 // will advance the compressedPostingIterator to beyond the 3040 // end. 3041 content := []byte("abc bcdbcd cdecde abcabc def efg") 3042 b := testIndexBuilder(t, nil, 3043 Document{ 3044 Name: "f1", 3045 Content: content, 3046 }, 3047 ) 3048 3049 t.Run("LineMatches", func(t *testing.T) { 3050 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 3051 }) 3052 3053 t.Run("ChunkMatches", func(t *testing.T) { 3054 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 3055 }) 3056} 3057 3058func TestDistanceHitIterBailLast(t *testing.T) { 3059 content := []byte("AST AST AST UASH") 3060 b := testIndexBuilder(t, nil, 3061 Document{ 3062 Name: "f1", 3063 Content: content, 3064 }, 3065 ) 3066 t.Run("LineMatches", func(t *testing.T) { 3067 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 3068 if len(res.Files) != 0 { 3069 t.Fatalf("got %v, want no results", res.Files) 3070 } 3071 }) 3072 3073 t.Run("LineMatches", func(t *testing.T) { 3074 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 3075 if len(res.Files) != 0 { 3076 t.Fatalf("got %v, want no results", res.Files) 3077 } 3078 }) 3079} 3080 3081func TestDocumentSectionRuneBoundary(t *testing.T) { 3082 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3083 b, err := NewIndexBuilder(nil) 3084 if err != nil { 3085 t.Fatalf("NewIndexBuilder: %v", err) 3086 } 3087 3088 for i, sec := range []DocumentSection{ 3089 {2, 6}, 3090 {3, 7}, 3091 } { 3092 if err := b.Add(Document{ 3093 Name: "f1", 3094 Content: []byte(content), 3095 Symbols: []DocumentSection{sec}, 3096 }); err == nil { 3097 t.Errorf("%d: Add succeeded", i) 3098 } 3099 } 3100} 3101 3102func TestUnicodeQuery(t *testing.T) { 3103 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3104 b := testIndexBuilder(t, nil, 3105 Document{ 3106 Name: "f1", 3107 Content: 
[]byte(content),
		},
	)

	q := &query.Substring{Pattern: content}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Report the chunk matches that were actually counted.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks
// that its original text is not searchable while the pattern "NOT-INDEXED"
// matches one file.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {

		b, err := NewIndexBuilder(nil)
		if err != nil {
			t.Fatalf("NewIndexBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res =
searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with texts it must accept and
// texts it must reject, first with the third argument false and then true.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if err := docChecker.Check([]byte(text), 20000, false); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if err := docChecker.Check([]byte(text), 15, false); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if err := docChecker.Check([]byte(text), 15, true); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx"} {
		if err := docChecker.Check([]byte(text), 15, true); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd checks that a content regexp whose two literals must appear
// on the same line matches only f1 and that exactly one regexp is
// considered.
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would only fail later, far from the real cause.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName checks that the same regexp, restricted to file
// names, matches only the document named "apple banana".
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would only fail later, far from the real cause.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if
gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestMultiLineRegex checks that a regexp spanning a newline matches only
// f1, producing two line matches in line mode and one chunk with one range
// in chunk mode.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would only fail later, far from the real cause.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp: r,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			// Fatal: res.Files[0] is indexed below and would panic when
			// the search returns no files.
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].LineMatches); l != 2 {
			t.Errorf("got %v, want 2 line matches", l)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			// Fatal: res.Files[0] is indexed below.
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].ChunkMatches); l != 1 {
			// Fatal: ChunkMatches[0] is indexed below.
			t.Fatalf("got %v, want 1 chunk matches", l)
		}
		if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
			t.Errorf("got %v, want 1 chunk ranges", l)
		}
	})
}

// TestSearchTypeFileName checks how query.Type with TypeFileName shapes the
// reported matches, alone and combined with a content substring.
func TestSearchTypeFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
3350 Name: "reponame", 3351 }, 3352 Document{Name: "f1", Content: []byte("bla the needle")}, 3353 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3354 // -----------------------------------012345678901234567890-123456 3355 ) 3356 3357 t.Run("LineMatches", func(t *testing.T) { 3358 wantSingleMatch := func(res *SearchResult, want string) { 3359 t.Helper() 3360 fmatches := res.Files 3361 if len(fmatches) != 1 { 3362 t.Errorf("got %v, want 1 matches", len(fmatches)) 3363 return 3364 } 3365 if len(fmatches[0].LineMatches) != 1 { 3366 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3367 return 3368 } 3369 var got string 3370 if fmatches[0].LineMatches[0].FileName { 3371 got = fmatches[0].FileName 3372 } else { 3373 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3374 } 3375 3376 if got != want { 3377 t.Errorf("got %s, want %s", got, want) 3378 } 3379 } 3380 3381 // Only return the later match in the second file 3382 res := searchForTest(t, b, query.NewAnd( 3383 &query.Type{ 3384 Type: query.TypeFileName, 3385 Child: &query.Substring{Pattern: "needle"}, 3386 }, 3387 &query.Substring{Pattern: "file"})) 3388 wantSingleMatch(res, "f2:8") 3389 3390 // Only return a filename result 3391 res = searchForTest(t, b, 3392 &query.Type{ 3393 Type: query.TypeFileName, 3394 Child: &query.Substring{Pattern: "file"}, 3395 }) 3396 wantSingleMatch(res, "f2") 3397 }) 3398 3399 t.Run("ChunkMatches", func(t *testing.T) { 3400 wantSingleMatch := func(res *SearchResult, want string) { 3401 t.Helper() 3402 fmatches := res.Files 3403 if len(fmatches) != 1 { 3404 t.Errorf("got %v, want 1 matches", len(fmatches)) 3405 return 3406 } 3407 if len(fmatches[0].ChunkMatches) != 1 { 3408 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3409 return 3410 } 3411 var got string 3412 if fmatches[0].ChunkMatches[0].FileName { 3413 got = fmatches[0].FileName 3414 } else { 3415 got = fmt.Sprintf("%s:%d", 
fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3416 } 3417 3418 if got != want { 3419 t.Errorf("got %s, want %s", got, want) 3420 } 3421 } 3422 3423 // Only return the later match in the second file 3424 res := searchForTest(t, b, query.NewAnd( 3425 &query.Type{ 3426 Type: query.TypeFileName, 3427 Child: &query.Substring{Pattern: "needle"}, 3428 }, 3429 &query.Substring{Pattern: "file"}), 3430 chunkOpts, 3431 ) 3432 wantSingleMatch(res, "f2:8") 3433 3434 // Only return a filename result 3435 res = searchForTest(t, b, 3436 &query.Type{ 3437 Type: query.TypeFileName, 3438 Child: &query.Substring{Pattern: "file"}, 3439 }, 3440 chunkOpts, 3441 ) 3442 wantSingleMatch(res, "f2") 3443 }) 3444} 3445 3446func TestSearchTypeLanguage(t *testing.T) { 3447 b := testIndexBuilder(t, &Repository{ 3448 Name: "reponame", 3449 }, 3450 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3451 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3452 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3453 Document{Name: "be.magik", Content: []byte(`_package unicorn`)}, 3454 ) 3455 3456 t.Log(b.languageMap) 3457 3458 t.Run("LineMatches", func(t *testing.T) { 3459 wantSingleMatch := func(res *SearchResult, want string) { 3460 t.Helper() 3461 fmatches := res.Files 3462 if len(fmatches) != 1 { 3463 t.Errorf("got %v, want 1 matches", len(fmatches)) 3464 return 3465 } 3466 if len(fmatches[0].LineMatches) != 1 { 3467 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3468 return 3469 } 3470 var got string 3471 if fmatches[0].LineMatches[0].FileName { 3472 got = fmatches[0].FileName 3473 } else { 3474 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3475 } 3476 3477 if got != want { 3478 t.Errorf("got %s, want %s", got, want) 3479 } 3480 } 3481 3482 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3483 wantSingleMatch(res, 
"apex.cls") 3484 3485 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3486 wantSingleMatch(res, "tex.cls") 3487 3488 res = searchForTest(t, b, &query.Language{Language: "C"}) 3489 wantSingleMatch(res, "hello.h") 3490 3491 res = searchForTest(t, b, &query.Language{Language: "Magik"}) 3492 wantSingleMatch(res, "be.magik") 3493 3494 // test fallback language search by pretending it's an older index version 3495 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3496 if len(res.Files) != 0 { 3497 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3498 } 3499 3500 b.featureVersion = 11 // force fallback 3501 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3502 wantSingleMatch(res, "hello.h") 3503 }) 3504 3505 t.Run("ChunkMatches", func(t *testing.T) { 3506 wantSingleMatch := func(res *SearchResult, want string) { 3507 t.Helper() 3508 fmatches := res.Files 3509 if len(fmatches) != 1 { 3510 t.Errorf("got %v, want 1 matches", len(fmatches)) 3511 return 3512 } 3513 if len(fmatches[0].ChunkMatches) != 1 { 3514 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3515 return 3516 } 3517 var got string 3518 if fmatches[0].ChunkMatches[0].FileName { 3519 got = fmatches[0].FileName 3520 } else { 3521 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3522 } 3523 3524 if got != want { 3525 t.Errorf("got %s, want %s", got, want) 3526 } 3527 } 3528 3529 b.featureVersion = FeatureVersion // reset feature version 3530 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3531 wantSingleMatch(res, "apex.cls") 3532 3533 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3534 wantSingleMatch(res, "tex.cls") 3535 3536 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3537 wantSingleMatch(res, "hello.h") 3538 3539 // test fallback language search by pretending it's an older index version 3540 res = searchForTest(t, b, 
&query.Language{Language: "C++"}, chunkOpts) 3541 if len(res.Files) != 0 { 3542 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3543 } 3544 3545 b.featureVersion = 11 // force fallback 3546 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3547 wantSingleMatch(res, "hello.h") 3548 }) 3549} 3550 3551func TestStats(t *testing.T) { 3552 ignored := []cmp.Option{ 3553 cmpopts.EquateEmpty(), 3554 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3555 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3556 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3557 } 3558 3559 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3560 searcher := searcherForTest(t, b) 3561 indexdata := searcher.(*indexData) 3562 return indexdata.repoListEntry 3563 } 3564 3565 t.Run("one empty repo", func(t *testing.T) { 3566 b := testIndexBuilder(t, nil) 3567 got := repoListEntries(b) 3568 want := []RepoListEntry{ 3569 { 3570 Stats: RepoStats{ 3571 Repos: 0, 3572 Shards: 1, 3573 Documents: 0, 3574 IndexBytes: 20, 3575 ContentBytes: 0, 3576 NewLinesCount: 0, 3577 DefaultBranchNewLinesCount: 0, 3578 OtherBranchesNewLinesCount: 0, 3579 }, 3580 }, 3581 } 3582 3583 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3584 t.Fatalf("mismatch (-want +got):\n%s", diff) 3585 } 3586 }) 3587 3588 t.Run("one simple shard", func(t *testing.T) { 3589 b := testIndexBuilder(t, nil, 3590 Document{Name: "doc 0", Content: []byte("content 0")}, 3591 Document{Name: "doc 1", Content: []byte("content 1")}, 3592 ) 3593 got := repoListEntries(b) 3594 want := []RepoListEntry{ 3595 { 3596 Stats: RepoStats{ 3597 Repos: 0, 3598 Shards: 1, 3599 Documents: 2, 3600 IndexBytes: 224, 3601 ContentBytes: 28, 3602 NewLinesCount: 0, 3603 DefaultBranchNewLinesCount: 0, 3604 OtherBranchesNewLinesCount: 0, 3605 }, 3606 }, 3607 } 3608 3609 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3610 t.Fatalf("mismatch (-want +got):\n%s", diff) 3611 } 3612 }) 3613 3614 t.Run("one 
compound shard", func(t *testing.T) { 3615 b := testIndexBuilderCompound(t, 3616 []*Repository{ 3617 {Name: "repo 0"}, 3618 {Name: "repo 1"}, 3619 }, 3620 [][]Document{ 3621 { 3622 {Name: "doc 0", Content: []byte("content 0")}, 3623 {Name: "doc 1", Content: []byte("content 1")}, 3624 }, 3625 { 3626 {Name: "doc 2", Content: []byte("content 2")}, 3627 {Name: "doc 3", Content: []byte("content 3")}, 3628 }, 3629 }, 3630 ) 3631 got := repoListEntries(b) 3632 want := []RepoListEntry{ 3633 { 3634 Stats: RepoStats{ 3635 Repos: 0, 3636 Shards: 1, 3637 Documents: 2, 3638 IndexBytes: 180, 3639 ContentBytes: 28, 3640 NewLinesCount: 0, 3641 DefaultBranchNewLinesCount: 0, 3642 OtherBranchesNewLinesCount: 0, 3643 }, 3644 }, 3645 { 3646 Stats: RepoStats{ 3647 Repos: 0, 3648 Shards: 1, 3649 Documents: 2, 3650 IndexBytes: 180, 3651 ContentBytes: 28, 3652 NewLinesCount: 0, 3653 DefaultBranchNewLinesCount: 0, 3654 OtherBranchesNewLinesCount: 0, 3655 }, 3656 }, 3657 } 3658 3659 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3660 t.Fatalf("mismatch (-want +got):\n%s", diff) 3661 } 3662 }) 3663 3664 t.Run("compound shard with empty repos", func(t *testing.T) { 3665 b := testIndexBuilderCompound(t, 3666 []*Repository{ 3667 {Name: "repo 0"}, 3668 {Name: "repo 1"}, 3669 {Name: "repo 2"}, 3670 {Name: "repo 3"}, 3671 {Name: "repo 4"}, 3672 }, 3673 [][]Document{ 3674 {{Name: "doc 0", Content: []byte("content 0")}}, 3675 nil, 3676 {{Name: "doc 1", Content: []byte("content 1")}}, 3677 nil, 3678 nil, 3679 }, 3680 ) 3681 got := repoListEntries(b) 3682 3683 entryEmpty := RepoListEntry{Stats: RepoStats{ 3684 Shards: 1, 3685 Documents: 0, 3686 ContentBytes: 0, 3687 }} 3688 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3689 Shards: 1, 3690 Documents: 1, 3691 ContentBytes: 14, 3692 }} 3693 3694 want := []RepoListEntry{ 3695 entryNonEmpty, 3696 entryEmpty, 3697 entryNonEmpty, 3698 entryEmpty, 3699 entryEmpty, 3700 } 3701 3702 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3703 
t.Fatalf("mismatch (-want +got):\n%s", diff) 3704 } 3705 }) 3706} 3707 3708// This tests the frequent pattern "\bLITERAL\b". 3709func TestWordSearch(t *testing.T) { 3710 content := []byte("needle the bla") 3711 // ----------------01234567890123 3712 3713 b := testIndexBuilder(t, nil, 3714 Document{ 3715 Name: "f1", 3716 Content: content, 3717 }) 3718 3719 t.Run("LineMatches", func(t *testing.T) { 3720 sres := searchForTest(t, b, 3721 &query.Regexp{ 3722 Regexp: mustParseRE("\\bthe\\b"), 3723 CaseSensitive: true, 3724 Content: true, 3725 }) 3726 3727 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3728 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3729 } 3730 3731 if sres.Stats.RegexpsConsidered != 0 { 3732 t.Fatal("expected regexp to be skipped") 3733 } 3734 3735 got := sres.Files[0].LineMatches[0] 3736 want := LineMatch{ 3737 LineFragments: []LineFragmentMatch{{ 3738 LineOffset: 7, 3739 Offset: 7, 3740 MatchLength: 3, 3741 }}, 3742 Line: content, 3743 FileName: false, 3744 LineNumber: 1, 3745 LineStart: 0, 3746 LineEnd: 14, 3747 } 3748 3749 if !reflect.DeepEqual(got, want) { 3750 t.Errorf("got %#v, want %#v", got, want) 3751 } 3752 }) 3753 3754 t.Run("ChunkMatches", func(t *testing.T) { 3755 sres := searchForTest(t, b, 3756 &query.Regexp{ 3757 Regexp: mustParseRE("\\bthe\\b"), 3758 CaseSensitive: true, 3759 }, chunkOpts) 3760 3761 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3762 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3763 } 3764 3765 if sres.Stats.RegexpsConsidered != 0 { 3766 t.Fatal("expected regexp to be skipped") 3767 } 3768 3769 got := sres.Files[0].ChunkMatches[0] 3770 want := ChunkMatch{ 3771 Content: content, 3772 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3773 Ranges: []Range{{ 3774 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3775 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3776 }}, 3777 } 3778 3779 if diff := cmp.Diff(want, got); diff != "" { 
3780 t.Fatal(diff) 3781 } 3782 }) 3783}