fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 } 42 r.Files[i].Checksum = nil 43 r.Files[i].Debug = "" 44 } 45} 46 47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 48 t.Helper() 49 50 b, err := NewIndexBuilder(repo) 51 if err != nil { 52 t.Fatalf("NewIndexBuilder: %v", err) 53 } 54 55 for i, d := range docs { 56 if err := b.Add(d); err != nil { 57 t.Fatalf("Add %d: %v", i, err) 58 } 59 } 60 61 return b 62} 63 64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 65 t.Helper() 66 67 b := newIndexBuilder() 68 b.indexFormatVersion = NextIndexFormatVersion 69 70 if len(repos) != len(docs) { 71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 72 } 73 74 for i, repo := range repos { 75 if err := b.setRepository(repo); err != nil { 76 t.Fatal(err) 77 } 78 for j, d := range docs[i] { 79 if err := b.Add(d); err != nil { 80 t.Fatalf("Add %d %d: %v", i, j, err) 81 } 82 } 83 } 84 85 return b 86} 87 88func TestBoundary(t *testing.T) { 89 b := testIndexBuilder(t, nil, 90 Document{Name: "f1", Content: []byte("x the")}, 91 Document{Name: "f1", Content: []byte("reader")}) 92 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 93 if len(res.Files) > 0 { 94 t.Fatalf("got %v, want no matches", res.Files) 95 } 96} 97 98func TestDocSectionInvalid(t *testing.T) { 99 b, err := NewIndexBuilder(nil) 100 if err != nil { 101 t.Fatalf("NewIndexBuilder: %v", err) 102 } 103 doc := Document{ 104 Name: "f1", 105 Content: []byte("01234567890123"), 106 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 107 } 108 109 if err := b.Add(doc); err == nil { 110 t.Errorf("overlapping doc sections should fail") 111 } 112 113 doc = Document{ 114 Name: "f1", 115 Content: []byte("01234567890123"), 116 Symbols: []DocumentSection{{0, 20}}, 117 } 118 119 if err := b.Add(doc); err == nil { 120 t.Errorf("doc sections beyond EOF should fail") 121 } 122} 123 124func TestBasic(t *testing.T) { 125 b := testIndexBuilder(t, nil, 126 Document{ 127 Name: "f2", 128 Content: []byte("to carry water in the no later bla"), 129 // --------------0123456789012345678901234567890123 130 }) 131 132 t.Run("LineMatch", func(t *testing.T) { 133 res := searchForTest(t, b, &query.Substring{ 134 Pattern: "water", 135 CaseSensitive: true, 136 }) 137 fmatches := res.Files 138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 139 t.Fatalf("got %v, want 1 matches", fmatches) 140 } 141 142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 143 want := "f2:9" 144 if got != want { 145 t.Errorf("1: got %s, want %s", got, want) 146 } 147 }) 148 149 t.Run("ChunkMatch", func(t *testing.T) { 150 res := searchForTest(t, b, &query.Substring{ 151 Pattern: "water", 152 CaseSensitive: true, 153 }, chunkOpts) 154 fmatches := res.Files 155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 156 t.Fatalf("got %v, want 1 matches", fmatches) 157 } 158 159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 160 want := "f2:9" 161 if got != want { 162 t.Errorf("1: got %s, want %s", got, want) 163 } 164 }) 165} 166 167func TestEmptyIndex(t *testing.T) { 168 b := testIndexBuilder(t, nil) 169 searcher := searcherForTest(t, b) 170 171 var opts SearchOptions 172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 173 t.Fatalf("Search: %v", err) 174 } 175 176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 177 t.Fatalf("List: %v", err) 178 } 179 180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 181 t.Fatalf("Search: %v", err) 182 } 183} 184 185type memSeeker struct { 186 data []byte 187} 188 189func (s *memSeeker) Name() string { 190 return "memseeker" 191} 192 193func (s *memSeeker) Close() {} 194func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 195 return s.data[off : off+sz], nil 196} 197 198func (s *memSeeker) Size() (uint32, error) { 199 return uint32(len(s.data)), nil 200} 201 202func TestNewlines(t *testing.T) { 203 b := testIndexBuilder(t, nil, 204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 205 // ---------------------------------------------012345-678901-234 206 207 t.Run("LineMatches", func(t *testing.T) { 208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 209 210 matches := sres.Files 211 want := []FileMatch{{ 212 FileName: "filename", 213 LineMatches: []LineMatch{{ 214 LineFragments: []LineFragmentMatch{{ 215 Offset: 8, 216 LineOffset: 2, 217 MatchLength: 3, 218 }}, 219 Line: []byte("line2"), 220 LineStart: 6, 221 LineEnd: 11, 222 LineNumber: 2, 223 }}, 224 }} 225 226 if !reflect.DeepEqual(matches, want) { 227 t.Errorf("got %v, want %v", matches, want) 228 } 229 }) 230 231 t.Run("ChunkMatches", func(t *testing.T) { 232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 233 234 matches := sres.Files 235 want := []FileMatch{{ 236 FileName: "filename", 237 ChunkMatches: []ChunkMatch{{ 238 Content: []byte("line2"), 239 ContentStart: Location{ 240 ByteOffset: 6, 241 LineNumber: 2, 242 Column: 1, 243 }, 244 Ranges: []Range{{ 245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 247 }}, 248 }}, 249 }} 250 251 if diff := cmp.Diff(want, matches); diff != "" { 252 t.Fatal(diff) 253 } 254 }) 255} 256 257// A result spanning multiple lines should have LineMatches that only cover 258// single lines. 259func TestQueryNewlines(t *testing.T) { 260 text := "line1\nline2\nbla" 261 b := testIndexBuilder(t, nil, 262 Document{Name: "filename", Content: []byte(text)}) 263 264 t.Run("LineMatches", func(t *testing.T) { 265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 266 matches := sres.Files 267 if len(matches) != 1 { 268 t.Fatalf("got %d file matches, want exactly one", len(matches)) 269 } 270 m := matches[0] 271 if len(m.LineMatches) != 2 { 272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 273 } 274 }) 275 276 t.Run("ChunkMatches", func(t *testing.T) { 277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 278 matches := sres.Files 279 if len(matches) != 1 { 280 t.Fatalf("got %d file matches, want exactly one", len(matches)) 281 } 282 m := matches[0] 283 if len(m.ChunkMatches) != 1 { 284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 285 } 286 }) 287} 288 289var chunkOpts = SearchOptions{ChunkMatches: true} 290 291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 292 searcher := searcherForTest(t, b) 293 var opts SearchOptions 294 if len(o) > 0 { 295 opts = o[0] 296 } 297 res, err := searcher.Search(context.Background(), q, &opts) 298 if err != nil { 299 t.Fatalf("Search(%s): %v", q, err) 300 } 301 clearScores(res) 302 return res 303} 304 305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 306 var buf bytes.Buffer 307 if err := b.Write(&buf); err != nil { 308 t.Fatal(err) 309 } 310 f := &memSeeker{buf.Bytes()} 311 312 searcher, err := NewSearcher(f) 313 if err != nil { 314 t.Fatalf("NewSearcher: %v", err) 315 } 316 317 return searcher 318} 319 320func TestCaseFold(t *testing.T) { 321 b := testIndexBuilder(t, nil, 322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 323 // -----------------------------------012345678901234 324 ) 325 t.Run("LineMatches", func(t *testing.T) { 326 sres := searchForTest(t, b, &query.Substring{ 327 Pattern: "bananas", 328 CaseSensitive: true, 329 }) 330 matches := sres.Files 331 if len(matches) != 0 { 332 t.Errorf("foldcase: got %#v, want 0 matches", matches) 333 } 334 335 sres = searchForTest(t, b, 336 &query.Substring{ 337 Pattern: "BaNaNAS", 338 CaseSensitive: true, 339 }) 340 matches = sres.Files 341 if len(matches) != 1 { 342 t.Errorf("no foldcase: got %v, want 1 matches", matches) 343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 344 t.Errorf("foldcase: got %v, want offsets 7", matches) 345 } 346 }) 347 348 t.Run("ChunkMatches", func(t *testing.T) { 349 sres := searchForTest(t, b, &query.Substring{ 350 Pattern: "bananas", 351 CaseSensitive: true, 352 }, chunkOpts) 353 matches := sres.Files 354 if len(matches) != 0 { 355 t.Errorf("foldcase: got %#v, want 0 matches", matches) 356 } 357 358 sres = searchForTest(t, b, 359 &query.Substring{ 360 Pattern: "BaNaNAS", 361 CaseSensitive: true, 362 }) 363 matches = sres.Files 364 if len(matches) != 1 { 365 t.Errorf("no foldcase: got %v, want 1 matches", matches) 366 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 367 t.Errorf("foldcase: got %v, want offsets 7", matches) 368 } 369 }) 370} 371 372func TestSearchStats(t *testing.T) { 373 ctx := context.Background() 374 searcher := searcherForTest(t, testIndexBuilder(t, nil, 375 Document{Name: "f1", Content: []byte("x banana y")}, 376 Document{Name: "f2", Content: []byte("x apple y")}, 377 Document{Name: "f3", Content: []byte("x banana apple y")}, 378 // -----------------------------------0123456789012345 379 )) 380 381 andQuery := query.NewAnd( 382 &query.Substring{ 383 Pattern: "banana", 384 }, 385 &query.Substring{ 386 Pattern: "apple", 387 }, 388 ) 389 390 t.Run("LineMatches", func(t *testing.T) { 391 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 392 if err != nil { 393 t.Fatal(err) 394 } 395 matches := sres.Files 396 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 397 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 398 } 399 400 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 401 t.Fatalf("got %#v, want offsets 2,9", matches) 402 } 403 }) 404 t.Run("ChunkMatches", func(t *testing.T) { 405 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 406 if err != nil { 407 t.Fatal(err) 408 } 409 matches := sres.Files 410 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 411 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 412 } 413 414 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 415 t.Fatalf("got %#v, want offsets 2,9", matches) 416 } 417 }) 418 t.Run("Stats", func(t *testing.T) { 419 cases := []struct { 420 Name string 421 Q query.Q 422 Want Stats 423 }{{ 424 Name: "and-query", 425 Q: andQuery, 426 Want: Stats{ 427 FilesLoaded: 1, 428 ContentBytesLoaded: 18, 429 IndexBytesLoaded: 8, 430 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 431 NgramLookups: 104, 432 MatchCount: 2, 433 FileCount: 1, 434 FilesConsidered: 2, 435 ShardsScanned: 1, 436 }, 437 }, { 438 Name: "one-trigram", 439 Q: &query.Substring{ 440 Pattern: "a y", 441 Content: true, 442 CaseSensitive: true, 443 }, 444 Want: Stats{ 445 ContentBytesLoaded: 12, 446 IndexBytesLoaded: 1, 447 FileCount: 1, 448 FilesConsidered: 1, 449 FilesLoaded: 1, 450 ShardsScanned: 1, 451 MatchCount: 1, 452 NgramMatches: 1, 453 NgramLookups: 2, // once to lookup frequency then again to access posting list. 454 }, 455 }, { 456 Name: "one-trigram-case-insensitive", 457 Q: &query.Substring{ 458 Pattern: "a y", 459 Content: true, 460 }, 461 Want: Stats{ 462 ContentBytesLoaded: 12, 463 IndexBytesLoaded: 1, 464 FileCount: 1, 465 FilesConsidered: 1, 466 FilesLoaded: 1, 467 ShardsScanned: 1, 468 MatchCount: 1, 469 NgramMatches: 1, 470 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 471 }, 472 }, { 473 Name: "one-trigram-pruned", 474 Q: &query.Substring{ 475 Pattern: "foo", 476 Content: true, 477 CaseSensitive: true, 478 }, 479 Want: Stats{ 480 ShardsSkippedFilter: 1, 481 NgramLookups: 1, // only had to lookup once 482 }, 483 }, { 484 Name: "one-trigram-branch-pruned", 485 Q: query.NewAnd( 486 &query.Substring{ 487 Pattern: "foo", 488 Content: true, 489 CaseSensitive: true, 490 }, 491 &query.Substring{ 492 Pattern: "a y", 493 Content: true, 494 CaseSensitive: true, 495 }, 496 ), 497 Want: Stats{ 498 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 499 ShardsSkippedFilter: 1, 500 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 501 }, 502 }} 503 504 for _, tc := range cases { 505 t.Run(tc.Name, func(t *testing.T) { 506 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 507 if err != nil { 508 t.Fatal(err) 509 } 510 if diff := cmp.Diff(tc.Want, sres.Stats); diff != "" { 511 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 512 } 513 }) 514 } 515 516 }) 517} 518 519func TestAndNegateSearch(t *testing.T) { 520 b := testIndexBuilder(t, nil, 521 Document{Name: "f1", Content: []byte("x banana y")}, 522 // -----------------------------------0123456789 523 Document{Name: "f4", Content: []byte("x banana apple y")}) 524 525 t.Run("LineMatches", func(t *testing.T) { 526 sres := searchForTest(t, b, query.NewAnd( 527 &query.Substring{ 528 Pattern: "banana", 529 }, 530 &query.Not{Child: &query.Substring{ 531 Pattern: "apple", 532 }})) 533 534 matches := sres.Files 535 536 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 537 t.Fatalf("got %v, want 1 match", matches) 538 } 539 if matches[0].FileName != "f1" { 540 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 541 } 542 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 543 t.Fatalf("got %v, want offset 2", matches) 544 } 545 }) 546 547 t.Run("ChunkMatches", func(t *testing.T) { 548 sres := searchForTest(t, b, 549 query.NewAnd( 550 &query.Substring{ 551 Pattern: "banana", 552 }, 553 &query.Not{Child: &query.Substring{ 554 Pattern: "apple", 555 }}, 556 ), 557 chunkOpts, 558 ) 559 560 matches := sres.Files 561 562 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 563 t.Fatalf("got %v, want 1 match", matches) 564 } 565 if matches[0].FileName != "f1" { 566 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 567 } 568 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 569 t.Fatalf("got %v, want offset 2", matches) 570 } 571 }) 572} 573 574func TestNegativeMatchesOnlyShortcut(t *testing.T) { 575 b := testIndexBuilder(t, nil, 576 Document{Name: "f1", Content: []byte("x banana y")}, 577 Document{Name: "f2", Content: []byte("x appelmoes y")}, 578 Document{Name: "f3", Content: []byte("x appelmoes y")}, 579 Document{Name: "f3", Content: []byte("x appelmoes y")}) 580 581 t.Run("LineMatches", func(t *testing.T) { 582 sres := searchForTest(t, b, query.NewAnd( 583 &query.Substring{ 584 Pattern: "banana", 585 }, 586 &query.Not{Child: &query.Substring{ 587 Pattern: "appel", 588 }})) 589 590 if sres.Stats.FilesConsidered != 1 { 591 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 592 } 593 }) 594 595 t.Run("ChunkMatches", func(t *testing.T) { 596 sres := searchForTest(t, b, query.NewAnd( 597 &query.Substring{ 598 Pattern: "banana", 599 }, 600 &query.Not{Child: &query.Substring{ 601 Pattern: "appel", 602 }}), chunkOpts) 603 604 if sres.Stats.FilesConsidered != 1 { 605 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 606 } 607 }) 608} 609 610func TestFileSearch(t *testing.T) { 611 b := testIndexBuilder(t, nil, 612 Document{Name: "banzana", Content: []byte("x orange y")}, 613 // -------------0123456 614 Document{Name: "banana", Content: []byte("x apple y")}, 615 // -------------012345 616 ) 617 618 t.Run("LineMatches", func(t *testing.T) { 619 sres := searchForTest(t, b, &query.Substring{ 620 Pattern: "anan", 621 FileName: true, 622 }) 623 624 matches := sres.Files 625 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 626 t.Fatalf("got %v, want 1 match", matches) 627 } 628 629 got := matches[0].LineMatches[0] 630 want := LineMatch{ 631 Line: []byte("banana"), 632 LineFragments: []LineFragmentMatch{{ 633 Offset: 1, 634 LineOffset: 1, 635 MatchLength: 4, 636 }}, 637 FileName: true, 638 } 639 640 if !reflect.DeepEqual(got, want) { 641 t.Errorf("got %#v, want %#v", got, want) 642 } 643 }) 644 645 t.Run("ChunkMatches", func(t *testing.T) { 646 sres := searchForTest(t, b, &query.Substring{ 647 Pattern: "anan", 648 FileName: true, 649 }, chunkOpts) 650 651 matches := sres.Files 652 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 653 t.Fatalf("got %v, want 1 match", matches) 654 } 655 656 got := matches[0].ChunkMatches[0] 657 want := ChunkMatch{ 658 Content: []byte("banana"), 659 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 660 Ranges: []Range{{ 661 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 662 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 663 }}, 664 FileName: true, 665 } 666 667 if diff := cmp.Diff(want, got); diff != "" { 668 t.Fatal(diff) 669 } 670 }) 671 672 t.Run("FileNameSet", func(t *testing.T) { 673 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 674 675 matches := sres.Files 676 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 677 t.Fatalf("got %v, want 1 match", matches) 678 } 679 680 got := matches[0].ChunkMatches[0] 681 want := ChunkMatch{ 682 Content: []byte("banana"), 683 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 684 Ranges: []Range{{ 685 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 686 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 687 }}, 688 FileName: true, 689 } 690 691 if diff := cmp.Diff(want, got); diff != "" { 692 t.Fatal(diff) 693 } 694 }) 695} 696 697func TestFileCase(t *testing.T) { 698 b := testIndexBuilder(t, nil, 699 Document{Name: "BANANA", Content: []byte("x orange y")}) 700 701 t.Run("LineMatches", func(t *testing.T) { 702 sres := searchForTest(t, b, &query.Substring{ 703 Pattern: "banana", 704 FileName: true, 705 }) 706 707 matches := sres.Files 708 if len(matches) != 1 || matches[0].FileName != "BANANA" { 709 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 710 } 711 }) 712 713 t.Run("ChunkMatches", func(t *testing.T) { 714 sres := searchForTest(t, b, &query.Substring{ 715 Pattern: "banana", 716 FileName: true, 717 }, chunkOpts) 718 719 matches := sres.Files 720 if len(matches) != 1 || matches[0].FileName != "BANANA" { 721 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 722 } 723 }) 724} 725 726func TestFileRegexpSearchBruteForce(t *testing.T) { 727 b := testIndexBuilder(t, nil, 728 Document{Name: "banzana", Content: []byte("x orange y")}, 729 Document{Name: "banana", Content: []byte("x apple y")}, 730 ) 731 t.Run("LineMatches", func(t *testing.T) { 732 sres := searchForTest(t, b, &query.Regexp{ 733 Regexp: mustParseRE("[qn][zx]"), 734 FileName: true, 735 }) 736 737 matches := sres.Files 738 if len(matches) != 1 || matches[0].FileName != "banzana" { 739 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 740 } 741 }) 742 t.Run("LineMatches", func(t *testing.T) { 743 sres := searchForTest(t, b, &query.Regexp{ 744 Regexp: mustParseRE("[qn][zx]"), 745 FileName: true, 746 }, chunkOpts) 747 748 matches := sres.Files 749 if len(matches) != 1 || matches[0].FileName != "banzana" { 750 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 751 } 752 }) 753} 754 755func TestFileRegexpSearchShortString(t *testing.T) { 756 b := testIndexBuilder(t, nil, 757 Document{Name: "banana.py", Content: []byte("x orange y")}) 758 759 t.Run("LineMatches", func(t *testing.T) { 760 sres := searchForTest(t, b, &query.Regexp{ 761 Regexp: mustParseRE("ana.py"), 762 FileName: true, 763 }) 764 765 matches := sres.Files 766 if len(matches) != 1 || matches[0].FileName != "banana.py" { 767 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 768 } 769 }) 770 771 t.Run("ChunkMatches", func(t *testing.T) { 772 sres := searchForTest(t, b, &query.Regexp{ 773 Regexp: mustParseRE("ana.py"), 774 FileName: true, 775 }, chunkOpts) 776 777 matches := sres.Files 778 if len(matches) != 1 || matches[0].FileName != "banana.py" { 779 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 780 } 781 }) 782} 783 784func TestFileSubstringSearchBruteForce(t *testing.T) { 785 b := testIndexBuilder(t, nil, 786 Document{Name: "BANZANA", Content: []byte("x orange y")}, 787 Document{Name: "banana", Content: []byte("x apple y")}) 788 789 q := &query.Substring{ 790 Pattern: "z", 791 FileName: true, 792 } 793 794 t.Run("LineMatches", func(t *testing.T) { 795 res := searchForTest(t, b, q) 796 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 797 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 798 } 799 }) 800 801 t.Run("ChunkMatches", func(t *testing.T) { 802 res := searchForTest(t, b, q, chunkOpts) 803 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 804 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 805 } 806 }) 807} 808 809func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 810 b := testIndexBuilder(t, nil, 811 Document{Name: "BANZANA", Content: []byte("x orange y")}, 812 Document{Name: "bananaq", Content: []byte("x apple y")}) 813 814 q := &query.Substring{ 815 Pattern: "q", 816 FileName: true, 817 } 818 t.Run("LineMatches", func(t *testing.T) { 819 res := searchForTest(t, b, q) 820 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 821 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 822 } 823 }) 824 825 t.Run("LineMatches", func(t *testing.T) { 826 res := searchForTest(t, b, q, chunkOpts) 827 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 828 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 829 } 830 }) 831} 832 833func TestSearchMatchAll(t *testing.T) { 834 b := testIndexBuilder(t, nil, 835 Document{Name: "banzana", Content: []byte("x orange y")}, 836 Document{Name: "banana", Content: []byte("x apple y")}) 837 838 t.Run("LineMatches", func(t *testing.T) { 839 sres := searchForTest(t, b, &query.Const{Value: true}) 840 matches := sres.Files 841 if len(matches) != 2 { 842 t.Fatalf("got %v, want 2 matches", matches) 843 } 844 }) 845 846 t.Run("ChunkMatches", func(t *testing.T) { 847 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 848 matches := sres.Files 849 if len(matches) != 2 { 850 t.Fatalf("got %v, want 2 matches", matches) 851 } 852 }) 853} 854 855func TestSearchNewline(t *testing.T) { 856 b := testIndexBuilder(t, nil, 857 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 858 859 t.Run("LineMatches", func(t *testing.T) { 860 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 861 862 // Just check that we don't crash. 863 864 matches := sres.Files 865 if len(matches) != 1 { 866 t.Fatalf("got %v, want 1 matches", matches) 867 } 868 }) 869 870 t.Run("ChunkMatches", func(t *testing.T) { 871 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 872 873 // Just check that we don't crash. 874 875 matches := sres.Files 876 if len(matches) != 1 { 877 t.Fatalf("got %v, want 1 matches", matches) 878 } 879 }) 880} 881 882func TestSearchMatchAllRegexp(t *testing.T) { 883 b := testIndexBuilder(t, nil, 884 Document{Name: "banzana", Content: []byte("abcd")}, 885 Document{Name: "banana", Content: []byte("pqrs")}) 886 887 t.Run("LineMatches", func(t *testing.T) { 888 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 889 890 matches := sres.Files 891 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 892 t.Fatalf("got %v, want 2 matches", matches) 893 } 894 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 895 t.Fatalf("want 4 chars in every file, got %#v", matches) 896 } 897 898 }) 899 900 t.Run("ChunkMatches", func(t *testing.T) { 901 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 902 903 matches := sres.Files 904 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 905 t.Fatalf("got %v, want 2 matches", matches) 906 } 907 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 908 t.Fatalf("want 4 chars in every file, got %#v", matches) 909 } 910 911 }) 912} 913 914func TestFileRestriction(t *testing.T) { 915 b := testIndexBuilder(t, nil, 916 Document{Name: "banana1", Content: []byte("x orange y")}, 917 Document{Name: "banana2", Content: []byte("x apple y")}, 918 Document{Name: "orange", Content: []byte("x apple z")}) 919 920 t.Run("LineMatches", func(t *testing.T) { 921 sres := searchForTest(t, b, query.NewAnd( 922 &query.Substring{ 923 Pattern: "banana", 924 FileName: true, 925 }, 926 &query.Substring{ 927 Pattern: "apple", 928 })) 929 930 matches := sres.Files 931 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 932 t.Fatalf("got %v, want 1 match", matches) 933 } 934 935 match := matches[0].LineMatches[0] 936 got := string(match.Line) 937 want := "x apple y" 938 if got != want { 939 t.Errorf("got match %#v, want line %q", match, want) 940 } 941 }) 942 943 t.Run("ChunkMatches", func(t *testing.T) { 944 sres := searchForTest(t, b, query.NewAnd( 945 &query.Substring{ 946 Pattern: "banana", 947 FileName: true, 948 }, 949 &query.Substring{ 950 Pattern: "apple", 951 }), chunkOpts) 952 953 matches := sres.Files 954 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 955 t.Fatalf("got %v, want 1 match", matches) 956 } 957 958 match := matches[0].ChunkMatches[0] 959 got := string(match.Content) 960 want := "x apple y" 961 if got != want { 962 t.Errorf("got match %#v, want line %q", match, want) 963 } 964 }) 965} 966 967func TestFileNameBoundary(t *testing.T) { 968 b := testIndexBuilder(t, nil, 969 Document{Name: "banana2", Content: []byte("x apple y")}, 970 Document{Name: "helpers.go", Content: []byte("x apple y")}, 971 Document{Name: "foo", Content: []byte("x apple y")}) 972 973 t.Run("LineMatches", func(t *testing.T) { 974 sres := searchForTest(t, b, &query.Substring{ 975 Pattern: "helpers.go", 976 FileName: true, 977 }) 978 979 matches := sres.Files 980 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 981 t.Fatalf("got %v, want 1 match", matches) 982 } 983 }) 984 985 t.Run("ChunkMatches", func(t *testing.T) { 986 sres := searchForTest(t, b, &query.Substring{ 987 Pattern: "helpers.go", 988 FileName: true, 989 }, chunkOpts) 990 991 matches := sres.Files 992 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 993 t.Fatalf("got %v, want 1 match", matches) 994 } 995 }) 996} 997 998func TestDocumentOrder(t *testing.T) { 999 var docs []Document 1000 for i := 0; i < 3; i++ { 1001 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1002 } 1003 1004 b := testIndexBuilder(t, nil, docs...) 1005 1006 t.Run("LineMatches", func(t *testing.T) { 1007 sres := searchForTest(t, b, query.NewAnd( 1008 &query.Substring{ 1009 Pattern: "needle", 1010 })) 1011 1012 want := []string{"f0", "f1", "f2"} 1013 var got []string 1014 for _, f := range sres.Files { 1015 got = append(got, f.FileName) 1016 } 1017 if !reflect.DeepEqual(got, want) { 1018 t.Fatalf("got %v, want %v", got, want) 1019 } 1020 }) 1021 1022 t.Run("ChunkMatches", func(t *testing.T) { 1023 sres := searchForTest(t, b, 1024 query.NewAnd(&query.Substring{ 1025 Pattern: "needle", 1026 }), 1027 chunkOpts, 1028 ) 1029 1030 want := []string{"f0", "f1", "f2"} 1031 var got []string 1032 for _, f := range sres.Files { 1033 got = append(got, f.FileName) 1034 } 1035 if !reflect.DeepEqual(got, want) { 1036 t.Fatalf("got %v, want %v", got, want) 1037 } 1038 }) 1039} 1040 1041func TestBranchMask(t *testing.T) { 1042 b := testIndexBuilder(t, &Repository{ 1043 Branches: []RepositoryBranch{ 1044 {"master", "v-master"}, 1045 {"stable", "v-stable"}, 1046 {"bonzai", "v-bonzai"}, 1047 }, 1048 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1049 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1050 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1051 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1052 ) 1053 1054 t.Run("LineMatches", func(t *testing.T) { 1055 sres := searchForTest(t, b, query.NewAnd( 1056 &query.Substring{ 1057 Pattern: "needle", 1058 }, 1059 &query.Branch{ 1060 Pattern: "table", 1061 })) 1062 1063 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1064 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1065 } 1066 1067 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1068 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1069 } 1070 }) 1071 1072 t.Run("ChunkMatches", func(t *testing.T) { 1073 sres := searchForTest(t, b, query.NewAnd( 1074 &query.Substring{ 1075 Pattern: "needle", 1076 }, 1077 &query.Branch{ 1078 Pattern: "table", 1079 }), 1080 chunkOpts, 1081 ) 1082 1083 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1084 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1085 } 1086 1087 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1088 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1089 } 1090 }) 1091} 1092 1093func TestBranchLimit(t *testing.T) { 1094 for limit := 64; limit <= 65; limit++ { 1095 r := &Repository{} 1096 for i := 0; i < limit; i++ { 1097 s := fmt.Sprintf("b%d", i) 1098 r.Branches = append(r.Branches, RepositoryBranch{ 1099 s, "v-" + s, 1100 }) 1101 } 1102 _, err := NewIndexBuilder(r) 1103 if limit == 64 && err != nil { 1104 t.Fatalf("NewIndexBuilder: %v", err) 1105 } else if limit == 65 && err == nil { 1106 t.Fatalf("NewIndexBuilder succeeded") 1107 } 1108 } 1109} 1110 1111func TestBranchReport(t *testing.T) { 1112 branches := []string{"stable", "master"} 1113 b := testIndexBuilder(t, &Repository{ 1114 Branches: []RepositoryBranch{ 1115 {"stable", "vs"}, 1116 {"master", "vm"}, 1117 }, 1118 }, 1119 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1120 1121 t.Run("LineMatches", func(t *testing.T) { 1122 sres := searchForTest(t, b, &query.Substring{ 1123 Pattern: "needle", 1124 }) 1125 if len(sres.Files) != 1 { 1126 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1127 } 1128 1129 f := sres.Files[0] 1130 if !reflect.DeepEqual(f.Branches, branches) { 1131 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1132 } 1133 }) 1134 1135 t.Run("ChunkMatches", func(t *testing.T) { 1136 sres := searchForTest(t, b, &query.Substring{ 1137 Pattern: "needle", 1138 }, chunkOpts) 1139 if len(sres.Files) != 1 { 1140 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1141 } 1142 1143 f := sres.Files[0] 1144 if !reflect.DeepEqual(f.Branches, branches) { 1145 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1146 } 1147 }) 1148 1149} 1150 1151func TestBranchVersions(t *testing.T) { 1152 b := testIndexBuilder(t, &Repository{ 1153 Branches: []RepositoryBranch{ 1154 {"stable", "v-stable"}, 1155 {"master", "v-master"}, 1156 }, 1157 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1158 1159 t.Run("LineMatches", func(t *testing.T) { 1160 sres := searchForTest(t, b, &query.Substring{ 1161 Pattern: "needle", 1162 }) 1163 if len(sres.Files) != 1 { 1164 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1165 } 1166 1167 f := sres.Files[0] 1168 if f.Version != "v-master" { 1169 t.Fatalf("got file %#v, want version 'v-master'", f) 1170 } 1171 }) 1172 1173 t.Run("ChunkMatches", func(t *testing.T) { 1174 sres := searchForTest(t, b, &query.Substring{ 1175 Pattern: "needle", 1176 }, chunkOpts) 1177 if len(sres.Files) != 1 { 1178 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1179 } 1180 1181 f := sres.Files[0] 1182 if f.Version != "v-master" { 1183 t.Fatalf("got file %#v, want version 'v-master'", f) 1184 } 1185 }) 1186} 1187 1188func mustParseRE(s string) *syntax.Regexp { 1189 r, err := syntax.Parse(s, syntax.Perl) 1190 if err != nil { 1191 panic(err) 1192 } 1193 1194 return r 1195} 1196 1197func TestRegexp(t *testing.T) { 1198 content := []byte("needle the bla") 1199 // ----------------01234567890123 1200 1201 b := testIndexBuilder(t, nil, 1202 Document{ 1203 Name: "f1", 1204 Content: content, 1205 }) 1206 1207 t.Run("LineMatches", func(t *testing.T) { 1208 sres := searchForTest(t, b, 1209 &query.Regexp{ 1210 Regexp: mustParseRE("dle.*bla"), 1211 }) 1212 1213 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1214 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1215 } 1216 1217 got := sres.Files[0].LineMatches[0] 1218 want := LineMatch{ 1219 LineFragments: []LineFragmentMatch{{ 1220 LineOffset: 3, 1221 Offset: 3, 1222 MatchLength: 11, 1223 }}, 1224 Line: content, 1225 FileName: false, 1226 LineNumber: 1, 1227 LineStart: 0, 1228 LineEnd: 14, 1229 } 1230 1231 if !reflect.DeepEqual(got, want) { 1232 t.Errorf("got %#v, want %#v", got, want) 1233 } 1234 }) 1235 1236 t.Run("ChunkMatches", func(t *testing.T) { 1237 sres := searchForTest(t, b, 1238 &query.Regexp{ 1239 Regexp: mustParseRE("dle.*bla"), 1240 }, chunkOpts) 1241 1242 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1243 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1244 } 1245 1246 got := sres.Files[0].ChunkMatches[0] 1247 want := ChunkMatch{ 1248 Content: content, 1249 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1250 Ranges: []Range{{ 1251 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1252 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1253 }}, 1254 } 1255 1256 if diff := cmp.Diff(want, got); diff != "" { 1257 t.Fatal(diff) 1258 } 1259 }) 1260} 1261 1262func TestRegexpFile(t *testing.T) { 1263 content := []byte("needle the bla") 1264 1265 name := "let's play: find the mussel" 1266 b := testIndexBuilder(t, nil, 1267 Document{Name: name, Content: content}, 1268 Document{Name: "play.txt", Content: content}) 1269 1270 t.Run("LineMatches", func(t *testing.T) { 1271 sres := searchForTest(t, b, 1272 &query.Regexp{ 1273 Regexp: mustParseRE("play.*mussel"), 1274 FileName: true, 1275 }) 1276 1277 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1278 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1279 } 1280 1281 if sres.Files[0].FileName != name { 1282 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1283 } 1284 }) 1285 1286 t.Run("ChunkMatches", func(t *testing.T) { 1287 sres := searchForTest(t, b, 1288 &query.Regexp{ 1289 Regexp: mustParseRE("play.*mussel"), 1290 FileName: true, 1291 }, chunkOpts) 1292 1293 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1295 } 1296 1297 if sres.Files[0].FileName != name { 1298 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1299 } 1300 }) 1301} 1302 1303func TestRegexpOrder(t *testing.T) { 1304 content := []byte("bla the needle") 1305 // ----------------01234567890123 1306 1307 b := testIndexBuilder(t, nil, 1308 Document{Name: "f1", Content: content}) 1309 1310 t.Run("LineMatches", func(t *testing.T) { 1311 sres := searchForTest(t, b, 1312 &query.Regexp{ 1313 Regexp: mustParseRE("dle.*bla"), 1314 }) 1315 1316 if len(sres.Files) != 0 { 1317 t.Fatalf("got %v, want 0 matches", sres.Files) 1318 } 1319 }) 1320 1321 t.Run("ChunkMatches", func(t *testing.T) { 1322 sres := searchForTest(t, b, 1323 &query.Regexp{ 1324 Regexp: mustParseRE("dle.*bla"), 1325 }) 1326 1327 if len(sres.Files) != 0 { 1328 t.Fatalf("got %v, want 0 matches", sres.Files) 1329 } 1330 }) 1331} 1332 1333func TestRepoName(t *testing.T) { 1334 content := []byte("bla the needle") 1335 // ----------------01234567890123 1336 1337 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1338 Document{Name: "f1", Content: content}) 1339 1340 t.Run("LineMatches", func(t *testing.T) { 1341 sres := searchForTest(t, b, 1342 query.NewAnd( 1343 &query.Substring{Pattern: "needle"}, 1344 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1345 )) 1346 1347 if len(sres.Files) != 0 { 1348 t.Fatalf("got %v, want 0 matches", sres.Files) 1349 } 1350 1351 if sres.Stats.FilesConsidered > 0 { 1352 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1353 } 1354 1355 sres = searchForTest(t, b, 1356 query.NewAnd( 1357 &query.Substring{Pattern: "needle"}, 1358 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1359 )) 1360 if len(sres.Files) != 1 { 1361 t.Fatalf("got %v, want 1 match", sres.Files) 1362 } 1363 }) 1364 1365 t.Run("ChunkMatches", func(t *testing.T) { 1366 sres := searchForTest(t, b, 1367 query.NewAnd( 1368 &query.Substring{Pattern: "needle"}, 1369 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1370 ), 1371 chunkOpts, 1372 ) 1373 1374 if len(sres.Files) != 0 { 1375 t.Fatalf("got %v, want 0 matches", sres.Files) 1376 } 1377 1378 if sres.Stats.FilesConsidered > 0 { 1379 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1380 } 1381 1382 sres = searchForTest(t, b, 1383 query.NewAnd( 1384 &query.Substring{Pattern: "needle"}, 1385 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1386 )) 1387 if len(sres.Files) != 1 { 1388 t.Fatalf("got %v, want 1 match", sres.Files) 1389 } 1390 }) 1391} 1392 1393func TestMergeMatches(t *testing.T) { 1394 content := []byte("blablabla") 1395 b := testIndexBuilder(t, nil, 1396 Document{Name: "f1", Content: content}) 1397 1398 t.Run("LineMatches", func(t *testing.T) { 1399 sres := searchForTest(t, b, 1400 &query.Substring{Pattern: "bla"}) 1401 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1402 t.Fatalf("got %v, want 1 match", sres.Files) 1403 } 1404 }) 1405 1406 t.Run("ChunkMatches", func(t *testing.T) { 1407 sres := searchForTest(t, b, 1408 &query.Substring{Pattern: "bla"}, 1409 chunkOpts, 1410 ) 1411 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1412 t.Fatalf("got %v, want 1 match", sres.Files) 1413 } 1414 }) 1415} 1416 1417func TestRepoURL(t *testing.T) { 1418 content := []byte("blablabla") 1419 b := testIndexBuilder(t, &Repository{ 1420 Name: "name", 1421 URL: "URL", 1422 CommitURLTemplate: "commit", 1423 FileURLTemplate: "file-url", 1424 LineFragmentTemplate: "fragment", 1425 }, Document{Name: "f1", Content: content}) 1426 1427 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1428 1429 if sres.RepoURLs["name"] != "file-url" { 1430 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1431 } 1432 if sres.LineFragments["name"] != "fragment" { 1433 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1434 } 1435} 1436 1437func TestRegexpCaseSensitive(t *testing.T) { 1438 content := []byte("bla\nfunc unmarshalGitiles\n") 1439 b := testIndexBuilder(t, nil, Document{ 1440 Name: "f1", 1441 Content: content, 1442 }) 1443 1444 t.Run("LineMatches", func(t *testing.T) { 1445 res := searchForTest(t, b, 1446 &query.Regexp{ 1447 Regexp: mustParseRE("func.*Gitiles"), 1448 CaseSensitive: true, 1449 }) 1450 1451 if len(res.Files) != 1 { 1452 t.Fatalf("got %v, want one match", res.Files) 1453 } 1454 }) 1455 1456 t.Run("ChunkMatches", func(t *testing.T) { 1457 res := searchForTest(t, b, 1458 &query.Regexp{ 1459 Regexp: mustParseRE("func.*Gitiles"), 1460 CaseSensitive: true, 1461 }, 1462 chunkOpts, 1463 ) 1464 1465 if len(res.Files) != 1 { 1466 t.Fatalf("got %v, want one match", res.Files) 1467 } 1468 }) 1469} 1470 1471func TestRegexpCaseFolding(t *testing.T) { 1472 content := []byte("bla\nfunc unmarshalGitiles\n") 1473 1474 b := testIndexBuilder(t, nil, 1475 Document{Name: "f1", Content: content}) 1476 res := searchForTest(t, b, 1477 &query.Regexp{ 1478 Regexp: mustParseRE("func.*GITILES"), 1479 CaseSensitive: false, 1480 }) 1481 1482 if len(res.Files) != 1 { 1483 t.Fatalf("got %v, want one match", res.Files) 1484 } 1485} 1486 1487func TestCaseRegexp(t *testing.T) { 1488 content := []byte("BLABLABLA") 1489 b := testIndexBuilder(t, nil, 1490 Document{Name: "f1", Content: content}) 1491 1492 t.Run("LineMatches", func(t *testing.T) { 1493 res := searchForTest(t, b, 1494 &query.Regexp{ 1495 Regexp: mustParseRE("[xb][xl][xa]"), 1496 CaseSensitive: true, 1497 }) 1498 1499 if len(res.Files) > 0 { 1500 t.Fatalf("got %v, want no matches", res.Files) 1501 } 1502 }) 1503 1504 t.Run("ChunkMatches", func(t *testing.T) { 1505 res := searchForTest(t, b, 1506 &query.Regexp{ 1507 Regexp: mustParseRE("[xb][xl][xa]"), 1508 CaseSensitive: true, 1509 }, 1510 chunkOpts, 1511 ) 1512 1513 if len(res.Files) > 0 { 1514 t.Fatalf("got %v, want no matches", res.Files) 1515 } 1516 }) 1517} 1518 1519func TestNegativeRegexp(t *testing.T) { 1520 content := []byte("BLABLABLA needle bla") 1521 b := testIndexBuilder(t, nil, 1522 Document{Name: "f1", Content: content}) 1523 1524 t.Run("LineMatches", func(t *testing.T) { 1525 res := searchForTest(t, b, 1526 query.NewAnd( 1527 &query.Substring{ 1528 Pattern: "needle", 1529 }, 1530 &query.Not{ 1531 Child: &query.Regexp{ 1532 Regexp: mustParseRE(".cs"), 1533 }, 1534 })) 1535 1536 if len(res.Files) != 1 { 1537 t.Fatalf("got %v, want 1 match", res.Files) 1538 } 1539 }) 1540 1541 t.Run("ChunkMatches", func(t *testing.T) { 1542 res := searchForTest(t, b, 1543 query.NewAnd( 1544 &query.Substring{ 1545 Pattern: "needle", 1546 }, 1547 &query.Not{ 1548 Child: &query.Regexp{ 1549 Regexp: mustParseRE(".cs"), 1550 }, 1551 }, 1552 ), 1553 chunkOpts) 1554 1555 if len(res.Files) != 1 { 1556 t.Fatalf("got %v, want 1 match", res.Files) 1557 } 1558 }) 1559} 1560 1561func TestSymbolRank(t *testing.T) { 1562 t.Skip() 1563 1564 content := []byte("func bla() blubxxxxx") 1565 // ----------------01234567890123456789 1566 b := testIndexBuilder(t, nil, 1567 Document{ 1568 Name: "f1", 1569 Content: content, 1570 }, Document{ 1571 Name: "f2", 1572 Content: content, 1573 Symbols: []DocumentSection{{5, 8}}, 1574 }, Document{ 1575 Name: "f3", 1576 Content: content, 1577 }) 1578 1579 t.Run("LineMatches", func(t *testing.T) { 1580 res := searchForTest(t, b, 1581 &query.Substring{ 1582 CaseSensitive: false, 1583 Pattern: "bla", 1584 }) 1585 1586 if len(res.Files) != 3 { 1587 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1588 } 1589 if res.Files[0].FileName != "f2" { 1590 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1591 } 1592 }) 1593 1594 t.Run("ChunkMatches", func(t *testing.T) { 1595 res := searchForTest(t, b, 1596 &query.Substring{ 1597 CaseSensitive: false, 1598 Pattern: "bla", 1599 }, chunkOpts) 1600 1601 if len(res.Files) != 3 { 1602 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1603 } 1604 if res.Files[0].FileName != "f2" { 1605 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1606 } 1607 }) 1608} 1609 1610func TestSymbolRankRegexpUTF8(t *testing.T) { 1611 t.Skip() 1612 1613 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1614 content := []byte(prefix + 1615 "func bla() blub") 1616 // ------012345678901234 1617 b := testIndexBuilder(t, nil, 1618 Document{ 1619 Name: "f1", 1620 Content: content, 1621 }, Document{ 1622 Name: "f2", 1623 Content: content, 1624 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1625 }, Document{ 1626 Name: "f3", 1627 Content: content, 1628 }) 1629 1630 t.Run("LineMatches", func(t *testing.T) { 1631 res := searchForTest(t, b, 1632 &query.Regexp{ 1633 Regexp: mustParseRE("b.a"), 1634 }) 1635 1636 if len(res.Files) != 3 { 1637 t.Fatalf("got %#v, want 3 files", res.Files) 1638 } 1639 if res.Files[0].FileName != "f2" { 1640 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1641 } 1642 }) 1643 1644 t.Run("ChunjkMatches", func(t *testing.T) { 1645 res := searchForTest(t, b, 1646 &query.Regexp{ 1647 Regexp: mustParseRE("b.a"), 1648 }, chunkOpts) 1649 1650 if len(res.Files) != 3 { 1651 t.Fatalf("got %#v, want 3 files", res.Files) 1652 } 1653 if res.Files[0].FileName != "f2" { 1654 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1655 } 1656 }) 1657} 1658 1659func TestPartialSymbolRank(t *testing.T) { 1660 t.Skip() 1661 1662 content := []byte("func bla() blub") 1663 // ----------------012345678901234 1664 1665 b := testIndexBuilder(t, nil, 1666 Document{ 1667 Name: "f1", 1668 Content: content, 1669 Symbols: []DocumentSection{{4, 9}}, 1670 }, Document{ 1671 Name: "f2", 1672 Content: content, 1673 Symbols: []DocumentSection{{4, 8}}, 1674 }, Document{ 1675 Name: "f3", 1676 Content: content, 1677 Symbols: []DocumentSection{{4, 9}}, 1678 }) 1679 1680 t.Run("LineMatches", func(t *testing.T) { 1681 res := searchForTest(t, b, 1682 &query.Substring{ 1683 Pattern: "bla", 1684 }) 1685 1686 if len(res.Files) != 3 { 1687 t.Fatalf("got %#v, want 3 files", res.Files) 1688 } 1689 if res.Files[0].FileName != "f2" { 1690 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1691 } 1692 }) 1693 1694 t.Run("ChunkMatches", func(t *testing.T) { 1695 res := searchForTest(t, b, 1696 &query.Substring{ 1697 Pattern: "bla", 1698 }, chunkOpts) 1699 1700 if len(res.Files) != 3 { 1701 t.Fatalf("got %#v, want 3 files", res.Files) 1702 } 1703 if res.Files[0].FileName != "f2" { 1704 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1705 } 1706 }) 1707} 1708 1709func TestNegativeRepo(t *testing.T) { 1710 content := []byte("bla the needle") 1711 // ----------------01234567890123 1712 b := testIndexBuilder(t, &Repository{ 1713 Name: "bla", 1714 }, Document{Name: "f1", Content: content}) 1715 1716 t.Run("LineMatches", func(t *testing.T) { 1717 sres := searchForTest(t, b, 1718 query.NewAnd( 1719 &query.Substring{Pattern: "needle"}, 1720 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1721 )) 1722 1723 if len(sres.Files) != 0 { 1724 t.Fatalf("got %v, want 0 matches", sres.Files) 1725 } 1726 }) 1727 1728 t.Run("ChunkMatches", func(t *testing.T) { 1729 sres := searchForTest(t, b, 1730 query.NewAnd( 1731 &query.Substring{Pattern: "needle"}, 1732 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1733 ), chunkOpts) 1734 1735 if len(sres.Files) != 0 { 1736 t.Fatalf("got %v, want 0 matches", sres.Files) 1737 } 1738 }) 1739} 1740 1741func TestListRepos(t *testing.T) { 1742 content := []byte("bla the needle\n") 1743 // ----------------012345678901234- 1744 1745 t.Run("default and minimal fallback", func(t *testing.T) { 1746 repo := &Repository{ 1747 Name: "reponame", 1748 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1749 } 1750 b := testIndexBuilder(t, repo, 1751 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1752 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1753 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1754 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1755 1756 searcher := searcherForTest(t, b) 1757 1758 for _, opts := range []*ListOptions{ 1759 nil, 1760 {Minimal: false}, 1761 {Minimal: true}, 1762 } { 1763 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1764 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1765 1766 res, err := searcher.List(context.Background(), q, opts) 1767 if err != nil { 1768 t.Fatalf("List(%v): %v", q, err) 1769 } 1770 1771 want := &RepoList{ 1772 Repos: []*RepoListEntry{{ 1773 Repository: *repo, 1774 Stats: RepoStats{ 1775 Documents: 4, 1776 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1777 Shards: 1, 1778 1779 NewLinesCount: 4, 1780 DefaultBranchNewLinesCount: 2, 1781 OtherBranchesNewLinesCount: 3, 1782 }, 1783 }}, 1784 Stats: RepoStats{ 1785 Repos: 1, 1786 Documents: 4, 1787 ContentBytes: 68, 1788 Shards: 1, 1789 1790 NewLinesCount: 4, 1791 DefaultBranchNewLinesCount: 2, 1792 OtherBranchesNewLinesCount: 3, 1793 }, 1794 } 1795 ignored := []cmp.Option{ 1796 cmpopts.EquateEmpty(), 1797 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1798 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1799 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1800 cmpopts.IgnoreFields(Repository{}, "priority"), 1801 } 1802 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1803 t.Fatalf("mismatch (-want +got):\n%s", diff) 1804 } 1805 1806 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1807 res, err = searcher.List(context.Background(), q, nil) 1808 if err != nil { 1809 t.Fatalf("List(%v): %v", q, err) 1810 } 1811 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1812 t.Fatalf("got %v, want 0 matches", res) 1813 } 1814 }) 1815 } 1816 }) 1817 1818 t.Run("minimal", func(t *testing.T) { 1819 repo := &Repository{ 1820 ID: 1234, 1821 Name: "reponame", 1822 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1823 RawConfig: map[string]string{"repoid": "1234"}, 1824 } 1825 b := testIndexBuilder(t, repo, 1826 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1827 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1828 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1829 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1830 1831 searcher := searcherForTest(t, b) 1832 1833 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1834 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1835 if err != nil { 1836 t.Fatalf("List(%v): %v", q, err) 1837 } 1838 1839 want := &RepoList{ 1840 Minimal: map[uint32]*MinimalRepoListEntry{ 1841 repo.ID: { 1842 HasSymbols: repo.HasSymbols, 1843 Branches: repo.Branches, 1844 }, 1845 }, 1846 Stats: RepoStats{ 1847 Repos: 1, 1848 Shards: 1, 1849 Documents: 4, 1850 IndexBytes: 412, 1851 ContentBytes: 68, 1852 NewLinesCount: 4, 1853 DefaultBranchNewLinesCount: 2, 1854 OtherBranchesNewLinesCount: 3, 1855 }, 1856 } 1857 1858 ignored := []cmp.Option{ 1859 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1860 } 1861 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1862 t.Fatalf("mismatch (-want +got):\n%s", diff) 1863 } 1864 1865 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1866 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1867 if err != nil { 1868 t.Fatalf("List(%v): %v", q, err) 1869 } 1870 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1871 t.Fatalf("got %v, want 0 matches", res) 1872 } 1873 }) 1874} 1875 1876func TestListReposByContent(t *testing.T) { 1877 content := []byte("bla the needle") 1878 1879 b := testIndexBuilder(t, &Repository{ 1880 Name: "reponame", 1881 }, 1882 Document{Name: "f1", Content: content}, 1883 Document{Name: "f2", Content: content}) 1884 1885 searcher := searcherForTest(t, b) 1886 q := &query.Substring{Pattern: "needle"} 1887 res, err := searcher.List(context.Background(), q, nil) 1888 if err != nil { 1889 t.Fatalf("List(%v): %v", q, err) 1890 } 1891 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1892 t.Fatalf("got %v, want 1 matches", res) 1893 } 1894 if got := res.Repos[0].Stats.Shards; got != 1 { 1895 t.Fatalf("got %d, want 1 shard", got) 1896 } 1897 q = &query.Substring{Pattern: "foo"} 1898 res, err = searcher.List(context.Background(), q, nil) 1899 if err != nil { 1900 t.Fatalf("List(%v): %v", q, err) 1901 } 1902 if len(res.Repos) != 0 { 1903 t.Fatalf("got %v, want 0 matches", res) 1904 } 1905} 1906 1907func TestMetadata(t *testing.T) { 1908 content := []byte("bla the needle") 1909 1910 b := testIndexBuilder(t, &Repository{ 1911 Name: "reponame", 1912 }, Document{Name: "f1", Content: content}, 1913 Document{Name: "f2", Content: content}) 1914 1915 var buf bytes.Buffer 1916 if err := b.Write(&buf); err != nil { 1917 t.Fatal(err) 1918 } 1919 f := &memSeeker{buf.Bytes()} 1920 1921 rd, _, err := ReadMetadata(f) 1922 if err != nil { 1923 t.Fatalf("ReadMetadata: %v", err) 1924 } 1925 1926 if got, want := rd[0].Name, "reponame"; got != want { 1927 t.Fatalf("got %q want %q", got, want) 1928 } 1929} 1930 1931func TestOr(t *testing.T) { 1932 b := testIndexBuilder(t, nil, 1933 Document{Name: "f1", Content: []byte("needle")}, 1934 Document{Name: "f2", Content: []byte("banana")}) 1935 t.Run("LineMatches", func(t *testing.T) { 1936 sres := searchForTest(t, b, query.NewOr( 1937 &query.Substring{Pattern: "needle"}, 1938 &query.Substring{Pattern: "banana"})) 1939 1940 if len(sres.Files) != 2 { 1941 t.Fatalf("got %v, want 2 files", sres.Files) 1942 } 1943 }) 1944 1945 t.Run("ChunkMatches", func(t *testing.T) { 1946 sres := searchForTest(t, b, query.NewOr( 1947 &query.Substring{Pattern: "needle"}, 1948 &query.Substring{Pattern: "banana"})) 1949 1950 if len(sres.Files) != 2 { 1951 t.Fatalf("got %v, want 2 files", sres.Files) 1952 } 1953 }) 1954} 1955 1956func TestFrequency(t *testing.T) { 1957 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 1958 1959 b := testIndexBuilder(t, nil, 1960 Document{ 1961 Name: "f1", 1962 Content: content, 1963 }) 1964 1965 t.Run("LineMatches", func(t *testing.T) { 1966 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 1967 if len(sres.Files) != 0 { 1968 t.Errorf("got %v, wanted 0 matches", sres.Files) 1969 } 1970 }) 1971 1972 t.Run("ChunkMatches", func(t *testing.T) { 1973 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 1974 if len(sres.Files) != 0 { 1975 t.Errorf("got %v, wanted 0 matches", sres.Files) 1976 } 1977 }) 1978} 1979 1980func TestMatchNewline(t *testing.T) { 1981 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 1982 if err != nil { 1983 t.Fatalf("syntax.Parse: %v", err) 1984 } 1985 1986 content := []byte("pqr\nalex") 1987 1988 b := testIndexBuilder(t, nil, 1989 Document{ 1990 Name: "f1", 1991 Content: content, 1992 }) 1993 1994 t.Run("LineMatches", func(t *testing.T) { 1995 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1996 if len(sres.Files) != 1 { 1997 t.Errorf("got %v, wanted 1 matches", sres.Files) 1998 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 1999 t.Errorf("got match line %q, want %q", l, content) 2000 } 2001 }) 2002 2003 t.Run("ChunkMatches", func(t *testing.T) { 2004 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 2005 if len(sres.Files) != 1 { 2006 t.Errorf("got %v, wanted 1 matches", sres.Files) 2007 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 2008 t.Errorf("got match line %q, want %q", c, content) 2009 } 2010 }) 2011} 2012 2013func TestSubRepo(t *testing.T) { 2014 subRepos := map[string]*Repository{ 2015 "sub": { 2016 Name: "sub-name", 2017 LineFragmentTemplate: "sub-line", 2018 }, 2019 } 2020 2021 content := []byte("pqr\nalex") 2022 2023 b := testIndexBuilder(t, &Repository{ 2024 SubRepoMap: subRepos, 2025 }, Document{ 2026 Name: "sub/f1", 2027 Content: content, 2028 SubRepositoryPath: "sub", 2029 }) 2030 2031 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 2032 if len(sres.Files) != 1 { 2033 t.Fatalf("got %v, wanted 1 matches", sres.Files) 2034 } 2035 2036 f := sres.Files[0] 2037 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 2038 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 2039 } 2040 2041 if sres.LineFragments["sub-name"] != "sub-line" { 2042 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 2043 } 2044} 2045 2046func TestSearchEither(t *testing.T) { 2047 b := testIndexBuilder(t, nil, 2048 Document{Name: "f1", Content: []byte("bla needle bla")}, 2049 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 2050 2051 t.Run("LineMatches", func(t *testing.T) { 2052 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 2053 if len(sres.Files) != 2 { 2054 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2055 } 2056 2057 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2058 if len(sres.Files) != 1 { 2059 t.Fatalf("got %v, wanted 1 match", sres.Files) 2060 } 2061 2062 if got, want := sres.Files[0].FileName, "f1"; got != want { 2063 t.Errorf("got %q, want %q", got, want) 2064 } 2065 }) 2066 2067 t.Run("ChunkMatches", func(t *testing.T) { 2068 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 2069 if len(sres.Files) != 2 { 2070 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2071 } 2072 2073 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2074 if len(sres.Files) != 1 { 2075 t.Fatalf("got %v, wanted 1 match", sres.Files) 2076 } 2077 2078 if got, want := sres.Files[0].FileName, "f1"; got != want { 2079 t.Errorf("got %q, want %q", got, want) 2080 } 2081 }) 2082} 2083 2084func TestUnicodeExactMatch(t *testing.T) { 2085 needle := "néédlÉ" 2086 content := []byte("blá blá " + needle + " blâ") 2087 2088 b := testIndexBuilder(t, nil, 2089 Document{Name: "f1", Content: content}) 2090 2091 t.Run("LineMatches", func(t *testing.T) { 2092 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2093 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2094 } 2095 }) 2096 2097 t.Run("ChunkMatches", func(t *testing.T) { 2098 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2099 if len(res.Files) != 1 { 2100 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2101 } 2102 }) 2103} 2104 2105func TestUnicodeCoverContent(t *testing.T) { 2106 needle := "néédlÉ" 2107 content := []byte("blá blá " + needle + " blâ") 2108 2109 b := testIndexBuilder(t, nil, 2110 Document{Name: "f1", Content: content}) 2111 2112 t.Run("LineMatches", func(t *testing.T) { 2113 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2114 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2115 } 2116 2117 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2118 if len(res.Files) != 1 { 2119 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2120 } 2121 2122 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2123 t.Errorf("got %d want %d", got, want) 2124 } 2125 }) 2126 2127 t.Run("ChunkMatches", func(t *testing.T) { 2128 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2129 if len(res.Files) != 0 { 2130 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2131 } 2132 2133 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2134 if len(res.Files) != 1 { 2135 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2136 } 2137 2138 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2139 want := uint32(strings.Index(string(content), needle)) 2140 if got != want { 2141 t.Errorf("got %d want %d", got, want) 2142 } 2143 }) 2144} 2145 2146func TestUnicodeNonCoverContent(t *testing.T) { 2147 needle := "nééáádlÉ" 2148 content := []byte("blá blá " + needle + " blâ") 2149 2150 b := testIndexBuilder(t, nil, 2151 Document{Name: "f1", Content: content}) 2152 2153 t.Run("LineMatches", func(t *testing.T) { 2154 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2155 if len(res.Files) != 1 { 2156 t.Fatalf("got %v, wanted 1 match", res.Files) 2157 } 2158 2159 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2160 t.Errorf("got %d want %d", got, want) 2161 } 2162 }) 2163 2164 t.Run("ChunkMatches", func(t *testing.T) { 2165 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2166 if len(res.Files) != 1 { 2167 t.Fatalf("got %v, wanted 1 match", res.Files) 2168 } 2169 2170 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2171 want := uint32(strings.Index(string(content), needle)) 2172 if got != want { 2173 t.Errorf("got %d want %d", got, want) 2174 } 2175 }) 2176} 2177 2178const kelvinCodePoint = 8490 2179 2180func TestUnicodeVariableLength(t *testing.T) { 2181 lower := 'k' 2182 upper := rune(kelvinCodePoint) 2183 2184 needle := "nee" + string([]rune{lower}) + "eed" 2185 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2186 " ee" + string([]rune{lower}) + "ee" + 2187 " ee" + string([]rune{upper}) + "ee") 2188 2189 t.Run("LineMatches", func(t *testing.T) { 2190 b := testIndexBuilder(t, nil, 2191 Document{Name: "f1", Content: []byte(corpus)}) 2192 2193 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2194 if len(res.Files) != 1 { 2195 t.Fatalf("got %v, wanted 1 match", res.Files) 2196 } 2197 }) 2198 2199 t.Run("ChunkMatches", func(t *testing.T) { 2200 b := testIndexBuilder(t, nil, 2201 Document{Name: "f1", Content: []byte(corpus)}) 2202 2203 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2204 if len(res.Files) != 1 { 2205 t.Fatalf("got %v, wanted 1 match", res.Files) 2206 } 2207 }) 2208} 2209 2210func TestUnicodeFileStartOffsets(t *testing.T) { 2211 unicode := "世界" 2212 wat := "waaaaaat" 2213 b := testIndexBuilder(t, nil, 2214 Document{ 2215 Name: "f1", 2216 Content: []byte(unicode), 2217 }, 2218 Document{ 2219 Name: "f2", 2220 Content: []byte(wat), 2221 }, 2222 ) 2223 q := &query.Substring{Pattern: wat, Content: true} 2224 res := searchForTest(t, b, q) 2225 if len(res.Files) != 1 { 2226 t.Fatalf("got %v, wanted 1 match", res.Files) 2227 } 2228} 2229 2230func TestLongFileUTF8(t *testing.T) { 2231 needle := "neeedle" 2232 2233 // 6 bytes. 2234 unicode := "世界" 2235 content := []byte(strings.Repeat(unicode, 100) + needle) 2236 b := testIndexBuilder(t, nil, 2237 Document{ 2238 Name: "f1", 2239 Content: []byte(strings.Repeat("a", 50)), 2240 }, 2241 Document{ 2242 Name: "f2", 2243 Content: content, 2244 }) 2245 2246 t.Run("LineMatches", func(t *testing.T) { 2247 q := &query.Substring{Pattern: needle, Content: true} 2248 res := searchForTest(t, b, q) 2249 if len(res.Files) != 1 { 2250 t.Errorf("got %v, want 1 result", res) 2251 } 2252 }) 2253 2254 t.Run("ChunkMatches", func(t *testing.T) { 2255 q := &query.Substring{Pattern: needle, Content: true} 2256 res := searchForTest(t, b, q, chunkOpts) 2257 if len(res.Files) != 1 { 2258 t.Errorf("got %v, want 1 result", res) 2259 } 2260 }) 2261} 2262 2263func TestEstimateDocCount(t *testing.T) { 2264 content := []byte("bla needle bla") 2265 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2266 Document{Name: "f1", Content: content}, 2267 Document{Name: "f2", Content: content}, 2268 ) 2269 2270 t.Run("LineMatches", func(t *testing.T) { 2271 if sres := searchForTest(t, b, 2272 query.NewAnd( 2273 &query.Substring{Pattern: "needle"}, 2274 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2275 ), SearchOptions{ 2276 EstimateDocCount: true, 2277 }); sres.Stats.ShardFilesConsidered != 2 { 2278 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2279 } 2280 if sres := searchForTest(t, b, 2281 query.NewAnd( 2282 &query.Substring{Pattern: "needle"}, 2283 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2284 ), SearchOptions{ 2285 EstimateDocCount: true, 2286 }); sres.Stats.ShardFilesConsidered != 0 { 2287 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2288 } 2289 }) 2290 2291 t.Run("ChunkMatches", func(t *testing.T) { 2292 if sres := searchForTest(t, b, 2293 query.NewAnd( 2294 &query.Substring{Pattern: "needle"}, 2295 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2296 ), SearchOptions{ 2297 EstimateDocCount: true, 2298 ChunkMatches: true, 2299 }); sres.Stats.ShardFilesConsidered != 2 { 2300 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2301 } 2302 if sres := searchForTest(t, b, 2303 query.NewAnd( 2304 &query.Substring{Pattern: "needle"}, 2305 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2306 ), SearchOptions{ 2307 EstimateDocCount: true, 2308 ChunkMatches: true, 2309 }); sres.Stats.ShardFilesConsidered != 0 { 2310 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2311 } 2312 }) 2313} 2314 2315func TestUTF8CorrectCorpus(t *testing.T) { 2316 needle := "neeedle" 2317 2318 // 6 bytes. 2319 unicode := "世界" 2320 b := testIndexBuilder(t, nil, 2321 Document{ 2322 Name: "f1", 2323 Content: []byte(strings.Repeat(unicode, 100)), 2324 }, 2325 Document{ 2326 Name: "xxxxxneeedle", 2327 Content: []byte("hello"), 2328 }) 2329 2330 t.Run("LineMatches", func(t *testing.T) { 2331 q := &query.Substring{Pattern: needle, FileName: true} 2332 res := searchForTest(t, b, q) 2333 if len(res.Files) != 1 { 2334 t.Errorf("got %v, want 1 result", res) 2335 } 2336 }) 2337 2338 t.Run("ChunkMatches", func(t *testing.T) { 2339 q := &query.Substring{Pattern: needle, FileName: true} 2340 res := searchForTest(t, b, q, chunkOpts) 2341 if len(res.Files) != 1 { 2342 t.Errorf("got %v, want 1 result", res) 2343 } 2344 }) 2345} 2346 2347func TestBuilderStats(t *testing.T) { 2348 b := testIndexBuilder(t, nil, 2349 Document{ 2350 Name: "f1", 2351 Content: []byte(strings.Repeat("abcd", 1024)), 2352 }) 2353 var buf bytes.Buffer 2354 if err := b.Write(&buf); err != nil { 2355 t.Fatal(err) 2356 } 2357 2358 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2359 t.Errorf("got %d, want %d", got, want) 2360 } 2361} 2362 2363func TestIOStats(t *testing.T) { 2364 b := testIndexBuilder(t, nil, 2365 Document{ 2366 Name: "f1", 2367 Content: []byte(strings.Repeat("abcd", 1024)), 2368 }) 2369 2370 t.Run("LineMatches", func(t *testing.T) { 2371 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2372 res := searchForTest(t, b, q) 2373 2374 // 4096 (content) + 2 (overhead: newlines or doc sections) 2375 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2376 t.Errorf("got content I/O %d, want %d", got, want) 2377 } 2378 2379 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2380 // delta encoded. 2381 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2382 t.Errorf("got index I/O %d, want %d", got, want) 2383 } 2384 }) 2385 2386 t.Run("ChunkMatches", func(t *testing.T) { 2387 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2388 res := searchForTest(t, b, q, chunkOpts) 2389 2390 // 4096 (content) + 2 (overhead: newlines or doc sections) 2391 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2392 t.Errorf("got content I/O %d, want %d", got, want) 2393 } 2394 2395 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2396 // delta encoded. 2397 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2398 t.Errorf("got index I/O %d, want %d", got, want) 2399 } 2400 }) 2401} 2402 2403func TestStartLineAnchor(t *testing.T) { 2404 b := testIndexBuilder(t, nil, 2405 Document{ 2406 Name: "f1", 2407 Content: []byte( 2408 `hello 2409start of middle of line 2410`), 2411 }) 2412 2413 t.Run("LineMatches", func(t *testing.T) { 2414 q, err := query.Parse("^start") 2415 if err != nil { 2416 t.Errorf("parse: %v", err) 2417 } 2418 2419 res := searchForTest(t, b, q) 2420 if len(res.Files) != 1 { 2421 t.Errorf("got %v, want 1 file", res.Files) 2422 } 2423 2424 q, err = query.Parse("^middle") 2425 if err != nil { 2426 t.Errorf("parse: %v", err) 2427 } 2428 res = searchForTest(t, b, q) 2429 if len(res.Files) != 0 { 2430 t.Errorf("got %v, want 0 files", res.Files) 2431 } 2432 }) 2433 2434 t.Run("ChunkMatches", func(t *testing.T) { 2435 q, err := query.Parse("^start") 2436 if err != nil { 2437 t.Errorf("parse: %v", err) 2438 } 2439 2440 res := searchForTest(t, b, q, chunkOpts) 2441 if len(res.Files) != 1 { 2442 t.Errorf("got %v, want 1 file", res.Files) 2443 } 2444 2445 q, err = query.Parse("^middle") 2446 if err != nil { 2447 t.Errorf("parse: %v", err) 2448 } 2449 res = searchForTest(t, b, q, chunkOpts) 2450 if len(res.Files) != 0 { 2451 t.Errorf("got %v, want 0 files", res.Files) 2452 } 2453 }) 2454} 2455 2456func TestAndOrUnicode(t *testing.T) { 2457 q, err := query.Parse("orange.*apple") 2458 if err != nil { 2459 t.Errorf("parse: %v", err) 2460 } 2461 finalQ := query.NewAnd(q, 2462 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2463 query.NewOr(&query.Branch{Pattern: "master"})))) 2464 2465 b := testIndexBuilder(t, &Repository{ 2466 Name: "name", 2467 Branches: []RepositoryBranch{{"master", "master-version"}}, 2468 }, Document{ 2469 Name: "f2", 2470 Content: []byte("orange\u2318apple"), 2471 // --------------0123456 78901 2472 Branches: []string{"master"}, 2473 }) 2474 2475 t.Run("LineMatches", func(t *testing.T) { 2476 res := searchForTest(t, b, finalQ) 2477 if len(res.Files) != 1 { 2478 t.Errorf("got %v, want 1 result", res.Files) 2479 } 2480 }) 2481 2482 t.Run("ChunkMatches", func(t *testing.T) { 2483 res := searchForTest(t, b, finalQ, chunkOpts) 2484 if len(res.Files) != 1 { 2485 t.Errorf("got %v, want 1 result", res.Files) 2486 } 2487 }) 2488} 2489 2490func TestAndShort(t *testing.T) { 2491 content := []byte("bla needle at orange bla") 2492 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2493 Document{Name: "f1", Content: content}, 2494 Document{Name: "f2", Content: []byte("xx at xx")}, 2495 Document{Name: "f3", Content: []byte("yy orange xx")}, 2496 ) 2497 2498 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2499 &query.Substring{Pattern: "orange"}) 2500 2501 t.Run("LineMatches", func(t *testing.T) { 2502 res := searchForTest(t, b, q) 2503 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2504 t.Errorf("got %v, want 1 result", res.Files) 2505 } 2506 }) 2507 2508 t.Run("ChunkMatches", func(t *testing.T) { 2509 res := searchForTest(t, b, q, chunkOpts) 2510 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2511 t.Errorf("got %v, want 1 result", res.Files) 2512 } 2513 }) 2514} 2515 2516func TestNoCollectRegexpSubstring(t *testing.T) { 2517 content := []byte("bla final bla\nfoo final, foo") 2518 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2519 Document{Name: "f1", Content: content}, 2520 ) 2521 2522 q := &query.Regexp{ 2523 Regexp: mustParseRE("final[,.]"), 2524 } 2525 2526 t.Run("LineMatches", func(t *testing.T) { 2527 res := searchForTest(t, b, q) 2528 if len(res.Files) != 1 { 2529 t.Fatalf("got %v, want 1 result", res.Files) 2530 } 2531 if f := res.Files[0]; len(f.LineMatches) != 1 { 2532 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2533 } 2534 }) 2535 2536 t.Run("ChunkMatches", func(t *testing.T) { 2537 res := searchForTest(t, b, q, chunkOpts) 2538 if len(res.Files) != 1 { 2539 t.Fatalf("got %v, want 1 result", res.Files) 2540 } 2541 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2542 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2543 } 2544 }) 2545} 2546 2547func printLineMatches(ms []LineMatch) string { 2548 var ss []string 2549 for _, m := range ms { 2550 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2551 } 2552 2553 return strings.Join(ss, ", ") 2554} 2555 2556func TestLang(t *testing.T) { 2557 content := []byte("bla needle bla") 2558 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2559 Document{Name: "f1", Content: content}, 2560 Document{Name: "f2", Language: "java", Content: content}, 2561 Document{Name: "f3", Language: "cpp", Content: content}, 2562 ) 2563 2564 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2565 &query.Language{Language: "cpp"}) 2566 2567 t.Run("LineMatches", func(t *testing.T) { 2568 res := searchForTest(t, b, q) 2569 if len(res.Files) != 1 { 2570 t.Fatalf("got %v, want 1 result in f3", res.Files) 2571 } 2572 f := res.Files[0] 2573 if f.FileName != "f3" || f.Language != "cpp" { 2574 t.Fatalf("got %v, want 1 match with language cpp", f) 2575 } 2576 }) 2577 2578 t.Run("ChunkMatches", func(t *testing.T) { 2579 res := searchForTest(t, b, q, chunkOpts) 2580 if len(res.Files) != 1 { 2581 t.Fatalf("got %v, want 1 result in f3", res.Files) 2582 } 2583 f := res.Files[0] 2584 if f.FileName != "f3" || f.Language != "cpp" { 2585 t.Fatalf("got %v, want 1 match with language cpp", f) 2586 } 2587 }) 2588} 2589 2590func TestLangShortcut(t *testing.T) { 2591 content := []byte("bla needle bla") 2592 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2593 Document{Name: "f2", Language: "java", Content: content}, 2594 Document{Name: "f3", Language: "cpp", Content: content}, 2595 ) 2596 2597 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2598 &query.Language{Language: "fortran"}) 2599 2600 t.Run("LineMatches", func(t *testing.T) { 2601 res := searchForTest(t, b, q) 2602 if len(res.Files) != 0 { 2603 t.Fatalf("got %v, want 0 results", res.Files) 2604 } 2605 if res.Stats.IndexBytesLoaded > 0 { 2606 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2607 } 2608 }) 2609 2610 t.Run("ChunkMatches", func(t *testing.T) { 2611 res := searchForTest(t, b, q, chunkOpts) 2612 if len(res.Files) != 0 { 2613 t.Fatalf("got %v, want 0 results", res.Files) 2614 } 2615 if res.Stats.IndexBytesLoaded > 0 { 2616 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2617 } 2618 }) 2619} 2620 2621func TestNoTextMatchAtoms(t *testing.T) { 2622 content := []byte("bla needle bla") 2623 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2624 Document{Name: "f1", Content: content}, 2625 Document{Name: "f2", Language: "java", Content: content}, 2626 Document{Name: "f3", Language: "cpp", Content: content}, 2627 ) 2628 q := query.NewAnd(&query.Language{Language: "java"}) 2629 t.Run("LineMatches", func(t *testing.T) { 2630 res := searchForTest(t, b, q) 2631 if len(res.Files) != 1 { 2632 t.Fatalf("got %v, want 1 result in f3", res.Files) 2633 } 2634 }) 2635 2636 t.Run("ChunkMatches", func(t *testing.T) { 2637 res := searchForTest(t, b, q, chunkOpts) 2638 if len(res.Files) != 1 { 2639 t.Fatalf("got %v, want 1 result in f3", res.Files) 2640 } 2641 }) 2642} 2643 2644func TestNoPositiveAtoms(t *testing.T) { 2645 content := []byte("bla needle bla") 2646 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2647 Document{Name: "f1", Content: content}, 2648 Document{Name: "f2", Content: content}, 2649 ) 2650 2651 q := query.NewAnd( 2652 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2653 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2654 t.Run("LineMatches", func(t *testing.T) { 2655 res := searchForTest(t, b, q) 2656 if len(res.Files) != 2 { 2657 t.Fatalf("got %v, want 2 results in f3", res.Files) 2658 } 2659 }) 2660 t.Run("ChunkMatches", func(t *testing.T) { 2661 res := searchForTest(t, b, q, chunkOpts) 2662 if len(res.Files) != 2 { 2663 t.Fatalf("got %v, want 2 results in f3", res.Files) 2664 } 2665 }) 2666} 2667 2668func TestSymbolBoundaryStart(t *testing.T) { 2669 content := []byte("start\nbla bla\nend") 2670 // ----------------012345-67890123-456 2671 2672 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2673 Document{ 2674 Name: "f1", 2675 Content: content, 2676 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2677 }, 2678 ) 2679 q := &query.Symbol{ 2680 Expr: &query.Substring{Pattern: "start"}, 2681 } 2682 t.Run("LineMatches", func(t *testing.T) { 2683 res := searchForTest(t, b, q) 2684 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2685 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2686 } 2687 m := res.Files[0].LineMatches[0].LineFragments[0] 2688 if m.Offset != 0 { 2689 t.Fatalf("got offset %d want 0", m.Offset) 2690 } 2691 }) 2692 2693 t.Run("ChunkMatches", func(t *testing.T) { 2694 res := searchForTest(t, b, q, chunkOpts) 2695 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2696 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2697 } 2698 m := res.Files[0].ChunkMatches[0].Ranges[0] 2699 if m.Start.ByteOffset != 0 { 2700 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2701 } 2702 }) 2703} 2704 2705func TestSymbolBoundaryEnd(t *testing.T) { 2706 content := []byte("start\nbla bla\nend") 2707 // ----------------012345-67890123-456 2708 2709 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2710 Document{ 2711 Name: "f1", 2712 Content: content, 2713 Symbols: []DocumentSection{{14, 17}}, 2714 }, 2715 ) 2716 q := &query.Symbol{ 2717 Expr: &query.Substring{Pattern: "end"}, 2718 } 2719 t.Run("LineMatches", func(t *testing.T) { 2720 res := searchForTest(t, b, q) 2721 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2722 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2723 } 2724 m := res.Files[0].LineMatches[0].LineFragments[0] 2725 if m.Offset != 14 { 2726 t.Fatalf("got offset %d want 0", m.Offset) 2727 } 2728 }) 2729 2730 t.Run("ChunkMatches", func(t *testing.T) { 2731 res := searchForTest(t, b, q, chunkOpts) 2732 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2733 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2734 } 2735 m := res.Files[0].ChunkMatches[0].Ranges[0] 2736 if m.Start.ByteOffset != 14 { 2737 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2738 } 2739 }) 2740} 2741 2742func TestSymbolSubstring(t *testing.T) { 2743 content := []byte("bla\nsymblabla\nbla") 2744 // ----------------0123-4567890123-456 2745 2746 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2747 Document{ 2748 Name: "f1", 2749 Content: content, 2750 Symbols: []DocumentSection{{4, 12}}, 2751 }, 2752 ) 2753 q := &query.Symbol{ 2754 Expr: &query.Substring{Pattern: "bla"}, 2755 } 2756 t.Run("LineMatches", func(t *testing.T) { 2757 res := searchForTest(t, b, q) 2758 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2759 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2760 } 2761 m := res.Files[0].LineMatches[0].LineFragments[0] 2762 if m.Offset != 7 || m.MatchLength != 3 { 2763 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 2764 } 2765 }) 2766 2767 t.Run("ChunkMatches", func(t *testing.T) { 2768 res := searchForTest(t, b, q, chunkOpts) 2769 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2770 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2771 } 2772 m := res.Files[0].ChunkMatches[0].Ranges[0] 2773 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 2774 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 2775 } 2776 }) 2777} 2778 2779func TestSymbolSubstringExact(t *testing.T) { 2780 content := []byte("bla\nsym\nbla\nsym\nasymb") 2781 // ----------------0123-4567-890123456-78901 2782 2783 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2784 Document{ 2785 Name: "f1", 2786 Content: content, 2787 Symbols: []DocumentSection{{4, 7}}, 2788 }, 2789 ) 2790 q := &query.Symbol{ 2791 Expr: &query.Substring{Pattern: "sym"}, 2792 } 2793 t.Run("LineMatches", func(t *testing.T) { 2794 res := searchForTest(t, b, q) 2795 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2796 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2797 } 2798 m := res.Files[0].LineMatches[0].LineFragments[0] 2799 if m.Offset != 4 { 2800 t.Fatalf("got offset %d, want 7", m.Offset) 2801 } 2802 }) 2803 2804 t.Run("ChunkMatches", func(t *testing.T) { 2805 res := searchForTest(t, b, q, chunkOpts) 2806 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2807 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2808 } 2809 m := res.Files[0].ChunkMatches[0].Ranges[0] 2810 if m.Start.ByteOffset != 4 { 2811 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 2812 } 2813 }) 2814} 2815 2816func TestSymbolRegexpExact(t *testing.T) { 2817 content := []byte("blah\nbla\nbl") 2818 // ----------------01234-5678-90 2819 2820 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2821 Document{ 2822 Name: "f1", 2823 Content: content, 2824 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 2825 }, 2826 ) 2827 q := &query.Symbol{ 2828 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 2829 } 2830 t.Run("LineMatches", func(t *testing.T) { 2831 res := searchForTest(t, b, q) 2832 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2833 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2834 } 2835 m := res.Files[0].LineMatches[0].LineFragments[0] 2836 if m.Offset != 5 { 2837 t.Fatalf("got offset %d, want 5", m.Offset) 2838 } 2839 }) 2840 2841 t.Run("ChunkMatches", func(t *testing.T) { 2842 res := searchForTest(t, b, q, chunkOpts) 2843 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2844 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2845 } 2846 m := res.Files[0].ChunkMatches[0].Ranges[0] 2847 if m.Start.ByteOffset != 5 { 2848 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 2849 } 2850 }) 2851} 2852 2853func TestSymbolRegexpPartial(t *testing.T) { 2854 content := []byte("abcdef") 2855 // ----------------012345 2856 2857 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2858 Document{ 2859 Name: "f1", 2860 Content: content, 2861 Symbols: []DocumentSection{{0, 6}}, 2862 }, 2863 ) 2864 q := &query.Symbol{ 2865 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 2866 } 2867 t.Run("LineMatches", func(t *testing.T) { 2868 res := searchForTest(t, b, q) 2869 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2870 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2871 } 2872 m := res.Files[0].LineMatches[0].LineFragments[0] 2873 if m.Offset != 1 { 2874 t.Fatalf("got offset %d, want 1", m.Offset) 2875 } 2876 if m.MatchLength != 3 { 2877 t.Fatalf("got match length %d, want 3", m.MatchLength) 2878 } 2879 }) 2880 2881 t.Run("ChunkMatches", func(t *testing.T) { 2882 res := searchForTest(t, b, q, chunkOpts) 2883 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2884 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2885 } 2886 m := res.Files[0].ChunkMatches[0].Ranges[0] 2887 if m.Start.ByteOffset != 1 { 2888 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 2889 } 2890 if m.End.ByteOffset != 4 { 2891 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 2892 } 2893 }) 2894} 2895 2896func TestSymbolRegexpAll(t *testing.T) { 2897 docs := []Document{ 2898 { 2899 Name: "f1", 2900 Content: []byte("Hello Zoekt"), 2901 // --------------01234567890 2902 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 2903 }, 2904 { 2905 Name: "f2", 2906 Content: []byte("Second Zoekt Third"), 2907 // --------------012345678901234567 2908 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 2909 }, 2910 } 2911 2912 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) 2913 q := &query.Symbol{ 2914 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2915 } 2916 t.Run("LineMatches", func(t *testing.T) { 2917 res := searchForTest(t, b, q) 2918 if len(res.Files) != len(docs) { 2919 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2920 } 2921 for i, want := range docs { 2922 got := res.Files[i].LineMatches[0].LineFragments 2923 if len(got) != len(want.Symbols) { 2924 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2925 } 2926 2927 for j, sec := range want.Symbols { 2928 if sec.Start != got[j].Offset { 2929 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 2930 } 2931 } 2932 } 2933 }) 2934 2935 t.Run("ChunkMatches", func(t *testing.T) { 2936 res := searchForTest(t, b, q, chunkOpts) 2937 if len(res.Files) != len(docs) { 2938 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2939 } 2940 for i, want := range docs { 2941 got := res.Files[i].ChunkMatches[0].Ranges 2942 if len(got) != len(want.Symbols) { 2943 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2944 } 2945 2946 for j, sec := range want.Symbols { 2947 if sec.Start != uint32(got[j].Start.ByteOffset) { 2948 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 2949 } 2950 } 2951 } 2952 }) 2953} 2954 2955func TestHitIterTerminate(t *testing.T) { 2956 // contrived input: trigram frequencies forces selecting abc + 2957 // def for the distance iteration. There is no match, so this 2958 // will advance the compressedPostingIterator to beyond the 2959 // end. 2960 content := []byte("abc bcdbcd cdecde abcabc def efg") 2961 b := testIndexBuilder(t, nil, 2962 Document{ 2963 Name: "f1", 2964 Content: content, 2965 }, 2966 ) 2967 2968 t.Run("LineMatches", func(t *testing.T) { 2969 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 2970 }) 2971 2972 t.Run("ChunkMatches", func(t *testing.T) { 2973 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 2974 }) 2975} 2976 2977func TestDistanceHitIterBailLast(t *testing.T) { 2978 content := []byte("AST AST AST UASH") 2979 b := testIndexBuilder(t, nil, 2980 Document{ 2981 Name: "f1", 2982 Content: content, 2983 }, 2984 ) 2985 t.Run("LineMatches", func(t *testing.T) { 2986 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 2987 if len(res.Files) != 0 { 2988 t.Fatalf("got %v, want no results", res.Files) 2989 } 2990 }) 2991 2992 t.Run("LineMatches", func(t *testing.T) { 2993 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 2994 if len(res.Files) != 0 { 2995 t.Fatalf("got %v, want no results", res.Files) 2996 } 2997 }) 2998} 2999 3000func TestDocumentSectionRuneBoundary(t *testing.T) { 3001 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3002 b, err := NewIndexBuilder(nil) 3003 if err != nil { 3004 t.Fatalf("NewIndexBuilder: %v", err) 3005 } 3006 3007 for i, sec := range []DocumentSection{ 3008 {2, 6}, 3009 {3, 7}, 3010 } { 3011 if err := b.Add(Document{ 3012 Name: "f1", 3013 Content: []byte(content), 3014 Symbols: []DocumentSection{sec}, 3015 }); err == nil { 3016 t.Errorf("%d: Add succeeded", i) 3017 } 3018 } 3019} 3020 3021func TestUnicodeQuery(t *testing.T) { 3022 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3023 b := testIndexBuilder(t, nil, 3024 Document{ 3025 Name: "f1", 3026 Content: []byte(content), 3027 }, 3028 ) 3029 3030 q := &query.Substring{Pattern: content} 3031 3032 t.Run("LineMatches", func(t *testing.T) { 3033 res := searchForTest(t, b, q) 3034 if len(res.Files) != 1 { 3035 t.Fatalf("want 1 match, got %v", res.Files) 3036 } 3037 3038 f := res.Files[0] 3039 if len(f.LineMatches) != 1 { 3040 t.Fatalf("want 1 line, got %v", f.LineMatches) 3041 } 3042 l := f.LineMatches[0] 3043 3044 if len(l.LineFragments) != 1 { 3045 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 3046 } 3047 fr := l.LineFragments[0] 3048 if fr.MatchLength != len(content) { 3049 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 3050 } 3051 }) 3052 3053 t.Run("ChunkMatches", func(t *testing.T) { 3054 res := searchForTest(t, b, q, chunkOpts) 3055 if len(res.Files) != 1 { 3056 t.Fatalf("want 1 match, got %v", res.Files) 3057 } 3058 3059 f := res.Files[0] 3060 if len(f.ChunkMatches) != 1 { 3061 t.Fatalf("want 1 line, got %v", f.LineMatches) 3062 } 3063 cm := f.ChunkMatches[0] 3064 3065 if len(cm.Ranges) != 1 { 3066 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 3067 } 3068 rr := cm.Ranges[0] 3069 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 3070 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 3071 } 3072 }) 3073} 3074 3075func TestSkipInvalidContent(t *testing.T) { 3076 for _, content := range []string{ 3077 // Binary 3078 "abc def \x00 abc", 3079 } { 3080 3081 b, err := NewIndexBuilder(nil) 3082 if err != nil { 3083 t.Fatalf("NewIndexBuilder: %v", err) 3084 } 3085 3086 if err := b.Add(Document{ 3087 Name: "f1", 3088 Content: []byte(content), 3089 }); err != nil { 3090 t.Fatal(err) 3091 } 3092 3093 t.Run("LineMatches", func(t *testing.T) { 3094 q := &query.Substring{Pattern: "abc def"} 3095 res := searchForTest(t, b, q) 3096 if len(res.Files) != 0 { 3097 t.Fatalf("got %v, want no results", res.Files) 3098 } 3099 3100 q = &query.Substring{Pattern: "NOT-INDEXED"} 3101 res = searchForTest(t, b, q) 3102 if len(res.Files) != 1 { 3103 t.Fatalf("got %v, want 1 result", res.Files) 3104 } 3105 }) 3106 3107 t.Run("ChunkMatches", func(t *testing.T) { 3108 q := &query.Substring{Pattern: "abc def"} 3109 res := searchForTest(t, b, q, chunkOpts) 3110 if len(res.Files) != 0 { 3111 t.Fatalf("got %v, want no results", res.Files) 3112 } 3113 3114 q = &query.Substring{Pattern: "NOT-INDEXED"} 3115 res = searchForTest(t, b, q, chunkOpts) 3116 if len(res.Files) != 1 { 3117 t.Fatalf("got %v, want 1 result", res.Files) 3118 } 3119 }) 3120 } 3121} 3122 3123func TestCheckText(t *testing.T) { 3124 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { 3125 if err := CheckText([]byte(text), 20000); err != nil { 3126 t.Errorf("CheckText(%q): %v", text, err) 3127 } 3128 } 3129 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} { 3130 if err := CheckText([]byte(text), 15); err == nil { 3131 t.Errorf("CheckText(%q) succeeded", text) 3132 } 3133 } 3134} 3135 3136func TestLineAnd(t *testing.T) { 3137 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3138 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3139 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3140 Document{Name: "f3", Content: []byte("banana grape")}, 3141 ) 3142 pattern := "(apple)(?-s:.)*?(banana)" 3143 r, _ := syntax.Parse(pattern, syntax.Perl) 3144 3145 q := query.Regexp{ 3146 Regexp: r, 3147 Content: true, 3148 } 3149 t.Run("LineMatches", func(t *testing.T) { 3150 res := searchForTest(t, b, &q) 3151 wantRegexpCount := 1 3152 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3153 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3154 } 3155 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3156 t.Errorf("got %v, want 1 result", res.Files) 3157 } 3158 }) 3159 3160 t.Run("ChunkMatches", func(t *testing.T) { 3161 res := searchForTest(t, b, &q, chunkOpts) 3162 wantRegexpCount := 1 3163 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3164 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3165 } 3166 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3167 t.Errorf("got %v, want 1 result", res.Files) 3168 } 3169 }) 3170} 3171 3172func TestLineAndFileName(t *testing.T) { 3173 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3174 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3175 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3176 Document{Name: "apple banana", Content: []byte("banana grape")}, 3177 ) 3178 pattern := "(apple)(?-s:.)*?(banana)" 3179 r, _ := syntax.Parse(pattern, syntax.Perl) 3180 3181 q := query.Regexp{ 3182 Regexp: r, 3183 FileName: true, 3184 } 3185 t.Run("LineMatches", func(t *testing.T) { 3186 res := searchForTest(t, b, &q) 3187 wantRegexpCount := 1 3188 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3189 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3190 } 3191 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3192 t.Errorf("got %v, want 1 result", res.Files) 3193 } 3194 }) 3195 3196 t.Run("ChunkMatches", func(t *testing.T) { 3197 res := searchForTest(t, b, &q, chunkOpts) 3198 wantRegexpCount := 1 3199 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3200 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3201 } 3202 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3203 t.Errorf("got %v, want 1 result", res.Files) 3204 } 3205 }) 3206} 3207 3208func TestMultiLineRegex(t *testing.T) { 3209 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3210 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3211 Document{Name: "f2", Content: []byte("apple orange")}, 3212 Document{Name: "f3", Content: []byte("grape apple")}, 3213 ) 3214 pattern := "(apple).*?[[:space:]].*?(grape)" 3215 r, _ := syntax.Parse(pattern, syntax.Perl) 3216 3217 q := query.Regexp{ 3218 Regexp: r, 3219 } 3220 t.Run("LineMatches", func(t *testing.T) { 3221 res := searchForTest(t, b, &q) 3222 wantRegexpCount := 2 3223 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3224 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3225 } 3226 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3227 t.Errorf("got %v, want 1 result", res.Files) 3228 } 3229 if l := len(res.Files[0].LineMatches); l != 2 { 3230 t.Errorf("got %v, want 2 line matches", l) 3231 } 3232 }) 3233 3234 t.Run("ChunkMatches", func(t *testing.T) { 3235 res := searchForTest(t, b, &q, chunkOpts) 3236 wantRegexpCount := 2 3237 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3238 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3239 } 3240 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3241 t.Errorf("got %v, want 1 result", res.Files) 3242 } 3243 if l := len(res.Files[0].ChunkMatches); l != 1 { 3244 t.Errorf("got %v, want 1 chunk matches", l) 3245 } 3246 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3247 t.Errorf("got %v, want 1 chunk ranges", l) 3248 } 3249 }) 3250} 3251 3252func TestSearchTypeFileName(t *testing.T) { 3253 b := testIndexBuilder(t, &Repository{ 3254 Name: "reponame", 3255 }, 3256 Document{Name: "f1", Content: []byte("bla the needle")}, 3257 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3258 // -----------------------------------012345678901234567890-123456 3259 ) 3260 3261 t.Run("LineMatches", func(t *testing.T) { 3262 wantSingleMatch := func(res *SearchResult, want string) { 3263 t.Helper() 3264 fmatches := res.Files 3265 if len(fmatches) != 1 { 3266 t.Errorf("got %v, want 1 matches", len(fmatches)) 3267 return 3268 } 3269 if len(fmatches[0].LineMatches) != 1 { 3270 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3271 return 3272 } 3273 var got string 3274 if fmatches[0].LineMatches[0].FileName { 3275 got = fmatches[0].FileName 3276 } else { 3277 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3278 } 3279 3280 if got != want { 3281 t.Errorf("got %s, want %s", got, want) 3282 } 3283 } 3284 3285 // Only return the later match in the second file 3286 res := searchForTest(t, b, query.NewAnd( 3287 &query.Type{ 3288 Type: query.TypeFileName, 3289 Child: &query.Substring{Pattern: "needle"}, 3290 }, 3291 &query.Substring{Pattern: "file"})) 3292 wantSingleMatch(res, "f2:8") 3293 3294 // Only return a filename result 3295 res = searchForTest(t, b, 3296 &query.Type{ 3297 Type: query.TypeFileName, 3298 Child: &query.Substring{Pattern: "file"}, 3299 }) 3300 wantSingleMatch(res, "f2") 3301 }) 3302 3303 t.Run("ChunkMatches", func(t *testing.T) { 3304 wantSingleMatch := func(res *SearchResult, want string) { 3305 t.Helper() 3306 fmatches := res.Files 3307 if len(fmatches) != 1 { 3308 t.Errorf("got %v, want 1 matches", len(fmatches)) 3309 return 3310 } 3311 if len(fmatches[0].ChunkMatches) != 1 { 3312 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3313 return 3314 } 3315 var got string 3316 if fmatches[0].ChunkMatches[0].FileName { 3317 got = fmatches[0].FileName 3318 } else { 3319 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3320 } 3321 3322 if got != want { 3323 t.Errorf("got %s, want %s", got, want) 3324 } 3325 } 3326 3327 // Only return the later match in the second file 3328 res := searchForTest(t, b, query.NewAnd( 3329 &query.Type{ 3330 Type: query.TypeFileName, 3331 Child: &query.Substring{Pattern: "needle"}, 3332 }, 3333 &query.Substring{Pattern: "file"}), 3334 chunkOpts, 3335 ) 3336 wantSingleMatch(res, "f2:8") 3337 3338 // Only return a filename result 3339 res = searchForTest(t, b, 3340 &query.Type{ 3341 Type: query.TypeFileName, 3342 Child: &query.Substring{Pattern: "file"}, 3343 }, 3344 chunkOpts, 3345 ) 3346 wantSingleMatch(res, "f2") 3347 }) 3348} 3349 3350func TestSearchTypeLanguage(t *testing.T) { 3351 b := testIndexBuilder(t, &Repository{ 3352 Name: "reponame", 3353 }, 3354 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3355 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3356 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3357 ) 3358 3359 t.Log(b.languageMap) 3360 3361 t.Run("LineMatches", func(t *testing.T) { 3362 wantSingleMatch := func(res *SearchResult, want string) { 3363 t.Helper() 3364 fmatches := res.Files 3365 if len(fmatches) != 1 { 3366 t.Errorf("got %v, want 1 matches", len(fmatches)) 3367 return 3368 } 3369 if len(fmatches[0].LineMatches) != 1 { 3370 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3371 return 3372 } 3373 var got string 3374 if fmatches[0].LineMatches[0].FileName { 3375 got = fmatches[0].FileName 3376 } else { 3377 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3378 } 3379 3380 if got != want { 3381 t.Errorf("got %s, want %s", got, want) 3382 } 3383 } 3384 3385 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3386 wantSingleMatch(res, "apex.cls") 3387 3388 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3389 wantSingleMatch(res, "tex.cls") 3390 3391 res = searchForTest(t, b, &query.Language{Language: "C"}) 3392 wantSingleMatch(res, "hello.h") 3393 3394 // test fallback language search by pretending it's an older index version 3395 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3396 if len(res.Files) != 0 { 3397 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3398 } 3399 3400 b.featureVersion = 11 // force fallback 3401 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3402 wantSingleMatch(res, "hello.h") 3403 }) 3404 3405 t.Run("ChunkMatches", func(t *testing.T) { 3406 wantSingleMatch := func(res *SearchResult, want string) { 3407 t.Helper() 3408 fmatches := res.Files 3409 if len(fmatches) != 1 { 3410 t.Errorf("got %v, want 1 matches", len(fmatches)) 3411 return 3412 } 3413 if len(fmatches[0].ChunkMatches) != 1 { 3414 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3415 return 3416 } 3417 var got string 3418 if fmatches[0].ChunkMatches[0].FileName { 3419 got = fmatches[0].FileName 3420 } else { 3421 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3422 } 3423 3424 if got != want { 3425 t.Errorf("got %s, want %s", got, want) 3426 } 3427 } 3428 3429 b.featureVersion = FeatureVersion // reset feature version 3430 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3431 wantSingleMatch(res, "apex.cls") 3432 3433 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3434 wantSingleMatch(res, "tex.cls") 3435 3436 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3437 wantSingleMatch(res, "hello.h") 3438 3439 // test fallback language search by pretending it's an older index version 3440 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3441 if len(res.Files) != 0 { 3442 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3443 } 3444 3445 b.featureVersion = 11 // force fallback 3446 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3447 wantSingleMatch(res, "hello.h") 3448 }) 3449} 3450 3451func TestStats(t *testing.T) { 3452 ignored := []cmp.Option{ 3453 cmpopts.EquateEmpty(), 3454 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3455 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3456 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3457 } 3458 3459 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3460 searcher := searcherForTest(t, b) 3461 indexdata := searcher.(*indexData) 3462 return indexdata.repoListEntry 3463 } 3464 3465 t.Run("one empty repo", func(t *testing.T) { 3466 b := testIndexBuilder(t, nil) 3467 got := repoListEntries(b) 3468 want := []RepoListEntry{ 3469 { 3470 Stats: RepoStats{ 3471 Repos: 0, 3472 Shards: 1, 3473 Documents: 0, 3474 IndexBytes: 20, 3475 ContentBytes: 0, 3476 NewLinesCount: 0, 3477 DefaultBranchNewLinesCount: 0, 3478 OtherBranchesNewLinesCount: 0, 3479 }, 3480 }, 3481 } 3482 3483 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3484 t.Fatalf("mismatch (-want +got):\n%s", diff) 3485 } 3486 3487 }) 3488 3489 t.Run("one simple shard", func(t *testing.T) { 3490 b := testIndexBuilder(t, nil, 3491 Document{Name: "doc 0", Content: []byte("content 0")}, 3492 Document{Name: "doc 1", Content: []byte("content 1")}, 3493 ) 3494 got := repoListEntries(b) 3495 want := []RepoListEntry{ 3496 { 3497 Stats: RepoStats{ 3498 Repos: 0, 3499 Shards: 1, 3500 Documents: 2, 3501 IndexBytes: 224, 3502 ContentBytes: 28, 3503 NewLinesCount: 0, 3504 DefaultBranchNewLinesCount: 0, 3505 OtherBranchesNewLinesCount: 0, 3506 }, 3507 }, 3508 } 3509 3510 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3511 t.Fatalf("mismatch (-want +got):\n%s", diff) 3512 } 3513 3514 }) 3515 3516 t.Run("one compound shard", func(t *testing.T) { 3517 b := testIndexBuilderCompound(t, 3518 []*Repository{ 3519 {Name: "repo 0"}, 3520 {Name: "repo 1"}, 3521 }, 3522 [][]Document{ 3523 { 3524 {Name: "doc 0", Content: []byte("content 0")}, 3525 {Name: "doc 1", Content: []byte("content 1")}, 3526 }, 3527 { 3528 {Name: "doc 2", Content: []byte("content 2")}, 3529 {Name: "doc 3", Content: []byte("content 3")}, 3530 }, 3531 }, 3532 ) 3533 got := repoListEntries(b) 3534 want := []RepoListEntry{ 3535 { 3536 Stats: RepoStats{ 3537 Repos: 0, 3538 Shards: 1, 3539 Documents: 2, 3540 IndexBytes: 180, 3541 ContentBytes: 28, 3542 NewLinesCount: 0, 3543 DefaultBranchNewLinesCount: 0, 3544 OtherBranchesNewLinesCount: 0, 3545 }, 3546 }, 3547 { 3548 Stats: RepoStats{ 3549 Repos: 0, 3550 Shards: 1, 3551 Documents: 2, 3552 IndexBytes: 180, 3553 ContentBytes: 28, 3554 NewLinesCount: 0, 3555 DefaultBranchNewLinesCount: 0, 3556 OtherBranchesNewLinesCount: 0, 3557 }, 3558 }, 3559 } 3560 3561 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3562 t.Fatalf("mismatch (-want +got):\n%s", diff) 3563 } 3564 }) 3565 3566 t.Run("compound shard with empty repos", func(t *testing.T) { 3567 b := testIndexBuilderCompound(t, 3568 []*Repository{ 3569 {Name: "repo 0"}, 3570 {Name: "repo 1"}, 3571 {Name: "repo 2"}, 3572 {Name: "repo 3"}, 3573 {Name: "repo 4"}, 3574 }, 3575 [][]Document{ 3576 {{Name: "doc 0", Content: []byte("content 0")}}, 3577 nil, 3578 {{Name: "doc 1", Content: []byte("content 1")}}, 3579 nil, 3580 nil, 3581 }, 3582 ) 3583 got := repoListEntries(b) 3584 3585 entryEmpty := RepoListEntry{Stats: RepoStats{ 3586 Shards: 1, 3587 Documents: 0, 3588 ContentBytes: 0, 3589 }} 3590 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3591 Shards: 1, 3592 Documents: 1, 3593 ContentBytes: 14, 3594 }} 3595 3596 want := []RepoListEntry{ 3597 entryNonEmpty, 3598 entryEmpty, 3599 entryNonEmpty, 3600 entryEmpty, 3601 entryEmpty, 3602 } 3603 3604 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3605 t.Fatalf("mismatch (-want +got):\n%s", diff) 3606 } 3607 3608 }) 3609} 3610 3611// This tests the frequent pattern "\bLITERAL\b". 3612func TestWordSearch(t *testing.T) { 3613 content := []byte("needle the bla") 3614 // ----------------01234567890123 3615 3616 b := testIndexBuilder(t, nil, 3617 Document{ 3618 Name: "f1", 3619 Content: content, 3620 }) 3621 3622 t.Run("LineMatches", func(t *testing.T) { 3623 sres := searchForTest(t, b, 3624 &query.Regexp{ 3625 Regexp: mustParseRE("\\bthe\\b"), 3626 CaseSensitive: true, 3627 Content: true, 3628 }) 3629 3630 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3631 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3632 } 3633 3634 if sres.Stats.RegexpsConsidered != 0 { 3635 t.Fatal("expected regexp to be skipped") 3636 } 3637 3638 got := sres.Files[0].LineMatches[0] 3639 want := LineMatch{ 3640 LineFragments: []LineFragmentMatch{{ 3641 LineOffset: 7, 3642 Offset: 7, 3643 MatchLength: 3, 3644 }}, 3645 Line: content, 3646 FileName: false, 3647 LineNumber: 1, 3648 LineStart: 0, 3649 LineEnd: 14, 3650 } 3651 3652 if !reflect.DeepEqual(got, want) { 3653 t.Errorf("got %#v, want %#v", got, want) 3654 } 3655 }) 3656 3657 t.Run("ChunkMatches", func(t *testing.T) { 3658 sres := searchForTest(t, b, 3659 &query.Regexp{ 3660 Regexp: mustParseRE("\\bthe\\b"), 3661 CaseSensitive: true, 3662 }, chunkOpts) 3663 3664 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3665 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3666 } 3667 3668 if sres.Stats.RegexpsConsidered != 0 { 3669 t.Fatal("expected regexp to be skipped") 3670 } 3671 3672 got := sres.Files[0].ChunkMatches[0] 3673 want := ChunkMatch{ 3674 Content: content, 3675 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3676 Ranges: []Range{{ 3677 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3678 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3679 }}, 3680 } 3681 3682 if diff := cmp.Diff(want, got); diff != "" { 3683 t.Fatal(diff) 3684 } 3685 }) 3686}