fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 } 42 r.Files[i].Checksum = nil 43 r.Files[i].Debug = "" 44 } 45} 46 47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 48 t.Helper() 49 50 b, err := NewIndexBuilder(repo) 51 if err != nil { 52 t.Fatalf("NewIndexBuilder: %v", err) 53 } 54 55 for i, d := range docs { 56 if err := b.Add(d); err != nil { 57 t.Fatalf("Add %d: %v", i, err) 58 } 59 } 60 61 return b 62} 63 64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 65 t.Helper() 66 67 b := newIndexBuilder() 68 b.indexFormatVersion = NextIndexFormatVersion 69 70 if len(repos) != len(docs) { 71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 72 } 73 74 for i, repo := range repos { 75 if err := 
b.setRepository(repo); err != nil { 76 t.Fatal(err) 77 } 78 for j, d := range docs[i] { 79 if err := b.Add(d); err != nil { 80 t.Fatalf("Add %d %d: %v", i, j, err) 81 } 82 } 83 } 84 85 return b 86} 87 88func TestBoundary(t *testing.T) { 89 b := testIndexBuilder(t, nil, 90 Document{Name: "f1", Content: []byte("x the")}, 91 Document{Name: "f1", Content: []byte("reader")}) 92 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 93 if len(res.Files) > 0 { 94 t.Fatalf("got %v, want no matches", res.Files) 95 } 96} 97 98func TestDocSectionInvalid(t *testing.T) { 99 b, err := NewIndexBuilder(nil) 100 if err != nil { 101 t.Fatalf("NewIndexBuilder: %v", err) 102 } 103 doc := Document{ 104 Name: "f1", 105 Content: []byte("01234567890123"), 106 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 107 } 108 109 if err := b.Add(doc); err == nil { 110 t.Errorf("overlapping doc sections should fail") 111 } 112 113 doc = Document{ 114 Name: "f1", 115 Content: []byte("01234567890123"), 116 Symbols: []DocumentSection{{0, 20}}, 117 } 118 119 if err := b.Add(doc); err == nil { 120 t.Errorf("doc sections beyond EOF should fail") 121 } 122} 123 124func TestBasic(t *testing.T) { 125 b := testIndexBuilder(t, nil, 126 Document{ 127 Name: "f2", 128 Content: []byte("to carry water in the no later bla"), 129 // --------------0123456789012345678901234567890123 130 }) 131 132 t.Run("LineMatch", func(t *testing.T) { 133 res := searchForTest(t, b, &query.Substring{ 134 Pattern: "water", 135 CaseSensitive: true, 136 }) 137 fmatches := res.Files 138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 139 t.Fatalf("got %v, want 1 matches", fmatches) 140 } 141 142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 143 want := "f2:9" 144 if got != want { 145 t.Errorf("1: got %s, want %s", got, want) 146 } 147 }) 148 149 t.Run("ChunkMatch", func(t *testing.T) { 150 res := searchForTest(t, b, &query.Substring{ 151 Pattern: "water", 152 
CaseSensitive: true, 153 }, chunkOpts) 154 fmatches := res.Files 155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 156 t.Fatalf("got %v, want 1 matches", fmatches) 157 } 158 159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 160 want := "f2:9" 161 if got != want { 162 t.Errorf("1: got %s, want %s", got, want) 163 } 164 }) 165} 166 167func TestEmptyIndex(t *testing.T) { 168 b := testIndexBuilder(t, nil) 169 searcher := searcherForTest(t, b) 170 171 var opts SearchOptions 172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 173 t.Fatalf("Search: %v", err) 174 } 175 176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 177 t.Fatalf("List: %v", err) 178 } 179 180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 181 t.Fatalf("Search: %v", err) 182 } 183} 184 185type memSeeker struct { 186 data []byte 187} 188 189func (s *memSeeker) Name() string { 190 return "memseeker" 191} 192 193func (s *memSeeker) Close() {} 194func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 195 return s.data[off : off+sz], nil 196} 197 198func (s *memSeeker) Size() (uint32, error) { 199 return uint32(len(s.data)), nil 200} 201 202func TestNewlines(t *testing.T) { 203 b := testIndexBuilder(t, nil, 204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 205 // ---------------------------------------------012345-678901-234 206 207 t.Run("LineMatches", func(t *testing.T) { 208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 209 210 matches := sres.Files 211 want := []FileMatch{{ 212 FileName: "filename", 213 LineMatches: []LineMatch{{ 214 LineFragments: []LineFragmentMatch{{ 215 Offset: 8, 216 LineOffset: 2, 217 MatchLength: 3, 218 }}, 219 Line: []byte("line2"), 220 LineStart: 6, 221 LineEnd: 11, 222 LineNumber: 
2, 223 }}, 224 }} 225 226 if !reflect.DeepEqual(matches, want) { 227 t.Errorf("got %v, want %v", matches, want) 228 } 229 }) 230 231 t.Run("ChunkMatches", func(t *testing.T) { 232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 233 234 matches := sres.Files 235 want := []FileMatch{{ 236 FileName: "filename", 237 ChunkMatches: []ChunkMatch{{ 238 Content: []byte("line2"), 239 ContentStart: Location{ 240 ByteOffset: 6, 241 LineNumber: 2, 242 Column: 1, 243 }, 244 Ranges: []Range{{ 245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 247 }}, 248 }}, 249 }} 250 251 if diff := cmp.Diff(want, matches); diff != "" { 252 t.Fatal(diff) 253 } 254 }) 255} 256 257// A result spanning multiple lines should have LineMatches that only cover 258// single lines. 259func TestQueryNewlines(t *testing.T) { 260 text := "line1\nline2\nbla" 261 b := testIndexBuilder(t, nil, 262 Document{Name: "filename", Content: []byte(text)}) 263 264 t.Run("LineMatches", func(t *testing.T) { 265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 266 matches := sres.Files 267 if len(matches) != 1 { 268 t.Fatalf("got %d file matches, want exactly one", len(matches)) 269 } 270 m := matches[0] 271 if len(m.LineMatches) != 2 { 272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 273 } 274 }) 275 276 t.Run("ChunkMatches", func(t *testing.T) { 277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 278 matches := sres.Files 279 if len(matches) != 1 { 280 t.Fatalf("got %d file matches, want exactly one", len(matches)) 281 } 282 m := matches[0] 283 if len(m.ChunkMatches) != 1 { 284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 285 } 286 }) 287} 288 289var chunkOpts = SearchOptions{ChunkMatches: true} 290 291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 292 searcher := 
searcherForTest(t, b) 293 var opts SearchOptions 294 if len(o) > 0 { 295 opts = o[0] 296 } 297 res, err := searcher.Search(context.Background(), q, &opts) 298 if err != nil { 299 t.Fatalf("Search(%s): %v", q, err) 300 } 301 clearScores(res) 302 return res 303} 304 305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 306 var buf bytes.Buffer 307 if err := b.Write(&buf); err != nil { 308 t.Fatal(err) 309 } 310 f := &memSeeker{buf.Bytes()} 311 312 searcher, err := NewSearcher(f) 313 if err != nil { 314 t.Fatalf("NewSearcher: %v", err) 315 } 316 317 return searcher 318} 319 320func TestCaseFold(t *testing.T) { 321 b := testIndexBuilder(t, nil, 322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 323 // -----------------------------------012345678901234 324 ) 325 t.Run("LineMatches", func(t *testing.T) { 326 sres := searchForTest(t, b, &query.Substring{ 327 Pattern: "bananas", 328 CaseSensitive: true, 329 }) 330 matches := sres.Files 331 if len(matches) != 0 { 332 t.Errorf("foldcase: got %#v, want 0 matches", matches) 333 } 334 335 sres = searchForTest(t, b, 336 &query.Substring{ 337 Pattern: "BaNaNAS", 338 CaseSensitive: true, 339 }) 340 matches = sres.Files 341 if len(matches) != 1 { 342 t.Errorf("no foldcase: got %v, want 1 matches", matches) 343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 344 t.Errorf("foldcase: got %v, want offsets 7", matches) 345 } 346 }) 347 348 t.Run("ChunkMatches", func(t *testing.T) { 349 sres := searchForTest(t, b, &query.Substring{ 350 Pattern: "bananas", 351 CaseSensitive: true, 352 }, chunkOpts) 353 matches := sres.Files 354 if len(matches) != 0 { 355 t.Errorf("foldcase: got %#v, want 0 matches", matches) 356 } 357 358 sres = searchForTest(t, b, 359 &query.Substring{ 360 Pattern: "BaNaNAS", 361 CaseSensitive: true, 362 }) 363 matches = sres.Files 364 if len(matches) != 1 { 365 t.Errorf("no foldcase: got %v, want 1 matches", matches) 366 } else if 
matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 367 t.Errorf("foldcase: got %v, want offsets 7", matches) 368 } 369 }) 370} 371 372func TestSearchStats(t *testing.T) { 373 ctx := context.Background() 374 searcher := searcherForTest(t, testIndexBuilder(t, nil, 375 Document{Name: "f1", Content: []byte("x banana y")}, 376 Document{Name: "f2", Content: []byte("x apple y")}, 377 Document{Name: "f3", Content: []byte("x banana apple y")}, 378 // -----------------------------------0123456789012345 379 )) 380 381 andQuery := query.NewAnd( 382 &query.Substring{ 383 Pattern: "banana", 384 }, 385 &query.Substring{ 386 Pattern: "apple", 387 }, 388 ) 389 390 t.Run("LineMatches", func(t *testing.T) { 391 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 392 if err != nil { 393 t.Fatal(err) 394 } 395 matches := sres.Files 396 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 397 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 398 } 399 400 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 401 t.Fatalf("got %#v, want offsets 2,9", matches) 402 } 403 }) 404 t.Run("ChunkMatches", func(t *testing.T) { 405 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 406 if err != nil { 407 t.Fatal(err) 408 } 409 matches := sres.Files 410 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 411 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 412 } 413 414 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 415 t.Fatalf("got %#v, want offsets 2,9", matches) 416 } 417 }) 418 t.Run("Stats", func(t *testing.T) { 419 cases := []struct { 420 Name string 421 Q query.Q 422 Want Stats 423 }{{ 424 Name: "and-query", 425 Q: andQuery, 426 Want: Stats{ 427 FilesLoaded: 1, 428 ContentBytesLoaded: 18, 429 
IndexBytesLoaded: 8, 430 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 431 NgramLookups: 104, 432 MatchCount: 2, 433 FileCount: 1, 434 FilesConsidered: 2, 435 ShardsScanned: 1, 436 }, 437 }, { 438 Name: "one-trigram", 439 Q: &query.Substring{ 440 Pattern: "a y", 441 Content: true, 442 CaseSensitive: true, 443 }, 444 Want: Stats{ 445 ContentBytesLoaded: 12, 446 IndexBytesLoaded: 1, 447 FileCount: 1, 448 FilesConsidered: 1, 449 FilesLoaded: 1, 450 ShardsScanned: 1, 451 MatchCount: 1, 452 NgramMatches: 1, 453 NgramLookups: 2, // once to lookup frequency then again to access posting list. 454 }, 455 }, { 456 Name: "one-trigram-case-insensitive", 457 Q: &query.Substring{ 458 Pattern: "a y", 459 Content: true, 460 }, 461 Want: Stats{ 462 ContentBytesLoaded: 12, 463 IndexBytesLoaded: 1, 464 FileCount: 1, 465 FilesConsidered: 1, 466 FilesLoaded: 1, 467 ShardsScanned: 1, 468 MatchCount: 1, 469 NgramMatches: 1, 470 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 471 }, 472 }, { 473 Name: "one-trigram-pruned", 474 Q: &query.Substring{ 475 Pattern: "foo", 476 Content: true, 477 CaseSensitive: true, 478 }, 479 Want: Stats{ 480 ShardsSkippedFilter: 1, 481 NgramLookups: 1, // only had to lookup once 482 }, 483 }, { 484 Name: "one-trigram-branch-pruned", 485 Q: query.NewAnd( 486 &query.Substring{ 487 Pattern: "foo", 488 Content: true, 489 CaseSensitive: true, 490 }, 491 &query.Substring{ 492 Pattern: "a y", 493 Content: true, 494 CaseSensitive: true, 495 }, 496 ), 497 Want: Stats{ 498 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 499 ShardsSkippedFilter: 1, 500 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 
501 }, 502 }} 503 504 for _, tc := range cases { 505 t.Run(tc.Name, func(t *testing.T) { 506 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 507 if err != nil { 508 t.Fatal(err) 509 } 510 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 511 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 512 } 513 }) 514 } 515 516 }) 517} 518 519func TestAndNegateSearch(t *testing.T) { 520 b := testIndexBuilder(t, nil, 521 Document{Name: "f1", Content: []byte("x banana y")}, 522 // -----------------------------------0123456789 523 Document{Name: "f4", Content: []byte("x banana apple y")}) 524 525 t.Run("LineMatches", func(t *testing.T) { 526 sres := searchForTest(t, b, query.NewAnd( 527 &query.Substring{ 528 Pattern: "banana", 529 }, 530 &query.Not{Child: &query.Substring{ 531 Pattern: "apple", 532 }})) 533 534 matches := sres.Files 535 536 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 537 t.Fatalf("got %v, want 1 match", matches) 538 } 539 if matches[0].FileName != "f1" { 540 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 541 } 542 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 543 t.Fatalf("got %v, want offset 2", matches) 544 } 545 }) 546 547 t.Run("ChunkMatches", func(t *testing.T) { 548 sres := searchForTest(t, b, 549 query.NewAnd( 550 &query.Substring{ 551 Pattern: "banana", 552 }, 553 &query.Not{Child: &query.Substring{ 554 Pattern: "apple", 555 }}, 556 ), 557 chunkOpts, 558 ) 559 560 matches := sres.Files 561 562 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 563 t.Fatalf("got %v, want 1 match", matches) 564 } 565 if matches[0].FileName != "f1" { 566 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 567 } 568 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 569 t.Fatalf("got %v, want offset 2", matches) 570 } 571 }) 572} 573 574func TestNegativeMatchesOnlyShortcut(t *testing.T) { 575 b := testIndexBuilder(t, nil, 
576 Document{Name: "f1", Content: []byte("x banana y")}, 577 Document{Name: "f2", Content: []byte("x appelmoes y")}, 578 Document{Name: "f3", Content: []byte("x appelmoes y")}, 579 Document{Name: "f3", Content: []byte("x appelmoes y")}) 580 581 t.Run("LineMatches", func(t *testing.T) { 582 sres := searchForTest(t, b, query.NewAnd( 583 &query.Substring{ 584 Pattern: "banana", 585 }, 586 &query.Not{Child: &query.Substring{ 587 Pattern: "appel", 588 }})) 589 590 if sres.Stats.FilesConsidered != 1 { 591 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 592 } 593 }) 594 595 t.Run("ChunkMatches", func(t *testing.T) { 596 sres := searchForTest(t, b, query.NewAnd( 597 &query.Substring{ 598 Pattern: "banana", 599 }, 600 &query.Not{Child: &query.Substring{ 601 Pattern: "appel", 602 }}), chunkOpts) 603 604 if sres.Stats.FilesConsidered != 1 { 605 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 606 } 607 }) 608} 609 610func TestFileSearch(t *testing.T) { 611 b := testIndexBuilder(t, nil, 612 Document{Name: "banzana", Content: []byte("x orange y")}, 613 // -------------0123456 614 Document{Name: "banana", Content: []byte("x apple y")}, 615 // -------------012345 616 ) 617 618 t.Run("LineMatches", func(t *testing.T) { 619 sres := searchForTest(t, b, &query.Substring{ 620 Pattern: "anan", 621 FileName: true, 622 }) 623 624 matches := sres.Files 625 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 626 t.Fatalf("got %v, want 1 match", matches) 627 } 628 629 got := matches[0].LineMatches[0] 630 want := LineMatch{ 631 Line: []byte("banana"), 632 LineFragments: []LineFragmentMatch{{ 633 Offset: 1, 634 LineOffset: 1, 635 MatchLength: 4, 636 }}, 637 FileName: true, 638 } 639 640 if !reflect.DeepEqual(got, want) { 641 t.Errorf("got %#v, want %#v", got, want) 642 } 643 }) 644 645 t.Run("ChunkMatches", func(t *testing.T) { 646 sres := searchForTest(t, b, &query.Substring{ 647 Pattern: "anan", 648 FileName: true, 649 }, chunkOpts) 650 651 matches := sres.Files 652 
if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 653 t.Fatalf("got %v, want 1 match", matches) 654 } 655 656 got := matches[0].ChunkMatches[0] 657 want := ChunkMatch{ 658 Content: []byte("banana"), 659 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 660 Ranges: []Range{{ 661 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 662 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 663 }}, 664 FileName: true, 665 } 666 667 if diff := cmp.Diff(want, got); diff != "" { 668 t.Fatal(diff) 669 } 670 }) 671 672 t.Run("FileNameSet", func(t *testing.T) { 673 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 674 675 matches := sres.Files 676 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 677 t.Fatalf("got %v, want 1 match", matches) 678 } 679 680 got := matches[0].ChunkMatches[0] 681 want := ChunkMatch{ 682 Content: []byte("banana"), 683 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 684 Ranges: []Range{{ 685 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 686 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 687 }}, 688 FileName: true, 689 } 690 691 if diff := cmp.Diff(want, got); diff != "" { 692 t.Fatal(diff) 693 } 694 }) 695} 696 697func TestFileCase(t *testing.T) { 698 b := testIndexBuilder(t, nil, 699 Document{Name: "BANANA", Content: []byte("x orange y")}) 700 701 t.Run("LineMatches", func(t *testing.T) { 702 sres := searchForTest(t, b, &query.Substring{ 703 Pattern: "banana", 704 FileName: true, 705 }) 706 707 matches := sres.Files 708 if len(matches) != 1 || matches[0].FileName != "BANANA" { 709 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 710 } 711 }) 712 713 t.Run("ChunkMatches", func(t *testing.T) { 714 sres := searchForTest(t, b, &query.Substring{ 715 Pattern: "banana", 716 FileName: true, 717 }, chunkOpts) 718 719 matches := sres.Files 720 if len(matches) != 1 || matches[0].FileName != "BANANA" { 721 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 
722 } 723 }) 724} 725 726func TestFileRegexpSearchBruteForce(t *testing.T) { 727 b := testIndexBuilder(t, nil, 728 Document{Name: "banzana", Content: []byte("x orange y")}, 729 Document{Name: "banana", Content: []byte("x apple y")}, 730 ) 731 t.Run("LineMatches", func(t *testing.T) { 732 sres := searchForTest(t, b, &query.Regexp{ 733 Regexp: mustParseRE("[qn][zx]"), 734 FileName: true, 735 }) 736 737 matches := sres.Files 738 if len(matches) != 1 || matches[0].FileName != "banzana" { 739 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 740 } 741 }) 742 t.Run("LineMatches", func(t *testing.T) { 743 sres := searchForTest(t, b, &query.Regexp{ 744 Regexp: mustParseRE("[qn][zx]"), 745 FileName: true, 746 }, chunkOpts) 747 748 matches := sres.Files 749 if len(matches) != 1 || matches[0].FileName != "banzana" { 750 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 751 } 752 }) 753} 754 755func TestFileRegexpSearchShortString(t *testing.T) { 756 b := testIndexBuilder(t, nil, 757 Document{Name: "banana.py", Content: []byte("x orange y")}) 758 759 t.Run("LineMatches", func(t *testing.T) { 760 sres := searchForTest(t, b, &query.Regexp{ 761 Regexp: mustParseRE("ana.py"), 762 FileName: true, 763 }) 764 765 matches := sres.Files 766 if len(matches) != 1 || matches[0].FileName != "banana.py" { 767 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 768 } 769 }) 770 771 t.Run("ChunkMatches", func(t *testing.T) { 772 sres := searchForTest(t, b, &query.Regexp{ 773 Regexp: mustParseRE("ana.py"), 774 FileName: true, 775 }, chunkOpts) 776 777 matches := sres.Files 778 if len(matches) != 1 || matches[0].FileName != "banana.py" { 779 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 780 } 781 }) 782} 783 784func TestFileSubstringSearchBruteForce(t *testing.T) { 785 b := testIndexBuilder(t, nil, 786 Document{Name: "BANZANA", Content: []byte("x orange y")}, 787 Document{Name: "banana", Content: []byte("x apple y")}) 788 789 q := &query.Substring{ 790 Pattern: "z", 
791 FileName: true, 792 } 793 794 t.Run("LineMatches", func(t *testing.T) { 795 res := searchForTest(t, b, q) 796 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 797 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 798 } 799 }) 800 801 t.Run("ChunkMatches", func(t *testing.T) { 802 res := searchForTest(t, b, q, chunkOpts) 803 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 804 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 805 } 806 }) 807} 808 809func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 810 b := testIndexBuilder(t, nil, 811 Document{Name: "BANZANA", Content: []byte("x orange y")}, 812 Document{Name: "bananaq", Content: []byte("x apple y")}) 813 814 q := &query.Substring{ 815 Pattern: "q", 816 FileName: true, 817 } 818 t.Run("LineMatches", func(t *testing.T) { 819 res := searchForTest(t, b, q) 820 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 821 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 822 } 823 }) 824 825 t.Run("LineMatches", func(t *testing.T) { 826 res := searchForTest(t, b, q, chunkOpts) 827 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 828 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 829 } 830 }) 831} 832 833func TestSearchMatchAll(t *testing.T) { 834 b := testIndexBuilder(t, nil, 835 Document{Name: "banzana", Content: []byte("x orange y")}, 836 Document{Name: "banana", Content: []byte("x apple y")}) 837 838 t.Run("LineMatches", func(t *testing.T) { 839 sres := searchForTest(t, b, &query.Const{Value: true}) 840 matches := sres.Files 841 if len(matches) != 2 { 842 t.Fatalf("got %v, want 2 matches", matches) 843 } 844 }) 845 846 t.Run("ChunkMatches", func(t *testing.T) { 847 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 848 matches := sres.Files 849 if len(matches) != 2 { 850 t.Fatalf("got %v, want 2 matches", matches) 851 } 852 }) 853} 854 855func TestSearchNewline(t *testing.T) { 856 b 
:= testIndexBuilder(t, nil, 857 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 858 859 t.Run("LineMatches", func(t *testing.T) { 860 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 861 862 // Just check that we don't crash. 863 864 matches := sres.Files 865 if len(matches) != 1 { 866 t.Fatalf("got %v, want 1 matches", matches) 867 } 868 }) 869 870 t.Run("ChunkMatches", func(t *testing.T) { 871 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 872 873 // Just check that we don't crash. 874 875 matches := sres.Files 876 if len(matches) != 1 { 877 t.Fatalf("got %v, want 1 matches", matches) 878 } 879 }) 880} 881 882func TestSearchMatchAllRegexp(t *testing.T) { 883 b := testIndexBuilder(t, nil, 884 Document{Name: "banzana", Content: []byte("abcd")}, 885 Document{Name: "banana", Content: []byte("pqrs")}) 886 887 t.Run("LineMatches", func(t *testing.T) { 888 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 889 890 matches := sres.Files 891 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 892 t.Fatalf("got %v, want 2 matches", matches) 893 } 894 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 895 t.Fatalf("want 4 chars in every file, got %#v", matches) 896 } 897 898 }) 899 900 t.Run("ChunkMatches", func(t *testing.T) { 901 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 902 903 matches := sres.Files 904 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 905 t.Fatalf("got %v, want 2 matches", matches) 906 } 907 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 908 t.Fatalf("want 4 chars in every file, got %#v", matches) 909 } 910 911 }) 912} 913 914func TestFileRestriction(t *testing.T) { 915 b := testIndexBuilder(t, nil, 916 Document{Name: "banana1", Content: []byte("x orange y")}, 917 Document{Name: "banana2", Content: []byte("x apple y")}, 918 Document{Name: "orange", 
Content: []byte("x apple z")}) 919 920 t.Run("LineMatches", func(t *testing.T) { 921 sres := searchForTest(t, b, query.NewAnd( 922 &query.Substring{ 923 Pattern: "banana", 924 FileName: true, 925 }, 926 &query.Substring{ 927 Pattern: "apple", 928 })) 929 930 matches := sres.Files 931 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 932 t.Fatalf("got %v, want 1 match", matches) 933 } 934 935 match := matches[0].LineMatches[0] 936 got := string(match.Line) 937 want := "x apple y" 938 if got != want { 939 t.Errorf("got match %#v, want line %q", match, want) 940 } 941 }) 942 943 t.Run("ChunkMatches", func(t *testing.T) { 944 sres := searchForTest(t, b, query.NewAnd( 945 &query.Substring{ 946 Pattern: "banana", 947 FileName: true, 948 }, 949 &query.Substring{ 950 Pattern: "apple", 951 }), chunkOpts) 952 953 matches := sres.Files 954 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 955 t.Fatalf("got %v, want 1 match", matches) 956 } 957 958 match := matches[0].ChunkMatches[0] 959 got := string(match.Content) 960 want := "x apple y" 961 if got != want { 962 t.Errorf("got match %#v, want line %q", match, want) 963 } 964 }) 965} 966 967func TestFileNameBoundary(t *testing.T) { 968 b := testIndexBuilder(t, nil, 969 Document{Name: "banana2", Content: []byte("x apple y")}, 970 Document{Name: "helpers.go", Content: []byte("x apple y")}, 971 Document{Name: "foo", Content: []byte("x apple y")}) 972 973 t.Run("LineMatches", func(t *testing.T) { 974 sres := searchForTest(t, b, &query.Substring{ 975 Pattern: "helpers.go", 976 FileName: true, 977 }) 978 979 matches := sres.Files 980 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 981 t.Fatalf("got %v, want 1 match", matches) 982 } 983 }) 984 985 t.Run("ChunkMatches", func(t *testing.T) { 986 sres := searchForTest(t, b, &query.Substring{ 987 Pattern: "helpers.go", 988 FileName: true, 989 }, chunkOpts) 990 991 matches := sres.Files 992 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 993 
t.Fatalf("got %v, want 1 match", matches) 994 } 995 }) 996} 997 998func TestDocumentOrder(t *testing.T) { 999 var docs []Document 1000 for i := 0; i < 3; i++ { 1001 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1002 } 1003 1004 b := testIndexBuilder(t, nil, docs...) 1005 1006 t.Run("LineMatches", func(t *testing.T) { 1007 sres := searchForTest(t, b, query.NewAnd( 1008 &query.Substring{ 1009 Pattern: "needle", 1010 })) 1011 1012 want := []string{"f0", "f1", "f2"} 1013 var got []string 1014 for _, f := range sres.Files { 1015 got = append(got, f.FileName) 1016 } 1017 if !reflect.DeepEqual(got, want) { 1018 t.Fatalf("got %v, want %v", got, want) 1019 } 1020 }) 1021 1022 t.Run("ChunkMatches", func(t *testing.T) { 1023 sres := searchForTest(t, b, 1024 query.NewAnd(&query.Substring{ 1025 Pattern: "needle", 1026 }), 1027 chunkOpts, 1028 ) 1029 1030 want := []string{"f0", "f1", "f2"} 1031 var got []string 1032 for _, f := range sres.Files { 1033 got = append(got, f.FileName) 1034 } 1035 if !reflect.DeepEqual(got, want) { 1036 t.Fatalf("got %v, want %v", got, want) 1037 } 1038 }) 1039} 1040 1041func TestBranchMask(t *testing.T) { 1042 b := testIndexBuilder(t, &Repository{ 1043 Branches: []RepositoryBranch{ 1044 {"master", "v-master"}, 1045 {"stable", "v-stable"}, 1046 {"bonzai", "v-bonzai"}, 1047 }, 1048 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1049 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1050 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1051 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1052 ) 1053 1054 t.Run("LineMatches", func(t *testing.T) { 1055 sres := searchForTest(t, b, query.NewAnd( 1056 &query.Substring{ 1057 Pattern: "needle", 1058 }, 1059 &query.Branch{ 1060 Pattern: "table", 1061 })) 1062 1063 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || 
sres.Files[1].FileName != "f3" { 1064 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1065 } 1066 1067 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1068 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1069 } 1070 }) 1071 1072 t.Run("ChunkMatches", func(t *testing.T) { 1073 sres := searchForTest(t, b, query.NewAnd( 1074 &query.Substring{ 1075 Pattern: "needle", 1076 }, 1077 &query.Branch{ 1078 Pattern: "table", 1079 }), 1080 chunkOpts, 1081 ) 1082 1083 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1084 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1085 } 1086 1087 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1088 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1089 } 1090 }) 1091} 1092 1093func TestBranchLimit(t *testing.T) { 1094 for limit := 64; limit <= 65; limit++ { 1095 r := &Repository{} 1096 for i := 0; i < limit; i++ { 1097 s := fmt.Sprintf("b%d", i) 1098 r.Branches = append(r.Branches, RepositoryBranch{ 1099 s, "v-" + s, 1100 }) 1101 } 1102 _, err := NewIndexBuilder(r) 1103 if limit == 64 && err != nil { 1104 t.Fatalf("NewIndexBuilder: %v", err) 1105 } else if limit == 65 && err == nil { 1106 t.Fatalf("NewIndexBuilder succeeded") 1107 } 1108 } 1109} 1110 1111func TestBranchReport(t *testing.T) { 1112 branches := []string{"stable", "master"} 1113 b := testIndexBuilder(t, &Repository{ 1114 Branches: []RepositoryBranch{ 1115 {"stable", "vs"}, 1116 {"master", "vm"}, 1117 }, 1118 }, 1119 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1120 1121 t.Run("LineMatches", func(t *testing.T) { 1122 sres := searchForTest(t, b, &query.Substring{ 1123 Pattern: "needle", 1124 }) 1125 if len(sres.Files) != 1 { 1126 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1127 } 1128 1129 f := sres.Files[0] 1130 if !reflect.DeepEqual(f.Branches, branches) { 1131 t.Fatalf("got 
branches %q, want %q", f.Branches, branches) 1132 } 1133 }) 1134 1135 t.Run("ChunkMatches", func(t *testing.T) { 1136 sres := searchForTest(t, b, &query.Substring{ 1137 Pattern: "needle", 1138 }, chunkOpts) 1139 if len(sres.Files) != 1 { 1140 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1141 } 1142 1143 f := sres.Files[0] 1144 if !reflect.DeepEqual(f.Branches, branches) { 1145 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1146 } 1147 }) 1148 1149} 1150 1151func TestBranchVersions(t *testing.T) { 1152 b := testIndexBuilder(t, &Repository{ 1153 Branches: []RepositoryBranch{ 1154 {"stable", "v-stable"}, 1155 {"master", "v-master"}, 1156 }, 1157 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1158 1159 t.Run("LineMatches", func(t *testing.T) { 1160 sres := searchForTest(t, b, &query.Substring{ 1161 Pattern: "needle", 1162 }) 1163 if len(sres.Files) != 1 { 1164 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1165 } 1166 1167 f := sres.Files[0] 1168 if f.Version != "v-master" { 1169 t.Fatalf("got file %#v, want version 'v-master'", f) 1170 } 1171 }) 1172 1173 t.Run("ChunkMatches", func(t *testing.T) { 1174 sres := searchForTest(t, b, &query.Substring{ 1175 Pattern: "needle", 1176 }, chunkOpts) 1177 if len(sres.Files) != 1 { 1178 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1179 } 1180 1181 f := sres.Files[0] 1182 if f.Version != "v-master" { 1183 t.Fatalf("got file %#v, want version 'v-master'", f) 1184 } 1185 }) 1186} 1187 1188func mustParseRE(s string) *syntax.Regexp { 1189 r, err := syntax.Parse(s, syntax.Perl) 1190 if err != nil { 1191 panic(err) 1192 } 1193 1194 return r 1195} 1196 1197func TestRegexp(t *testing.T) { 1198 content := []byte("needle the bla") 1199 // ----------------01234567890123 1200 1201 b := testIndexBuilder(t, nil, 1202 Document{ 1203 Name: "f1", 1204 Content: content, 1205 }) 1206 1207 t.Run("LineMatches", func(t *testing.T) { 1208 sres := searchForTest(t, b, 1209 
&query.Regexp{ 1210 Regexp: mustParseRE("dle.*bla"), 1211 }) 1212 1213 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1214 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1215 } 1216 1217 got := sres.Files[0].LineMatches[0] 1218 want := LineMatch{ 1219 LineFragments: []LineFragmentMatch{{ 1220 LineOffset: 3, 1221 Offset: 3, 1222 MatchLength: 11, 1223 }}, 1224 Line: content, 1225 FileName: false, 1226 LineNumber: 1, 1227 LineStart: 0, 1228 LineEnd: 14, 1229 } 1230 1231 if !reflect.DeepEqual(got, want) { 1232 t.Errorf("got %#v, want %#v", got, want) 1233 } 1234 }) 1235 1236 t.Run("ChunkMatches", func(t *testing.T) { 1237 sres := searchForTest(t, b, 1238 &query.Regexp{ 1239 Regexp: mustParseRE("dle.*bla"), 1240 }, chunkOpts) 1241 1242 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1243 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1244 } 1245 1246 got := sres.Files[0].ChunkMatches[0] 1247 want := ChunkMatch{ 1248 Content: content, 1249 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1250 Ranges: []Range{{ 1251 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1252 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1253 }}, 1254 } 1255 1256 if diff := cmp.Diff(want, got); diff != "" { 1257 t.Fatal(diff) 1258 } 1259 }) 1260} 1261 1262func TestRegexpFile(t *testing.T) { 1263 content := []byte("needle the bla") 1264 1265 name := "let's play: find the mussel" 1266 b := testIndexBuilder(t, nil, 1267 Document{Name: name, Content: content}, 1268 Document{Name: "play.txt", Content: content}) 1269 1270 t.Run("LineMatches", func(t *testing.T) { 1271 sres := searchForTest(t, b, 1272 &query.Regexp{ 1273 Regexp: mustParseRE("play.*mussel"), 1274 FileName: true, 1275 }) 1276 1277 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1278 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1279 } 1280 1281 if sres.Files[0].FileName != name { 1282 t.Errorf("got match %#v, want name %q", 
sres.Files[0], name) 1283 } 1284 }) 1285 1286 t.Run("ChunkMatches", func(t *testing.T) { 1287 sres := searchForTest(t, b, 1288 &query.Regexp{ 1289 Regexp: mustParseRE("play.*mussel"), 1290 FileName: true, 1291 }, chunkOpts) 1292 1293 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1294 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1295 } 1296 1297 if sres.Files[0].FileName != name { 1298 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1299 } 1300 }) 1301} 1302 1303func TestRegexpOrder(t *testing.T) { 1304 content := []byte("bla the needle") 1305 // ----------------01234567890123 1306 1307 b := testIndexBuilder(t, nil, 1308 Document{Name: "f1", Content: content}) 1309 1310 t.Run("LineMatches", func(t *testing.T) { 1311 sres := searchForTest(t, b, 1312 &query.Regexp{ 1313 Regexp: mustParseRE("dle.*bla"), 1314 }) 1315 1316 if len(sres.Files) != 0 { 1317 t.Fatalf("got %v, want 0 matches", sres.Files) 1318 } 1319 }) 1320 1321 t.Run("ChunkMatches", func(t *testing.T) { 1322 sres := searchForTest(t, b, 1323 &query.Regexp{ 1324 Regexp: mustParseRE("dle.*bla"), 1325 }) 1326 1327 if len(sres.Files) != 0 { 1328 t.Fatalf("got %v, want 0 matches", sres.Files) 1329 } 1330 }) 1331} 1332 1333func TestRepoName(t *testing.T) { 1334 content := []byte("bla the needle") 1335 // ----------------01234567890123 1336 1337 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1338 Document{Name: "f1", Content: content}) 1339 1340 t.Run("LineMatches", func(t *testing.T) { 1341 sres := searchForTest(t, b, 1342 query.NewAnd( 1343 &query.Substring{Pattern: "needle"}, 1344 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1345 )) 1346 1347 if len(sres.Files) != 0 { 1348 t.Fatalf("got %v, want 0 matches", sres.Files) 1349 } 1350 1351 if sres.Stats.FilesConsidered > 0 { 1352 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1353 } 1354 1355 sres = searchForTest(t, b, 1356 query.NewAnd( 1357 &query.Substring{Pattern: 
"needle"}, 1358 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1359 )) 1360 if len(sres.Files) != 1 { 1361 t.Fatalf("got %v, want 1 match", sres.Files) 1362 } 1363 }) 1364 1365 t.Run("ChunkMatches", func(t *testing.T) { 1366 sres := searchForTest(t, b, 1367 query.NewAnd( 1368 &query.Substring{Pattern: "needle"}, 1369 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1370 ), 1371 chunkOpts, 1372 ) 1373 1374 if len(sres.Files) != 0 { 1375 t.Fatalf("got %v, want 0 matches", sres.Files) 1376 } 1377 1378 if sres.Stats.FilesConsidered > 0 { 1379 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1380 } 1381 1382 sres = searchForTest(t, b, 1383 query.NewAnd( 1384 &query.Substring{Pattern: "needle"}, 1385 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1386 )) 1387 if len(sres.Files) != 1 { 1388 t.Fatalf("got %v, want 1 match", sres.Files) 1389 } 1390 }) 1391} 1392 1393func TestMergeMatches(t *testing.T) { 1394 content := []byte("blablabla") 1395 b := testIndexBuilder(t, nil, 1396 Document{Name: "f1", Content: content}) 1397 1398 t.Run("LineMatches", func(t *testing.T) { 1399 sres := searchForTest(t, b, 1400 &query.Substring{Pattern: "bla"}) 1401 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1402 t.Fatalf("got %v, want 1 match", sres.Files) 1403 } 1404 }) 1405 1406 t.Run("ChunkMatches", func(t *testing.T) { 1407 sres := searchForTest(t, b, 1408 &query.Substring{Pattern: "bla"}, 1409 chunkOpts, 1410 ) 1411 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1412 t.Fatalf("got %v, want 1 match", sres.Files) 1413 } 1414 }) 1415} 1416 1417func TestRepoURL(t *testing.T) { 1418 content := []byte("blablabla") 1419 b := testIndexBuilder(t, &Repository{ 1420 Name: "name", 1421 URL: "URL", 1422 CommitURLTemplate: "commit", 1423 FileURLTemplate: "file-url", 1424 LineFragmentTemplate: "fragment", 1425 }, Document{Name: "f1", Content: content}) 1426 1427 sres := searchForTest(t, b, &query.Substring{Pattern: 
"bla"}) 1428 1429 if sres.RepoURLs["name"] != "file-url" { 1430 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1431 } 1432 if sres.LineFragments["name"] != "fragment" { 1433 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1434 } 1435} 1436 1437func TestRegexpCaseSensitive(t *testing.T) { 1438 content := []byte("bla\nfunc unmarshalGitiles\n") 1439 b := testIndexBuilder(t, nil, Document{ 1440 Name: "f1", 1441 Content: content, 1442 }) 1443 1444 t.Run("LineMatches", func(t *testing.T) { 1445 res := searchForTest(t, b, 1446 &query.Regexp{ 1447 Regexp: mustParseRE("func.*Gitiles"), 1448 CaseSensitive: true, 1449 }) 1450 1451 if len(res.Files) != 1 { 1452 t.Fatalf("got %v, want one match", res.Files) 1453 } 1454 }) 1455 1456 t.Run("ChunkMatches", func(t *testing.T) { 1457 res := searchForTest(t, b, 1458 &query.Regexp{ 1459 Regexp: mustParseRE("func.*Gitiles"), 1460 CaseSensitive: true, 1461 }, 1462 chunkOpts, 1463 ) 1464 1465 if len(res.Files) != 1 { 1466 t.Fatalf("got %v, want one match", res.Files) 1467 } 1468 }) 1469} 1470 1471func TestRegexpCaseFolding(t *testing.T) { 1472 content := []byte("bla\nfunc unmarshalGitiles\n") 1473 1474 b := testIndexBuilder(t, nil, 1475 Document{Name: "f1", Content: content}) 1476 res := searchForTest(t, b, 1477 &query.Regexp{ 1478 Regexp: mustParseRE("func.*GITILES"), 1479 CaseSensitive: false, 1480 }) 1481 1482 if len(res.Files) != 1 { 1483 t.Fatalf("got %v, want one match", res.Files) 1484 } 1485} 1486 1487func TestCaseRegexp(t *testing.T) { 1488 content := []byte("BLABLABLA") 1489 b := testIndexBuilder(t, nil, 1490 Document{Name: "f1", Content: content}) 1491 1492 t.Run("LineMatches", func(t *testing.T) { 1493 res := searchForTest(t, b, 1494 &query.Regexp{ 1495 Regexp: mustParseRE("[xb][xl][xa]"), 1496 CaseSensitive: true, 1497 }) 1498 1499 if len(res.Files) > 0 { 1500 t.Fatalf("got %v, want no matches", res.Files) 1501 } 1502 }) 1503 1504 t.Run("ChunkMatches", func(t *testing.T) { 1505 res := 
searchForTest(t, b, 1506 &query.Regexp{ 1507 Regexp: mustParseRE("[xb][xl][xa]"), 1508 CaseSensitive: true, 1509 }, 1510 chunkOpts, 1511 ) 1512 1513 if len(res.Files) > 0 { 1514 t.Fatalf("got %v, want no matches", res.Files) 1515 } 1516 }) 1517} 1518 1519func TestNegativeRegexp(t *testing.T) { 1520 content := []byte("BLABLABLA needle bla") 1521 b := testIndexBuilder(t, nil, 1522 Document{Name: "f1", Content: content}) 1523 1524 t.Run("LineMatches", func(t *testing.T) { 1525 res := searchForTest(t, b, 1526 query.NewAnd( 1527 &query.Substring{ 1528 Pattern: "needle", 1529 }, 1530 &query.Not{ 1531 Child: &query.Regexp{ 1532 Regexp: mustParseRE(".cs"), 1533 }, 1534 })) 1535 1536 if len(res.Files) != 1 { 1537 t.Fatalf("got %v, want 1 match", res.Files) 1538 } 1539 }) 1540 1541 t.Run("ChunkMatches", func(t *testing.T) { 1542 res := searchForTest(t, b, 1543 query.NewAnd( 1544 &query.Substring{ 1545 Pattern: "needle", 1546 }, 1547 &query.Not{ 1548 Child: &query.Regexp{ 1549 Regexp: mustParseRE(".cs"), 1550 }, 1551 }, 1552 ), 1553 chunkOpts) 1554 1555 if len(res.Files) != 1 { 1556 t.Fatalf("got %v, want 1 match", res.Files) 1557 } 1558 }) 1559} 1560 1561func TestSymbolRank(t *testing.T) { 1562 t.Skip() 1563 1564 content := []byte("func bla() blubxxxxx") 1565 // ----------------01234567890123456789 1566 b := testIndexBuilder(t, nil, 1567 Document{ 1568 Name: "f1", 1569 Content: content, 1570 }, Document{ 1571 Name: "f2", 1572 Content: content, 1573 Symbols: []DocumentSection{{5, 8}}, 1574 }, Document{ 1575 Name: "f3", 1576 Content: content, 1577 }) 1578 1579 t.Run("LineMatches", func(t *testing.T) { 1580 res := searchForTest(t, b, 1581 &query.Substring{ 1582 CaseSensitive: false, 1583 Pattern: "bla", 1584 }) 1585 1586 if len(res.Files) != 3 { 1587 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1588 } 1589 if res.Files[0].FileName != "f2" { 1590 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1591 } 1592 }) 1593 1594 t.Run("ChunkMatches", func(t *testing.T) { 1595 res := searchForTest(t, b, 1596 &query.Substring{ 1597 CaseSensitive: false, 1598 Pattern: "bla", 1599 }, chunkOpts) 1600 1601 if len(res.Files) != 3 { 1602 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1603 } 1604 if res.Files[0].FileName != "f2" { 1605 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1606 } 1607 }) 1608} 1609 1610func TestSymbolRankRegexpUTF8(t *testing.T) { 1611 t.Skip() 1612 1613 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1614 content := []byte(prefix + 1615 "func bla() blub") 1616 // ------012345678901234 1617 b := testIndexBuilder(t, nil, 1618 Document{ 1619 Name: "f1", 1620 Content: content, 1621 }, Document{ 1622 Name: "f2", 1623 Content: content, 1624 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1625 }, Document{ 1626 Name: "f3", 1627 Content: content, 1628 }) 1629 1630 t.Run("LineMatches", func(t *testing.T) { 1631 res := searchForTest(t, b, 1632 &query.Regexp{ 1633 Regexp: mustParseRE("b.a"), 1634 }) 1635 1636 if len(res.Files) != 3 { 1637 t.Fatalf("got %#v, want 3 files", res.Files) 1638 } 1639 if res.Files[0].FileName != "f2" { 1640 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1641 } 1642 }) 1643 1644 t.Run("ChunjkMatches", func(t *testing.T) { 1645 res := searchForTest(t, b, 1646 &query.Regexp{ 1647 Regexp: mustParseRE("b.a"), 1648 }, chunkOpts) 1649 1650 if len(res.Files) != 3 { 1651 t.Fatalf("got %#v, want 3 files", res.Files) 1652 } 1653 if res.Files[0].FileName != "f2" { 1654 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1655 } 1656 }) 1657} 1658 1659func TestPartialSymbolRank(t *testing.T) { 1660 t.Skip() 1661 1662 content := []byte("func bla() blub") 1663 // 
----------------012345678901234 1664 1665 b := testIndexBuilder(t, nil, 1666 Document{ 1667 Name: "f1", 1668 Content: content, 1669 Symbols: []DocumentSection{{4, 9}}, 1670 }, Document{ 1671 Name: "f2", 1672 Content: content, 1673 Symbols: []DocumentSection{{4, 8}}, 1674 }, Document{ 1675 Name: "f3", 1676 Content: content, 1677 Symbols: []DocumentSection{{4, 9}}, 1678 }) 1679 1680 t.Run("LineMatches", func(t *testing.T) { 1681 res := searchForTest(t, b, 1682 &query.Substring{ 1683 Pattern: "bla", 1684 }) 1685 1686 if len(res.Files) != 3 { 1687 t.Fatalf("got %#v, want 3 files", res.Files) 1688 } 1689 if res.Files[0].FileName != "f2" { 1690 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1691 } 1692 }) 1693 1694 t.Run("ChunkMatches", func(t *testing.T) { 1695 res := searchForTest(t, b, 1696 &query.Substring{ 1697 Pattern: "bla", 1698 }, chunkOpts) 1699 1700 if len(res.Files) != 3 { 1701 t.Fatalf("got %#v, want 3 files", res.Files) 1702 } 1703 if res.Files[0].FileName != "f2" { 1704 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1705 } 1706 }) 1707} 1708 1709func TestNegativeRepo(t *testing.T) { 1710 content := []byte("bla the needle") 1711 // ----------------01234567890123 1712 b := testIndexBuilder(t, &Repository{ 1713 Name: "bla", 1714 }, Document{Name: "f1", Content: content}) 1715 1716 t.Run("LineMatches", func(t *testing.T) { 1717 sres := searchForTest(t, b, 1718 query.NewAnd( 1719 &query.Substring{Pattern: "needle"}, 1720 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1721 )) 1722 1723 if len(sres.Files) != 0 { 1724 t.Fatalf("got %v, want 0 matches", sres.Files) 1725 } 1726 }) 1727 1728 t.Run("ChunkMatches", func(t *testing.T) { 1729 sres := searchForTest(t, b, 1730 query.NewAnd( 1731 &query.Substring{Pattern: "needle"}, 1732 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1733 ), chunkOpts) 1734 1735 if len(sres.Files) != 0 { 1736 t.Fatalf("got %v, want 0 matches", sres.Files) 1737 } 1738 }) 1739} 
1740 1741func TestListRepos(t *testing.T) { 1742 content := []byte("bla the needle\n") 1743 // ----------------012345678901234- 1744 1745 t.Run("default and minimal fallback", func(t *testing.T) { 1746 repo := &Repository{ 1747 Name: "reponame", 1748 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1749 } 1750 b := testIndexBuilder(t, repo, 1751 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1752 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1753 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1754 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1755 1756 searcher := searcherForTest(t, b) 1757 1758 for _, opts := range []*ListOptions{ 1759 nil, 1760 {}, 1761 {Field: RepoListFieldRepos}, 1762 {Field: RepoListFieldReposMap}, 1763 } { 1764 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1765 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1766 1767 res, err := searcher.List(context.Background(), q, opts) 1768 if err != nil { 1769 t.Fatalf("List(%v): %v", q, err) 1770 } 1771 1772 want := &RepoList{ 1773 Repos: []*RepoListEntry{{ 1774 Repository: *repo, 1775 Stats: RepoStats{ 1776 Documents: 4, 1777 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1778 Shards: 1, 1779 1780 NewLinesCount: 4, 1781 DefaultBranchNewLinesCount: 2, 1782 OtherBranchesNewLinesCount: 3, 1783 }, 1784 }}, 1785 Stats: RepoStats{ 1786 Repos: 1, 1787 Documents: 4, 1788 ContentBytes: 68, 1789 Shards: 1, 1790 1791 NewLinesCount: 4, 1792 DefaultBranchNewLinesCount: 2, 1793 OtherBranchesNewLinesCount: 3, 1794 }, 1795 } 1796 ignored := []cmp.Option{ 1797 cmpopts.EquateEmpty(), 1798 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1799 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1800 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1801 cmpopts.IgnoreFields(Repository{}, "priority"), 1802 } 1803 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1804 
t.Fatalf("mismatch (-want +got):\n%s", diff) 1805 } 1806 1807 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1808 res, err = searcher.List(context.Background(), q, nil) 1809 if err != nil { 1810 t.Fatalf("List(%v): %v", q, err) 1811 } 1812 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1813 t.Fatalf("got %v, want 0 matches", res) 1814 } 1815 }) 1816 } 1817 }) 1818 1819 t.Run("minimal", func(t *testing.T) { 1820 repo := &Repository{ 1821 ID: 1234, 1822 Name: "reponame", 1823 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1824 RawConfig: map[string]string{"repoid": "1234"}, 1825 } 1826 b := testIndexBuilder(t, repo, 1827 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1828 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1829 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1830 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1831 1832 searcher := searcherForTest(t, b) 1833 1834 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1835 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1836 if err != nil { 1837 t.Fatalf("List(%v): %v", q, err) 1838 } 1839 1840 want := &RepoList{ 1841 ReposMap: ReposMap{ 1842 repo.ID: { 1843 HasSymbols: repo.HasSymbols, 1844 Branches: repo.Branches, 1845 }, 1846 }, 1847 Stats: RepoStats{ 1848 Repos: 1, 1849 Shards: 1, 1850 Documents: 4, 1851 IndexBytes: 412, 1852 ContentBytes: 68, 1853 NewLinesCount: 4, 1854 DefaultBranchNewLinesCount: 2, 1855 OtherBranchesNewLinesCount: 3, 1856 }, 1857 } 1858 1859 ignored := []cmp.Option{ 1860 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1861 } 1862 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1863 t.Fatalf("mismatch (-want +got):\n%s", diff) 1864 } 1865 1866 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1867 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1868 if err != nil 
{ 1869 t.Fatalf("List(%v): %v", q, err) 1870 } 1871 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1872 t.Fatalf("got %v, want 0 matches", res) 1873 } 1874 }) 1875} 1876 1877func TestListReposByContent(t *testing.T) { 1878 content := []byte("bla the needle") 1879 1880 b := testIndexBuilder(t, &Repository{ 1881 Name: "reponame", 1882 }, 1883 Document{Name: "f1", Content: content}, 1884 Document{Name: "f2", Content: content}) 1885 1886 searcher := searcherForTest(t, b) 1887 q := &query.Substring{Pattern: "needle"} 1888 res, err := searcher.List(context.Background(), q, nil) 1889 if err != nil { 1890 t.Fatalf("List(%v): %v", q, err) 1891 } 1892 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1893 t.Fatalf("got %v, want 1 matches", res) 1894 } 1895 if got := res.Repos[0].Stats.Shards; got != 1 { 1896 t.Fatalf("got %d, want 1 shard", got) 1897 } 1898 q = &query.Substring{Pattern: "foo"} 1899 res, err = searcher.List(context.Background(), q, nil) 1900 if err != nil { 1901 t.Fatalf("List(%v): %v", q, err) 1902 } 1903 if len(res.Repos) != 0 { 1904 t.Fatalf("got %v, want 0 matches", res) 1905 } 1906} 1907 1908func TestMetadata(t *testing.T) { 1909 content := []byte("bla the needle") 1910 1911 b := testIndexBuilder(t, &Repository{ 1912 Name: "reponame", 1913 }, Document{Name: "f1", Content: content}, 1914 Document{Name: "f2", Content: content}) 1915 1916 var buf bytes.Buffer 1917 if err := b.Write(&buf); err != nil { 1918 t.Fatal(err) 1919 } 1920 f := &memSeeker{buf.Bytes()} 1921 1922 rd, _, err := ReadMetadata(f) 1923 if err != nil { 1924 t.Fatalf("ReadMetadata: %v", err) 1925 } 1926 1927 if got, want := rd[0].Name, "reponame"; got != want { 1928 t.Fatalf("got %q want %q", got, want) 1929 } 1930} 1931 1932func TestOr(t *testing.T) { 1933 b := testIndexBuilder(t, nil, 1934 Document{Name: "f1", Content: []byte("needle")}, 1935 Document{Name: "f2", Content: []byte("banana")}) 1936 t.Run("LineMatches", func(t *testing.T) { 1937 sres := 
searchForTest(t, b, query.NewOr( 1938 &query.Substring{Pattern: "needle"}, 1939 &query.Substring{Pattern: "banana"})) 1940 1941 if len(sres.Files) != 2 { 1942 t.Fatalf("got %v, want 2 files", sres.Files) 1943 } 1944 }) 1945 1946 t.Run("ChunkMatches", func(t *testing.T) { 1947 sres := searchForTest(t, b, query.NewOr( 1948 &query.Substring{Pattern: "needle"}, 1949 &query.Substring{Pattern: "banana"})) 1950 1951 if len(sres.Files) != 2 { 1952 t.Fatalf("got %v, want 2 files", sres.Files) 1953 } 1954 }) 1955} 1956 1957func TestFrequency(t *testing.T) { 1958 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 1959 1960 b := testIndexBuilder(t, nil, 1961 Document{ 1962 Name: "f1", 1963 Content: content, 1964 }) 1965 1966 t.Run("LineMatches", func(t *testing.T) { 1967 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 1968 if len(sres.Files) != 0 { 1969 t.Errorf("got %v, wanted 0 matches", sres.Files) 1970 } 1971 }) 1972 1973 t.Run("ChunkMatches", func(t *testing.T) { 1974 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 1975 if len(sres.Files) != 0 { 1976 t.Errorf("got %v, wanted 0 matches", sres.Files) 1977 } 1978 }) 1979} 1980 1981func TestMatchNewline(t *testing.T) { 1982 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 1983 if err != nil { 1984 t.Fatalf("syntax.Parse: %v", err) 1985 } 1986 1987 content := []byte("pqr\nalex") 1988 1989 b := testIndexBuilder(t, nil, 1990 Document{ 1991 Name: "f1", 1992 Content: content, 1993 }) 1994 1995 t.Run("LineMatches", func(t *testing.T) { 1996 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1997 if len(sres.Files) != 1 { 1998 t.Errorf("got %v, wanted 1 matches", sres.Files) 1999 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 2000 t.Errorf("got match line %q, want %q", l, content) 2001 } 2002 }) 2003 2004 t.Run("ChunkMatches", func(t *testing.T) { 2005 sres := searchForTest(t, b, 
&query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 2006 if len(sres.Files) != 1 { 2007 t.Errorf("got %v, wanted 1 matches", sres.Files) 2008 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 2009 t.Errorf("got match line %q, want %q", c, content) 2010 } 2011 }) 2012} 2013 2014func TestSubRepo(t *testing.T) { 2015 subRepos := map[string]*Repository{ 2016 "sub": { 2017 Name: "sub-name", 2018 LineFragmentTemplate: "sub-line", 2019 }, 2020 } 2021 2022 content := []byte("pqr\nalex") 2023 2024 b := testIndexBuilder(t, &Repository{ 2025 SubRepoMap: subRepos, 2026 }, Document{ 2027 Name: "sub/f1", 2028 Content: content, 2029 SubRepositoryPath: "sub", 2030 }) 2031 2032 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 2033 if len(sres.Files) != 1 { 2034 t.Fatalf("got %v, wanted 1 matches", sres.Files) 2035 } 2036 2037 f := sres.Files[0] 2038 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 2039 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 2040 } 2041 2042 if sres.LineFragments["sub-name"] != "sub-line" { 2043 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 2044 } 2045} 2046 2047func TestSearchEither(t *testing.T) { 2048 b := testIndexBuilder(t, nil, 2049 Document{Name: "f1", Content: []byte("bla needle bla")}, 2050 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 2051 2052 t.Run("LineMatches", func(t *testing.T) { 2053 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 2054 if len(sres.Files) != 2 { 2055 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2056 } 2057 2058 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2059 if len(sres.Files) != 1 { 2060 t.Fatalf("got %v, wanted 1 match", sres.Files) 2061 } 2062 2063 if got, want := sres.Files[0].FileName, "f1"; got != want { 2064 t.Errorf("got %q, want %q", got, want) 2065 } 2066 }) 2067 2068 t.Run("ChunkMatches", func(t 
*testing.T) { 2069 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 2070 if len(sres.Files) != 2 { 2071 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2072 } 2073 2074 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2075 if len(sres.Files) != 1 { 2076 t.Fatalf("got %v, wanted 1 match", sres.Files) 2077 } 2078 2079 if got, want := sres.Files[0].FileName, "f1"; got != want { 2080 t.Errorf("got %q, want %q", got, want) 2081 } 2082 }) 2083} 2084 2085func TestUnicodeExactMatch(t *testing.T) { 2086 needle := "néédlÉ" 2087 content := []byte("blá blá " + needle + " blâ") 2088 2089 b := testIndexBuilder(t, nil, 2090 Document{Name: "f1", Content: content}) 2091 2092 t.Run("LineMatches", func(t *testing.T) { 2093 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2094 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2095 } 2096 }) 2097 2098 t.Run("ChunkMatches", func(t *testing.T) { 2099 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2100 if len(res.Files) != 1 { 2101 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2102 } 2103 }) 2104} 2105 2106func TestUnicodeCoverContent(t *testing.T) { 2107 needle := "néédlÉ" 2108 content := []byte("blá blá " + needle + " blâ") 2109 2110 b := testIndexBuilder(t, nil, 2111 Document{Name: "f1", Content: content}) 2112 2113 t.Run("LineMatches", func(t *testing.T) { 2114 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2115 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2116 } 2117 2118 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2119 if len(res.Files) != 1 { 2120 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2121 } 2122 2123 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), 
needle)); got != want { 2124 t.Errorf("got %d want %d", got, want) 2125 } 2126 }) 2127 2128 t.Run("ChunkMatches", func(t *testing.T) { 2129 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2130 if len(res.Files) != 0 { 2131 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2132 } 2133 2134 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2135 if len(res.Files) != 1 { 2136 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2137 } 2138 2139 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2140 want := uint32(strings.Index(string(content), needle)) 2141 if got != want { 2142 t.Errorf("got %d want %d", got, want) 2143 } 2144 }) 2145} 2146 2147func TestUnicodeNonCoverContent(t *testing.T) { 2148 needle := "nééáádlÉ" 2149 content := []byte("blá blá " + needle + " blâ") 2150 2151 b := testIndexBuilder(t, nil, 2152 Document{Name: "f1", Content: content}) 2153 2154 t.Run("LineMatches", func(t *testing.T) { 2155 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2156 if len(res.Files) != 1 { 2157 t.Fatalf("got %v, wanted 1 match", res.Files) 2158 } 2159 2160 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2161 t.Errorf("got %d want %d", got, want) 2162 } 2163 }) 2164 2165 t.Run("ChunkMatches", func(t *testing.T) { 2166 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2167 if len(res.Files) != 1 { 2168 t.Fatalf("got %v, wanted 1 match", res.Files) 2169 } 2170 2171 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2172 want := uint32(strings.Index(string(content), needle)) 2173 if got != want { 2174 t.Errorf("got %d want %d", got, want) 2175 } 2176 }) 2177} 2178 2179const kelvinCodePoint = 8490 2180 2181func TestUnicodeVariableLength(t *testing.T) { 2182 lower := 'k' 2183 upper := rune(kelvinCodePoint) 
2184 2185 needle := "nee" + string([]rune{lower}) + "eed" 2186 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2187 " ee" + string([]rune{lower}) + "ee" + 2188 " ee" + string([]rune{upper}) + "ee") 2189 2190 t.Run("LineMatches", func(t *testing.T) { 2191 b := testIndexBuilder(t, nil, 2192 Document{Name: "f1", Content: []byte(corpus)}) 2193 2194 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2195 if len(res.Files) != 1 { 2196 t.Fatalf("got %v, wanted 1 match", res.Files) 2197 } 2198 }) 2199 2200 t.Run("ChunkMatches", func(t *testing.T) { 2201 b := testIndexBuilder(t, nil, 2202 Document{Name: "f1", Content: []byte(corpus)}) 2203 2204 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2205 if len(res.Files) != 1 { 2206 t.Fatalf("got %v, wanted 1 match", res.Files) 2207 } 2208 }) 2209} 2210 2211func TestUnicodeFileStartOffsets(t *testing.T) { 2212 unicode := "世界" 2213 wat := "waaaaaat" 2214 b := testIndexBuilder(t, nil, 2215 Document{ 2216 Name: "f1", 2217 Content: []byte(unicode), 2218 }, 2219 Document{ 2220 Name: "f2", 2221 Content: []byte(wat), 2222 }, 2223 ) 2224 q := &query.Substring{Pattern: wat, Content: true} 2225 res := searchForTest(t, b, q) 2226 if len(res.Files) != 1 { 2227 t.Fatalf("got %v, wanted 1 match", res.Files) 2228 } 2229} 2230 2231func TestLongFileUTF8(t *testing.T) { 2232 needle := "neeedle" 2233 2234 // 6 bytes. 
2235 unicode := "世界" 2236 content := []byte(strings.Repeat(unicode, 100) + needle) 2237 b := testIndexBuilder(t, nil, 2238 Document{ 2239 Name: "f1", 2240 Content: []byte(strings.Repeat("a", 50)), 2241 }, 2242 Document{ 2243 Name: "f2", 2244 Content: content, 2245 }) 2246 2247 t.Run("LineMatches", func(t *testing.T) { 2248 q := &query.Substring{Pattern: needle, Content: true} 2249 res := searchForTest(t, b, q) 2250 if len(res.Files) != 1 { 2251 t.Errorf("got %v, want 1 result", res) 2252 } 2253 }) 2254 2255 t.Run("ChunkMatches", func(t *testing.T) { 2256 q := &query.Substring{Pattern: needle, Content: true} 2257 res := searchForTest(t, b, q, chunkOpts) 2258 if len(res.Files) != 1 { 2259 t.Errorf("got %v, want 1 result", res) 2260 } 2261 }) 2262} 2263 2264func TestEstimateDocCount(t *testing.T) { 2265 content := []byte("bla needle bla") 2266 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2267 Document{Name: "f1", Content: content}, 2268 Document{Name: "f2", Content: content}, 2269 ) 2270 2271 t.Run("LineMatches", func(t *testing.T) { 2272 if sres := searchForTest(t, b, 2273 query.NewAnd( 2274 &query.Substring{Pattern: "needle"}, 2275 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2276 ), SearchOptions{ 2277 EstimateDocCount: true, 2278 }); sres.Stats.ShardFilesConsidered != 2 { 2279 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2280 } 2281 if sres := searchForTest(t, b, 2282 query.NewAnd( 2283 &query.Substring{Pattern: "needle"}, 2284 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2285 ), SearchOptions{ 2286 EstimateDocCount: true, 2287 }); sres.Stats.ShardFilesConsidered != 0 { 2288 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2289 } 2290 }) 2291 2292 t.Run("ChunkMatches", func(t *testing.T) { 2293 if sres := searchForTest(t, b, 2294 query.NewAnd( 2295 &query.Substring{Pattern: "needle"}, 2296 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2297 ), SearchOptions{ 2298 EstimateDocCount: 
true, 2299 ChunkMatches: true, 2300 }); sres.Stats.ShardFilesConsidered != 2 { 2301 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2302 } 2303 if sres := searchForTest(t, b, 2304 query.NewAnd( 2305 &query.Substring{Pattern: "needle"}, 2306 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2307 ), SearchOptions{ 2308 EstimateDocCount: true, 2309 ChunkMatches: true, 2310 }); sres.Stats.ShardFilesConsidered != 0 { 2311 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2312 } 2313 }) 2314} 2315 2316func TestUTF8CorrectCorpus(t *testing.T) { 2317 needle := "neeedle" 2318 2319 // 6 bytes. 2320 unicode := "世界" 2321 b := testIndexBuilder(t, nil, 2322 Document{ 2323 Name: "f1", 2324 Content: []byte(strings.Repeat(unicode, 100)), 2325 }, 2326 Document{ 2327 Name: "xxxxxneeedle", 2328 Content: []byte("hello"), 2329 }) 2330 2331 t.Run("LineMatches", func(t *testing.T) { 2332 q := &query.Substring{Pattern: needle, FileName: true} 2333 res := searchForTest(t, b, q) 2334 if len(res.Files) != 1 { 2335 t.Errorf("got %v, want 1 result", res) 2336 } 2337 }) 2338 2339 t.Run("ChunkMatches", func(t *testing.T) { 2340 q := &query.Substring{Pattern: needle, FileName: true} 2341 res := searchForTest(t, b, q, chunkOpts) 2342 if len(res.Files) != 1 { 2343 t.Errorf("got %v, want 1 result", res) 2344 } 2345 }) 2346} 2347 2348func TestBuilderStats(t *testing.T) { 2349 b := testIndexBuilder(t, nil, 2350 Document{ 2351 Name: "f1", 2352 Content: []byte(strings.Repeat("abcd", 1024)), 2353 }) 2354 var buf bytes.Buffer 2355 if err := b.Write(&buf); err != nil { 2356 t.Fatal(err) 2357 } 2358 2359 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2360 t.Errorf("got %d, want %d", got, want) 2361 } 2362} 2363 2364func TestIOStats(t *testing.T) { 2365 b := testIndexBuilder(t, nil, 2366 Document{ 2367 Name: "f1", 2368 Content: []byte(strings.Repeat("abcd", 1024)), 2369 }) 2370 2371 t.Run("LineMatches", func(t *testing.T) { 2372 q := 
&query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, chunkOpts)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
2398 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2399 t.Errorf("got index I/O %d, want %d", got, want) 2400 } 2401 }) 2402} 2403 2404func TestStartLineAnchor(t *testing.T) { 2405 b := testIndexBuilder(t, nil, 2406 Document{ 2407 Name: "f1", 2408 Content: []byte( 2409 `hello 2410start of middle of line 2411`), 2412 }) 2413 2414 t.Run("LineMatches", func(t *testing.T) { 2415 q, err := query.Parse("^start") 2416 if err != nil { 2417 t.Errorf("parse: %v", err) 2418 } 2419 2420 res := searchForTest(t, b, q) 2421 if len(res.Files) != 1 { 2422 t.Errorf("got %v, want 1 file", res.Files) 2423 } 2424 2425 q, err = query.Parse("^middle") 2426 if err != nil { 2427 t.Errorf("parse: %v", err) 2428 } 2429 res = searchForTest(t, b, q) 2430 if len(res.Files) != 0 { 2431 t.Errorf("got %v, want 0 files", res.Files) 2432 } 2433 }) 2434 2435 t.Run("ChunkMatches", func(t *testing.T) { 2436 q, err := query.Parse("^start") 2437 if err != nil { 2438 t.Errorf("parse: %v", err) 2439 } 2440 2441 res := searchForTest(t, b, q, chunkOpts) 2442 if len(res.Files) != 1 { 2443 t.Errorf("got %v, want 1 file", res.Files) 2444 } 2445 2446 q, err = query.Parse("^middle") 2447 if err != nil { 2448 t.Errorf("parse: %v", err) 2449 } 2450 res = searchForTest(t, b, q, chunkOpts) 2451 if len(res.Files) != 0 { 2452 t.Errorf("got %v, want 0 files", res.Files) 2453 } 2454 }) 2455} 2456 2457func TestAndOrUnicode(t *testing.T) { 2458 q, err := query.Parse("orange.*apple") 2459 if err != nil { 2460 t.Errorf("parse: %v", err) 2461 } 2462 finalQ := query.NewAnd(q, 2463 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2464 query.NewOr(&query.Branch{Pattern: "master"})))) 2465 2466 b := testIndexBuilder(t, &Repository{ 2467 Name: "name", 2468 Branches: []RepositoryBranch{{"master", "master-version"}}, 2469 }, Document{ 2470 Name: "f2", 2471 Content: []byte("orange\u2318apple"), 2472 // --------------0123456 78901 2473 Branches: []string{"master"}, 2474 }) 2475 2476 
t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestAndShort runs an And of a two-character substring ("at") and a longer
// one ("orange"); only f1 contains both.
func TestAndShort(t *testing.T) {
	content := []byte("bla needle at orange bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: []byte("xx at xx")},
		Document{Name: "f3", Content: []byte("yy orange xx")},
	)

	q := query.NewAnd(&query.Substring{Pattern: "at"},
		&query.Substring{Pattern: "orange"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestNoCollectRegexpSubstring searches for the regexp "final[,.]" in content
// where "final" appears on two lines but is followed by punctuation on only
// one of them, and expects a single match.
func TestNoCollectRegexpSubstring(t *testing.T) {
	content := []byte("bla final bla\nfoo final, foo")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
	)

	q := &query.Regexp{
		Regexp: mustParseRE("final[,.]"),
	}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.LineMatches) != 1 {
			t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if
len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.ChunkMatches) != 1 {
			// Print the chunk matches that were counted; LineMatches is
			// not what this subtest asserts on.
			t.Fatalf("got chunk matches %v, want 1 chunk match", f.ChunkMatches)
		}
	})
}

// printLineMatches renders each line match as `number:"line" fragments` and
// joins the entries with ", " for use in test failure messages.
func printLineMatches(ms []LineMatch) string {
	ss := make([]string, 0, len(ms))
	for _, m := range ms {
		ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
	}

	return strings.Join(ss, ", ")
}

// TestLang checks that a Language filter combined with a substring query
// returns only the document indexed with that language (f3, "cpp").
func TestLang(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "cpp"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut queries for a language that no indexed document uses and
// expects no results and no index bytes loaded.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms runs a query made only of a Language atom (no text
// atoms) and expects the single document indexed as "java".
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			// The java document is f2.
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms runs a query whose only text atom is negated, combined
// with a matching repo filter, and expects both documents to be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})

t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			// Both indexed documents (f1, f2) are expected.
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
}

// TestSymbolBoundaryStart searches for a symbol whose section begins at
// offset 0 of the document and checks the reported match offset.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd searches for a symbol whose section ends at the last
// byte of the document and checks the reported match offset (14).
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m :=
res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			// The message must state the offset the condition expects.
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring searches for "bla" as a symbol substring; the only
// symbol section is {4, 12}, and the expected match is at offset 7 with
// length 3.
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact searches for "sym" where the text occurs several
// times but only the occurrence at offset 4 is declared as a symbol section.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			// The expected offset is 4, matching the condition above.
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact searches symbols with the anchored regexp "^bla$";
// of the three symbol sections only the one at offset 5 is expected to match.
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 5 {
			t.Fatalf("got offset %d, want 5", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m :=
res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 5 {
			t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpPartial searches a single symbol section with an
// unanchored regexp and expects the match to cover bytes 1 through 4 of the
// symbol.
func TestSymbolRegexpPartial(t *testing.T) {
	content := []byte("abcdef")
	// ----------------012345

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 6}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 1 {
			t.Fatalf("got offset %d, want 1", m.Offset)
		}
		if m.MatchLength != 3 {
			t.Fatalf("got match length %d, want 3", m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 1 {
			t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
		}
		if m.End.ByteOffset != 4 {
			t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
		}
	})
}

// TestSymbolRegexpAll searches symbols with ".*" and expects one match per
// declared symbol section, at that section's start offset, in every document.
func TestSymbolRegexpAll(t *testing.T) {
	docs := []Document{
		{
			Name:    "f1",
			Content: []byte("Hello Zoekt"),
			// --------------01234567890
			Symbols: []DocumentSection{{0, 5}, {6, 11}},
		},
		{
			Name:    "f2",
			Content: []byte("Second Zoekt Third"),
			// --------------012345678901234567
			Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
		},
	}

	b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2914 q := &query.Symbol{ 2915 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2916 } 2917 t.Run("LineMatches", func(t *testing.T) { 2918 res := searchForTest(t, b, q) 2919 if len(res.Files) != len(docs) { 2920 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2921 } 2922 for i, want := range docs { 2923 got := res.Files[i].LineMatches[0].LineFragments 2924 if len(got) != len(want.Symbols) { 2925 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2926 } 2927 2928 for j, sec := range want.Symbols { 2929 if sec.Start != got[j].Offset { 2930 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 2931 } 2932 } 2933 } 2934 }) 2935 2936 t.Run("ChunkMatches", func(t *testing.T) { 2937 res := searchForTest(t, b, q, chunkOpts) 2938 if len(res.Files) != len(docs) { 2939 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2940 } 2941 for i, want := range docs { 2942 got := res.Files[i].ChunkMatches[0].Ranges 2943 if len(got) != len(want.Symbols) { 2944 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2945 } 2946 2947 for j, sec := range want.Symbols { 2948 if sec.Start != uint32(got[j].Start.ByteOffset) { 2949 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 2950 } 2951 } 2952 } 2953 }) 2954} 2955 2956func TestHitIterTerminate(t *testing.T) { 2957 // contrived input: trigram frequencies forces selecting abc + 2958 // def for the distance iteration. There is no match, so this 2959 // will advance the compressedPostingIterator to beyond the 2960 // end. 
2961 content := []byte("abc bcdbcd cdecde abcabc def efg") 2962 b := testIndexBuilder(t, nil, 2963 Document{ 2964 Name: "f1", 2965 Content: content, 2966 }, 2967 ) 2968 2969 t.Run("LineMatches", func(t *testing.T) { 2970 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 2971 }) 2972 2973 t.Run("ChunkMatches", func(t *testing.T) { 2974 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 2975 }) 2976} 2977 2978func TestDistanceHitIterBailLast(t *testing.T) { 2979 content := []byte("AST AST AST UASH") 2980 b := testIndexBuilder(t, nil, 2981 Document{ 2982 Name: "f1", 2983 Content: content, 2984 }, 2985 ) 2986 t.Run("LineMatches", func(t *testing.T) { 2987 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 2988 if len(res.Files) != 0 { 2989 t.Fatalf("got %v, want no results", res.Files) 2990 } 2991 }) 2992 2993 t.Run("LineMatches", func(t *testing.T) { 2994 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 2995 if len(res.Files) != 0 { 2996 t.Fatalf("got %v, want no results", res.Files) 2997 } 2998 }) 2999} 3000 3001func TestDocumentSectionRuneBoundary(t *testing.T) { 3002 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3003 b, err := NewIndexBuilder(nil) 3004 if err != nil { 3005 t.Fatalf("NewIndexBuilder: %v", err) 3006 } 3007 3008 for i, sec := range []DocumentSection{ 3009 {2, 6}, 3010 {3, 7}, 3011 } { 3012 if err := b.Add(Document{ 3013 Name: "f1", 3014 Content: []byte(content), 3015 Symbols: []DocumentSection{sec}, 3016 }); err == nil { 3017 t.Errorf("%d: Add succeeded", i) 3018 } 3019 } 3020} 3021 3022func TestUnicodeQuery(t *testing.T) { 3023 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3024 b := testIndexBuilder(t, nil, 3025 Document{ 3026 Name: "f1", 3027 Content: []byte(content), 3028 }, 3029 ) 3030 3031 q := &query.Substring{Pattern: content} 3032 3033 t.Run("LineMatches", func(t *testing.T) { 3034 res := searchForTest(t, b, q) 3035 if 
len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Print the chunk matches that were counted, not LineMatches.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 range, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks
// that its original text is not searchable while the literal "NOT-INDEXED"
// is.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {

		b, err := NewIndexBuilder(nil)
		if err != nil {
			t.Fatalf("NewIndexBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q :=
&query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestCheckText checks that CheckText accepts empty, ASCII and non-ASCII
// text (including a BOM and the replacement character) with a limit of
// 20000, and rejects each of the second group of inputs with a limit of 15.
func TestCheckText(t *testing.T) {
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if err := CheckText([]byte(text), 20000); err != nil {
			t.Errorf("CheckText(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if err := CheckText([]byte(text), 15); err == nil {
			t.Errorf("CheckText(%q) succeeded", text)
		}
	}
}

// TestLineAnd runs a content regexp requiring "apple" followed by "banana"
// on the same line; only f1 has such a line, and exactly one regexp
// evaluation is expected.
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	// Do not discard the parse error: a nil regexp would only surface
	// later as a confusing failure inside the search.
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount !=
wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName runs the same "apple ... banana" regexp restricted to
// file names; only the document named "apple banana" is expected, with one
// regexp evaluation.
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	// Surface a parse failure immediately instead of searching with a
	// nil regexp.
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestMultiLineRegex runs a regexp that can span a line break ("apple",
// whitespace, "grape"); only f1 is expected to match.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp: r,
3220 } 3221 t.Run("LineMatches", func(t *testing.T) { 3222 res := searchForTest(t, b, &q) 3223 wantRegexpCount := 2 3224 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3225 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3226 } 3227 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3228 t.Errorf("got %v, want 1 result", res.Files) 3229 } 3230 if l := len(res.Files[0].LineMatches); l != 2 { 3231 t.Errorf("got %v, want 2 line matches", l) 3232 } 3233 }) 3234 3235 t.Run("ChunkMatches", func(t *testing.T) { 3236 res := searchForTest(t, b, &q, chunkOpts) 3237 wantRegexpCount := 2 3238 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3239 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3240 } 3241 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3242 t.Errorf("got %v, want 1 result", res.Files) 3243 } 3244 if l := len(res.Files[0].ChunkMatches); l != 1 { 3245 t.Errorf("got %v, want 1 chunk matches", l) 3246 } 3247 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3248 t.Errorf("got %v, want 1 chunk ranges", l) 3249 } 3250 }) 3251} 3252 3253func TestSearchTypeFileName(t *testing.T) { 3254 b := testIndexBuilder(t, &Repository{ 3255 Name: "reponame", 3256 }, 3257 Document{Name: "f1", Content: []byte("bla the needle")}, 3258 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3259 // -----------------------------------012345678901234567890-123456 3260 ) 3261 3262 t.Run("LineMatches", func(t *testing.T) { 3263 wantSingleMatch := func(res *SearchResult, want string) { 3264 t.Helper() 3265 fmatches := res.Files 3266 if len(fmatches) != 1 { 3267 t.Errorf("got %v, want 1 matches", len(fmatches)) 3268 return 3269 } 3270 if len(fmatches[0].LineMatches) != 1 { 3271 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3272 return 3273 } 3274 var got string 3275 if fmatches[0].LineMatches[0].FileName { 3276 got = fmatches[0].FileName 3277 
} else { 3278 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3279 } 3280 3281 if got != want { 3282 t.Errorf("got %s, want %s", got, want) 3283 } 3284 } 3285 3286 // Only return the later match in the second file 3287 res := searchForTest(t, b, query.NewAnd( 3288 &query.Type{ 3289 Type: query.TypeFileName, 3290 Child: &query.Substring{Pattern: "needle"}, 3291 }, 3292 &query.Substring{Pattern: "file"})) 3293 wantSingleMatch(res, "f2:8") 3294 3295 // Only return a filename result 3296 res = searchForTest(t, b, 3297 &query.Type{ 3298 Type: query.TypeFileName, 3299 Child: &query.Substring{Pattern: "file"}, 3300 }) 3301 wantSingleMatch(res, "f2") 3302 }) 3303 3304 t.Run("ChunkMatches", func(t *testing.T) { 3305 wantSingleMatch := func(res *SearchResult, want string) { 3306 t.Helper() 3307 fmatches := res.Files 3308 if len(fmatches) != 1 { 3309 t.Errorf("got %v, want 1 matches", len(fmatches)) 3310 return 3311 } 3312 if len(fmatches[0].ChunkMatches) != 1 { 3313 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3314 return 3315 } 3316 var got string 3317 if fmatches[0].ChunkMatches[0].FileName { 3318 got = fmatches[0].FileName 3319 } else { 3320 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3321 } 3322 3323 if got != want { 3324 t.Errorf("got %s, want %s", got, want) 3325 } 3326 } 3327 3328 // Only return the later match in the second file 3329 res := searchForTest(t, b, query.NewAnd( 3330 &query.Type{ 3331 Type: query.TypeFileName, 3332 Child: &query.Substring{Pattern: "needle"}, 3333 }, 3334 &query.Substring{Pattern: "file"}), 3335 chunkOpts, 3336 ) 3337 wantSingleMatch(res, "f2:8") 3338 3339 // Only return a filename result 3340 res = searchForTest(t, b, 3341 &query.Type{ 3342 Type: query.TypeFileName, 3343 Child: &query.Substring{Pattern: "file"}, 3344 }, 3345 chunkOpts, 3346 ) 3347 wantSingleMatch(res, "f2") 3348 }) 3349} 3350 3351func 
TestSearchTypeLanguage(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	},
		Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
		Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
		Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
	)

	t.Log(b.languageMap)

	t.Run("LineMatches", func(t *testing.T) {
		// wantSingleMatch asserts a single file with a single line match
		// and compares it, rendered as "name" or "name:offset", to want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		res := searchForTest(t, b, &query.Language{Language: "Apex"})
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"})
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"})
		wantSingleMatch(res, "hello.h")

		// With the current feature version a C++ query finds nothing.
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		// test fallback language search by pretending it's an older index version
		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		wantSingleMatch(res, "hello.h")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// wantSingleMatch asserts a single file with a single chunk match
		// and compares it, rendered as "name" or "name:offset", to want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// The LineMatches subtest leaves the builder at version 11.
		b.featureVersion = FeatureVersion // reset feature version
		res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
		wantSingleMatch(res, "hello.h")

		// With the current feature version a C++ query finds nothing.
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		// test fallback language search by pretending it's an older index version
		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		wantSingleMatch(res, "hello.h")
	})
}

func TestStats(t *testing.T) {
	ignored := []cmp.Option{
		cmpopts.EquateEmpty(),
		cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
		cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
		cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
	}

	repoListEntries := func(b *IndexBuilder) []RepoListEntry {
		searcher := searcherForTest(t, b)
		indexdata := searcher.(*indexData)
		return indexdata.repoListEntry
	}

	t.Run("one empty repo", func(t *testing.T) {
		b := testIndexBuilder(t, nil)
		got := repoListEntries(b)
		want :=
[]RepoListEntry{ 3470 { 3471 Stats: RepoStats{ 3472 Repos: 0, 3473 Shards: 1, 3474 Documents: 0, 3475 IndexBytes: 20, 3476 ContentBytes: 0, 3477 NewLinesCount: 0, 3478 DefaultBranchNewLinesCount: 0, 3479 OtherBranchesNewLinesCount: 0, 3480 }, 3481 }, 3482 } 3483 3484 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3485 t.Fatalf("mismatch (-want +got):\n%s", diff) 3486 } 3487 3488 }) 3489 3490 t.Run("one simple shard", func(t *testing.T) { 3491 b := testIndexBuilder(t, nil, 3492 Document{Name: "doc 0", Content: []byte("content 0")}, 3493 Document{Name: "doc 1", Content: []byte("content 1")}, 3494 ) 3495 got := repoListEntries(b) 3496 want := []RepoListEntry{ 3497 { 3498 Stats: RepoStats{ 3499 Repos: 0, 3500 Shards: 1, 3501 Documents: 2, 3502 IndexBytes: 224, 3503 ContentBytes: 28, 3504 NewLinesCount: 0, 3505 DefaultBranchNewLinesCount: 0, 3506 OtherBranchesNewLinesCount: 0, 3507 }, 3508 }, 3509 } 3510 3511 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3512 t.Fatalf("mismatch (-want +got):\n%s", diff) 3513 } 3514 3515 }) 3516 3517 t.Run("one compound shard", func(t *testing.T) { 3518 b := testIndexBuilderCompound(t, 3519 []*Repository{ 3520 {Name: "repo 0"}, 3521 {Name: "repo 1"}, 3522 }, 3523 [][]Document{ 3524 { 3525 {Name: "doc 0", Content: []byte("content 0")}, 3526 {Name: "doc 1", Content: []byte("content 1")}, 3527 }, 3528 { 3529 {Name: "doc 2", Content: []byte("content 2")}, 3530 {Name: "doc 3", Content: []byte("content 3")}, 3531 }, 3532 }, 3533 ) 3534 got := repoListEntries(b) 3535 want := []RepoListEntry{ 3536 { 3537 Stats: RepoStats{ 3538 Repos: 0, 3539 Shards: 1, 3540 Documents: 2, 3541 IndexBytes: 180, 3542 ContentBytes: 28, 3543 NewLinesCount: 0, 3544 DefaultBranchNewLinesCount: 0, 3545 OtherBranchesNewLinesCount: 0, 3546 }, 3547 }, 3548 { 3549 Stats: RepoStats{ 3550 Repos: 0, 3551 Shards: 1, 3552 Documents: 2, 3553 IndexBytes: 180, 3554 ContentBytes: 28, 3555 NewLinesCount: 0, 3556 DefaultBranchNewLinesCount: 0, 3557 
OtherBranchesNewLinesCount: 0, 3558 }, 3559 }, 3560 } 3561 3562 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3563 t.Fatalf("mismatch (-want +got):\n%s", diff) 3564 } 3565 }) 3566 3567 t.Run("compound shard with empty repos", func(t *testing.T) { 3568 b := testIndexBuilderCompound(t, 3569 []*Repository{ 3570 {Name: "repo 0"}, 3571 {Name: "repo 1"}, 3572 {Name: "repo 2"}, 3573 {Name: "repo 3"}, 3574 {Name: "repo 4"}, 3575 }, 3576 [][]Document{ 3577 {{Name: "doc 0", Content: []byte("content 0")}}, 3578 nil, 3579 {{Name: "doc 1", Content: []byte("content 1")}}, 3580 nil, 3581 nil, 3582 }, 3583 ) 3584 got := repoListEntries(b) 3585 3586 entryEmpty := RepoListEntry{Stats: RepoStats{ 3587 Shards: 1, 3588 Documents: 0, 3589 ContentBytes: 0, 3590 }} 3591 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3592 Shards: 1, 3593 Documents: 1, 3594 ContentBytes: 14, 3595 }} 3596 3597 want := []RepoListEntry{ 3598 entryNonEmpty, 3599 entryEmpty, 3600 entryNonEmpty, 3601 entryEmpty, 3602 entryEmpty, 3603 } 3604 3605 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3606 t.Fatalf("mismatch (-want +got):\n%s", diff) 3607 } 3608 3609 }) 3610} 3611 3612// This tests the frequent pattern "\bLITERAL\b". 
3613func TestWordSearch(t *testing.T) { 3614 content := []byte("needle the bla") 3615 // ----------------01234567890123 3616 3617 b := testIndexBuilder(t, nil, 3618 Document{ 3619 Name: "f1", 3620 Content: content, 3621 }) 3622 3623 t.Run("LineMatches", func(t *testing.T) { 3624 sres := searchForTest(t, b, 3625 &query.Regexp{ 3626 Regexp: mustParseRE("\\bthe\\b"), 3627 CaseSensitive: true, 3628 Content: true, 3629 }) 3630 3631 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3632 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3633 } 3634 3635 if sres.Stats.RegexpsConsidered != 0 { 3636 t.Fatal("expected regexp to be skipped") 3637 } 3638 3639 got := sres.Files[0].LineMatches[0] 3640 want := LineMatch{ 3641 LineFragments: []LineFragmentMatch{{ 3642 LineOffset: 7, 3643 Offset: 7, 3644 MatchLength: 3, 3645 }}, 3646 Line: content, 3647 FileName: false, 3648 LineNumber: 1, 3649 LineStart: 0, 3650 LineEnd: 14, 3651 } 3652 3653 if !reflect.DeepEqual(got, want) { 3654 t.Errorf("got %#v, want %#v", got, want) 3655 } 3656 }) 3657 3658 t.Run("ChunkMatches", func(t *testing.T) { 3659 sres := searchForTest(t, b, 3660 &query.Regexp{ 3661 Regexp: mustParseRE("\\bthe\\b"), 3662 CaseSensitive: true, 3663 }, chunkOpts) 3664 3665 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3666 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3667 } 3668 3669 if sres.Stats.RegexpsConsidered != 0 { 3670 t.Fatal("expected regexp to be skipped") 3671 } 3672 3673 got := sres.Files[0].ChunkMatches[0] 3674 want := ChunkMatch{ 3675 Content: content, 3676 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3677 Ranges: []Range{{ 3678 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3679 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3680 }}, 3681 } 3682 3683 if diff := cmp.Diff(want, got); diff != "" { 3684 t.Fatal(diff) 3685 } 3686 }) 3687}