fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 } 42 r.Files[i].Checksum = nil 43 r.Files[i].Debug = "" 44 } 45} 46 47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 48 t.Helper() 49 50 b, err := NewIndexBuilder(repo) 51 if err != nil { 52 t.Fatalf("NewIndexBuilder: %v", err) 53 } 54 55 for i, d := range docs { 56 if err := b.Add(d); err != nil { 57 t.Fatalf("Add %d: %v", i, err) 58 } 59 } 60 61 return b 62} 63 64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 65 t.Helper() 66 67 b := newIndexBuilder() 68 b.indexFormatVersion = NextIndexFormatVersion 69 70 if len(repos) != len(docs) { 71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 72 } 73 74 for i, repo := range repos { 75 if err := 
b.setRepository(repo); err != nil { 76 t.Fatal(err) 77 } 78 for j, d := range docs[i] { 79 if err := b.Add(d); err != nil { 80 t.Fatalf("Add %d %d: %v", i, j, err) 81 } 82 } 83 } 84 85 return b 86} 87 88func TestBoundary(t *testing.T) { 89 b := testIndexBuilder(t, nil, 90 Document{Name: "f1", Content: []byte("x the")}, 91 Document{Name: "f1", Content: []byte("reader")}) 92 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 93 if len(res.Files) > 0 { 94 t.Fatalf("got %v, want no matches", res.Files) 95 } 96} 97 98func TestDocSectionInvalid(t *testing.T) { 99 b, err := NewIndexBuilder(nil) 100 if err != nil { 101 t.Fatalf("NewIndexBuilder: %v", err) 102 } 103 doc := Document{ 104 Name: "f1", 105 Content: []byte("01234567890123"), 106 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 107 } 108 109 if err := b.Add(doc); err == nil { 110 t.Errorf("overlapping doc sections should fail") 111 } 112 113 doc = Document{ 114 Name: "f1", 115 Content: []byte("01234567890123"), 116 Symbols: []DocumentSection{{0, 20}}, 117 } 118 119 if err := b.Add(doc); err == nil { 120 t.Errorf("doc sections beyond EOF should fail") 121 } 122} 123 124func TestBasic(t *testing.T) { 125 b := testIndexBuilder(t, nil, 126 Document{ 127 Name: "f2", 128 Content: []byte("to carry water in the no later bla"), 129 // --------------0123456789012345678901234567890123 130 }) 131 132 t.Run("LineMatch", func(t *testing.T) { 133 res := searchForTest(t, b, &query.Substring{ 134 Pattern: "water", 135 CaseSensitive: true, 136 }) 137 fmatches := res.Files 138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 139 t.Fatalf("got %v, want 1 matches", fmatches) 140 } 141 142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 143 want := "f2:9" 144 if got != want { 145 t.Errorf("1: got %s, want %s", got, want) 146 } 147 }) 148 149 t.Run("ChunkMatch", func(t *testing.T) { 150 res := searchForTest(t, b, &query.Substring{ 151 Pattern: "water", 152 
CaseSensitive: true, 153 }, chunkOpts) 154 fmatches := res.Files 155 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 156 t.Fatalf("got %v, want 1 matches", fmatches) 157 } 158 159 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 160 want := "f2:9" 161 if got != want { 162 t.Errorf("1: got %s, want %s", got, want) 163 } 164 }) 165} 166 167func TestEmptyIndex(t *testing.T) { 168 b := testIndexBuilder(t, nil) 169 searcher := searcherForTest(t, b) 170 171 var opts SearchOptions 172 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 173 t.Fatalf("Search: %v", err) 174 } 175 176 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 177 t.Fatalf("List: %v", err) 178 } 179 180 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 181 t.Fatalf("Search: %v", err) 182 } 183} 184 185type memSeeker struct { 186 data []byte 187} 188 189func (s *memSeeker) Name() string { 190 return "memseeker" 191} 192 193func (s *memSeeker) Close() {} 194func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 195 return s.data[off : off+sz], nil 196} 197 198func (s *memSeeker) Size() (uint32, error) { 199 return uint32(len(s.data)), nil 200} 201 202func TestNewlines(t *testing.T) { 203 b := testIndexBuilder(t, nil, 204 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 205 // ---------------------------------------------012345-678901-234 206 207 t.Run("LineMatches", func(t *testing.T) { 208 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 209 210 matches := sres.Files 211 want := []FileMatch{{ 212 FileName: "filename", 213 LineMatches: []LineMatch{{ 214 LineFragments: []LineFragmentMatch{{ 215 Offset: 8, 216 LineOffset: 2, 217 MatchLength: 3, 218 }}, 219 Line: []byte("line2"), 220 LineStart: 6, 221 LineEnd: 11, 222 LineNumber: 
2, 223 }}, 224 }} 225 226 if !reflect.DeepEqual(matches, want) { 227 t.Errorf("got %v, want %v", matches, want) 228 } 229 }) 230 231 t.Run("ChunkMatches", func(t *testing.T) { 232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 233 234 matches := sres.Files 235 want := []FileMatch{{ 236 FileName: "filename", 237 ChunkMatches: []ChunkMatch{{ 238 Content: []byte("line2"), 239 ContentStart: Location{ 240 ByteOffset: 6, 241 LineNumber: 2, 242 Column: 1, 243 }, 244 Ranges: []Range{{ 245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 247 }}, 248 }}, 249 }} 250 251 if diff := cmp.Diff(want, matches); diff != "" { 252 t.Fatal(diff) 253 } 254 }) 255} 256 257// A result spanning multiple lines should have LineMatches that only cover 258// single lines. 259func TestQueryNewlines(t *testing.T) { 260 text := "line1\nline2\nbla" 261 b := testIndexBuilder(t, nil, 262 Document{Name: "filename", Content: []byte(text)}) 263 264 t.Run("LineMatches", func(t *testing.T) { 265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 266 matches := sres.Files 267 if len(matches) != 1 { 268 t.Fatalf("got %d file matches, want exactly one", len(matches)) 269 } 270 m := matches[0] 271 if len(m.LineMatches) != 2 { 272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 273 } 274 }) 275 276 t.Run("ChunkMatches", func(t *testing.T) { 277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 278 matches := sres.Files 279 if len(matches) != 1 { 280 t.Fatalf("got %d file matches, want exactly one", len(matches)) 281 } 282 m := matches[0] 283 if len(m.ChunkMatches) != 1 { 284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 285 } 286 }) 287} 288 289var chunkOpts = SearchOptions{ChunkMatches: true} 290 291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 292 searcher := 
searcherForTest(t, b) 293 var opts SearchOptions 294 if len(o) > 0 { 295 opts = o[0] 296 } 297 res, err := searcher.Search(context.Background(), q, &opts) 298 if err != nil { 299 t.Fatalf("Search(%s): %v", q, err) 300 } 301 clearScores(res) 302 return res 303} 304 305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 306 var buf bytes.Buffer 307 if err := b.Write(&buf); err != nil { 308 t.Fatal(err) 309 } 310 f := &memSeeker{buf.Bytes()} 311 312 searcher, err := NewSearcher(f) 313 if err != nil { 314 t.Fatalf("NewSearcher: %v", err) 315 } 316 317 return searcher 318} 319 320func TestCaseFold(t *testing.T) { 321 b := testIndexBuilder(t, nil, 322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 323 // -----------------------------------012345678901234 324 ) 325 t.Run("LineMatches", func(t *testing.T) { 326 sres := searchForTest(t, b, &query.Substring{ 327 Pattern: "bananas", 328 CaseSensitive: true, 329 }) 330 matches := sres.Files 331 if len(matches) != 0 { 332 t.Errorf("foldcase: got %#v, want 0 matches", matches) 333 } 334 335 sres = searchForTest(t, b, 336 &query.Substring{ 337 Pattern: "BaNaNAS", 338 CaseSensitive: true, 339 }) 340 matches = sres.Files 341 if len(matches) != 1 { 342 t.Errorf("no foldcase: got %v, want 1 matches", matches) 343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 344 t.Errorf("foldcase: got %v, want offsets 7", matches) 345 } 346 }) 347 348 t.Run("ChunkMatches", func(t *testing.T) { 349 sres := searchForTest(t, b, &query.Substring{ 350 Pattern: "bananas", 351 CaseSensitive: true, 352 }, chunkOpts) 353 matches := sres.Files 354 if len(matches) != 0 { 355 t.Errorf("foldcase: got %#v, want 0 matches", matches) 356 } 357 358 sres = searchForTest(t, b, 359 &query.Substring{ 360 Pattern: "BaNaNAS", 361 CaseSensitive: true, 362 }) 363 matches = sres.Files 364 if len(matches) != 1 { 365 t.Errorf("no foldcase: got %v, want 1 matches", matches) 366 } else if 
matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 367 t.Errorf("foldcase: got %v, want offsets 7", matches) 368 } 369 }) 370} 371 372// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 373// chars. Those are then set as symbols. 374func wordsAsSymbols(doc Document) Document { 375 re := regexp.MustCompile(`\b\w{2,}\b`) 376 var symbols []DocumentSection 377 for _, match := range re.FindAllIndex(doc.Content, -1) { 378 symbols = append(symbols, DocumentSection{ 379 Start: uint32(match[0]), 380 End: uint32(match[1]), 381 }) 382 } 383 doc.Symbols = symbols 384 return doc 385} 386 387func TestSearchStats(t *testing.T) { 388 ctx := context.Background() 389 searcher := searcherForTest(t, testIndexBuilder(t, nil, 390 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 391 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 392 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 393 // --------------------------------------------------0123456789012345 394 )) 395 396 andQuery := query.NewAnd( 397 &query.Substring{ 398 Pattern: "banana", 399 }, 400 &query.Substring{ 401 Pattern: "apple", 402 }, 403 ) 404 405 t.Run("LineMatches", func(t *testing.T) { 406 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 407 if err != nil { 408 t.Fatal(err) 409 } 410 matches := sres.Files 411 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 412 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 413 } 414 415 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 416 t.Fatalf("got %#v, want offsets 2,9", matches) 417 } 418 }) 419 t.Run("ChunkMatches", func(t *testing.T) { 420 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 421 if err != nil { 422 t.Fatal(err) 423 } 424 matches := sres.Files 425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || 
len(matches[0].ChunkMatches[0].Ranges) != 2 { 426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 427 } 428 429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 430 t.Fatalf("got %#v, want offsets 2,9", matches) 431 } 432 }) 433 t.Run("Stats", func(t *testing.T) { 434 cases := []struct { 435 Name string 436 Q query.Q 437 Want Stats 438 }{{ 439 Name: "and-query", 440 Q: andQuery, 441 Want: Stats{ 442 FilesLoaded: 1, 443 ContentBytesLoaded: 22, 444 IndexBytesLoaded: 8, 445 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 446 NgramLookups: 104, 447 MatchCount: 2, 448 FileCount: 1, 449 FilesConsidered: 2, 450 ShardsScanned: 1, 451 }, 452 }, { 453 Name: "one-trigram", 454 Q: &query.Substring{ 455 Pattern: "a y", 456 Content: true, 457 CaseSensitive: true, 458 }, 459 Want: Stats{ 460 ContentBytesLoaded: 14, 461 IndexBytesLoaded: 1, 462 FileCount: 1, 463 FilesConsidered: 1, 464 FilesLoaded: 1, 465 ShardsScanned: 1, 466 MatchCount: 1, 467 NgramMatches: 1, 468 NgramLookups: 2, // once to lookup frequency then again to access posting list. 469 }, 470 }, { 471 Name: "one-trigram-case-insensitive", 472 Q: &query.Substring{ 473 Pattern: "a y", 474 Content: true, 475 }, 476 Want: Stats{ 477 ContentBytesLoaded: 14, 478 IndexBytesLoaded: 1, 479 FileCount: 1, 480 FilesConsidered: 1, 481 FilesLoaded: 1, 482 ShardsScanned: 1, 483 MatchCount: 1, 484 NgramMatches: 1, 485 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
486 }, 487 }, { 488 Name: "one-trigram-pruned", 489 Q: &query.Substring{ 490 Pattern: "foo", 491 Content: true, 492 CaseSensitive: true, 493 }, 494 Want: Stats{ 495 ShardsSkippedFilter: 1, 496 NgramLookups: 1, // only had to lookup once 497 }, 498 }, { 499 Name: "one-trigram-branch-pruned", 500 Q: query.NewAnd( 501 &query.Substring{ 502 Pattern: "foo", 503 Content: true, 504 CaseSensitive: true, 505 }, 506 &query.Substring{ 507 Pattern: "a y", 508 Content: true, 509 CaseSensitive: true, 510 }, 511 ), 512 Want: Stats{ 513 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 514 ShardsSkippedFilter: 1, 515 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 516 }, 517 }, { 518 Name: "symbol-substr-nomatch", 519 Q: &query.Symbol{Expr: &query.Substring{ 520 Pattern: "banana apple", 521 Content: true, 522 CaseSensitive: true, 523 }}, 524 Want: Stats{ 525 IndexBytesLoaded: 3, 526 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 527 MatchCount: 0, // even though there is a match it doesn't align with a symbol 528 ShardsScanned: 1, 529 NgramMatches: 1, 530 NgramLookups: 12, 531 }, 532 }, { 533 Name: "symbol-substr", 534 Q: &query.Symbol{Expr: &query.Substring{ 535 Pattern: "apple", 536 Content: true, 537 CaseSensitive: true, 538 }}, 539 Want: Stats{ 540 ContentBytesLoaded: 35, 541 IndexBytesLoaded: 4, 542 FileCount: 2, 543 FilesConsidered: 2, // must be 2 to ensure we used the index 544 FilesLoaded: 2, 545 MatchCount: 2, // apple symbols is in two files 546 ShardsScanned: 1, 547 NgramMatches: 2, 548 NgramLookups: 5, 549 }, 550 }, { 551 Name: "symbol-regexp-nomatch", 552 Q: &query.Symbol{Expr: &query.Regexp{ 553 Regexp: mustParseRE("^apple.banana$"), 554 Content: true, 555 CaseSensitive: true, 556 }}, 557 Want: Stats{ 558 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 559 IndexBytesLoaded: 8, 560 FilesConsidered: 2, // important that we 
don't check 3 to ensure we are using the index 561 FilesLoaded: 2, 562 MatchCount: 0, // even though there is a match it doesn't align with a symbol 563 ShardsScanned: 1, 564 NgramMatches: 3, 565 NgramLookups: 11, 566 }, 567 }, { 568 Name: "symbol-regexp", 569 Q: &query.Symbol{Expr: &query.Regexp{ 570 Regexp: mustParseRE("^app.e$"), 571 Content: true, 572 CaseSensitive: true, 573 }}, 574 Want: Stats{ 575 ContentBytesLoaded: 35, 576 IndexBytesLoaded: 2, 577 FileCount: 2, 578 FilesConsidered: 2, // must be 2 to ensure we used the index 579 FilesLoaded: 2, 580 MatchCount: 2, // apple symbols is in two files 581 ShardsScanned: 1, 582 NgramMatches: 2, 583 NgramLookups: 2, 584 }, 585 }} 586 587 for _, tc := range cases { 588 t.Run(tc.Name, func(t *testing.T) { 589 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 590 if err != nil { 591 t.Fatal(err) 592 } 593 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 594 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 595 } 596 }) 597 } 598 599 }) 600} 601 602func TestAndNegateSearch(t *testing.T) { 603 b := testIndexBuilder(t, nil, 604 Document{Name: "f1", Content: []byte("x banana y")}, 605 // -----------------------------------0123456789 606 Document{Name: "f4", Content: []byte("x banana apple y")}) 607 608 t.Run("LineMatches", func(t *testing.T) { 609 sres := searchForTest(t, b, query.NewAnd( 610 &query.Substring{ 611 Pattern: "banana", 612 }, 613 &query.Not{Child: &query.Substring{ 614 Pattern: "apple", 615 }})) 616 617 matches := sres.Files 618 619 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 620 t.Fatalf("got %v, want 1 match", matches) 621 } 622 if matches[0].FileName != "f1" { 623 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 624 } 625 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 626 t.Fatalf("got %v, want offset 2", matches) 627 } 628 }) 629 630 t.Run("ChunkMatches", func(t *testing.T) { 631 
sres := searchForTest(t, b, 632 query.NewAnd( 633 &query.Substring{ 634 Pattern: "banana", 635 }, 636 &query.Not{Child: &query.Substring{ 637 Pattern: "apple", 638 }}, 639 ), 640 chunkOpts, 641 ) 642 643 matches := sres.Files 644 645 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 646 t.Fatalf("got %v, want 1 match", matches) 647 } 648 if matches[0].FileName != "f1" { 649 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 650 } 651 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 652 t.Fatalf("got %v, want offset 2", matches) 653 } 654 }) 655} 656 657func TestNegativeMatchesOnlyShortcut(t *testing.T) { 658 b := testIndexBuilder(t, nil, 659 Document{Name: "f1", Content: []byte("x banana y")}, 660 Document{Name: "f2", Content: []byte("x appelmoes y")}, 661 Document{Name: "f3", Content: []byte("x appelmoes y")}, 662 Document{Name: "f3", Content: []byte("x appelmoes y")}) 663 664 t.Run("LineMatches", func(t *testing.T) { 665 sres := searchForTest(t, b, query.NewAnd( 666 &query.Substring{ 667 Pattern: "banana", 668 }, 669 &query.Not{Child: &query.Substring{ 670 Pattern: "appel", 671 }})) 672 673 if sres.Stats.FilesConsidered != 1 { 674 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 675 } 676 }) 677 678 t.Run("ChunkMatches", func(t *testing.T) { 679 sres := searchForTest(t, b, query.NewAnd( 680 &query.Substring{ 681 Pattern: "banana", 682 }, 683 &query.Not{Child: &query.Substring{ 684 Pattern: "appel", 685 }}), chunkOpts) 686 687 if sres.Stats.FilesConsidered != 1 { 688 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 689 } 690 }) 691} 692 693func TestFileSearch(t *testing.T) { 694 b := testIndexBuilder(t, nil, 695 Document{Name: "banzana", Content: []byte("x orange y")}, 696 // -------------0123456 697 Document{Name: "banana", Content: []byte("x apple y")}, 698 // -------------012345 699 ) 700 701 t.Run("LineMatches", func(t *testing.T) { 702 sres := searchForTest(t, b, &query.Substring{ 703 Pattern: "anan", 704 
FileName: true, 705 }) 706 707 matches := sres.Files 708 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 709 t.Fatalf("got %v, want 1 match", matches) 710 } 711 712 got := matches[0].LineMatches[0] 713 want := LineMatch{ 714 Line: []byte("banana"), 715 LineFragments: []LineFragmentMatch{{ 716 Offset: 1, 717 LineOffset: 1, 718 MatchLength: 4, 719 }}, 720 FileName: true, 721 } 722 723 if !reflect.DeepEqual(got, want) { 724 t.Errorf("got %#v, want %#v", got, want) 725 } 726 }) 727 728 t.Run("ChunkMatches", func(t *testing.T) { 729 sres := searchForTest(t, b, &query.Substring{ 730 Pattern: "anan", 731 FileName: true, 732 }, chunkOpts) 733 734 matches := sres.Files 735 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 736 t.Fatalf("got %v, want 1 match", matches) 737 } 738 739 got := matches[0].ChunkMatches[0] 740 want := ChunkMatch{ 741 Content: []byte("banana"), 742 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 743 Ranges: []Range{{ 744 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 745 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 746 }}, 747 FileName: true, 748 } 749 750 if diff := cmp.Diff(want, got); diff != "" { 751 t.Fatal(diff) 752 } 753 }) 754 755 t.Run("FileNameSet", func(t *testing.T) { 756 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 757 758 matches := sres.Files 759 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 760 t.Fatalf("got %v, want 1 match", matches) 761 } 762 763 got := matches[0].ChunkMatches[0] 764 want := ChunkMatch{ 765 Content: []byte("banana"), 766 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 767 Ranges: []Range{{ 768 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 769 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 770 }}, 771 FileName: true, 772 } 773 774 if diff := cmp.Diff(want, got); diff != "" { 775 t.Fatal(diff) 776 } 777 }) 778} 779 780func TestFileCase(t *testing.T) { 781 b := testIndexBuilder(t, 
nil, 782 Document{Name: "BANANA", Content: []byte("x orange y")}) 783 784 t.Run("LineMatches", func(t *testing.T) { 785 sres := searchForTest(t, b, &query.Substring{ 786 Pattern: "banana", 787 FileName: true, 788 }) 789 790 matches := sres.Files 791 if len(matches) != 1 || matches[0].FileName != "BANANA" { 792 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 793 } 794 }) 795 796 t.Run("ChunkMatches", func(t *testing.T) { 797 sres := searchForTest(t, b, &query.Substring{ 798 Pattern: "banana", 799 FileName: true, 800 }, chunkOpts) 801 802 matches := sres.Files 803 if len(matches) != 1 || matches[0].FileName != "BANANA" { 804 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 805 } 806 }) 807} 808 809func TestFileRegexpSearchBruteForce(t *testing.T) { 810 b := testIndexBuilder(t, nil, 811 Document{Name: "banzana", Content: []byte("x orange y")}, 812 Document{Name: "banana", Content: []byte("x apple y")}, 813 ) 814 t.Run("LineMatches", func(t *testing.T) { 815 sres := searchForTest(t, b, &query.Regexp{ 816 Regexp: mustParseRE("[qn][zx]"), 817 FileName: true, 818 }) 819 820 matches := sres.Files 821 if len(matches) != 1 || matches[0].FileName != "banzana" { 822 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 823 } 824 }) 825 t.Run("LineMatches", func(t *testing.T) { 826 sres := searchForTest(t, b, &query.Regexp{ 827 Regexp: mustParseRE("[qn][zx]"), 828 FileName: true, 829 }, chunkOpts) 830 831 matches := sres.Files 832 if len(matches) != 1 || matches[0].FileName != "banzana" { 833 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 834 } 835 }) 836} 837 838func TestFileRegexpSearchShortString(t *testing.T) { 839 b := testIndexBuilder(t, nil, 840 Document{Name: "banana.py", Content: []byte("x orange y")}) 841 842 t.Run("LineMatches", func(t *testing.T) { 843 sres := searchForTest(t, b, &query.Regexp{ 844 Regexp: mustParseRE("ana.py"), 845 FileName: true, 846 }) 847 848 matches := sres.Files 849 if len(matches) != 1 || matches[0].FileName != "banana.py" { 850 
t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 851 } 852 }) 853 854 t.Run("ChunkMatches", func(t *testing.T) { 855 sres := searchForTest(t, b, &query.Regexp{ 856 Regexp: mustParseRE("ana.py"), 857 FileName: true, 858 }, chunkOpts) 859 860 matches := sres.Files 861 if len(matches) != 1 || matches[0].FileName != "banana.py" { 862 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 863 } 864 }) 865} 866 867func TestFileSubstringSearchBruteForce(t *testing.T) { 868 b := testIndexBuilder(t, nil, 869 Document{Name: "BANZANA", Content: []byte("x orange y")}, 870 Document{Name: "banana", Content: []byte("x apple y")}) 871 872 q := &query.Substring{ 873 Pattern: "z", 874 FileName: true, 875 } 876 877 t.Run("LineMatches", func(t *testing.T) { 878 res := searchForTest(t, b, q) 879 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 880 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 881 } 882 }) 883 884 t.Run("ChunkMatches", func(t *testing.T) { 885 res := searchForTest(t, b, q, chunkOpts) 886 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 887 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 888 } 889 }) 890} 891 892func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 893 b := testIndexBuilder(t, nil, 894 Document{Name: "BANZANA", Content: []byte("x orange y")}, 895 Document{Name: "bananaq", Content: []byte("x apple y")}) 896 897 q := &query.Substring{ 898 Pattern: "q", 899 FileName: true, 900 } 901 t.Run("LineMatches", func(t *testing.T) { 902 res := searchForTest(t, b, q) 903 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 904 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 905 } 906 }) 907 908 t.Run("LineMatches", func(t *testing.T) { 909 res := searchForTest(t, b, q, chunkOpts) 910 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 911 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 912 } 913 }) 914} 915 916func TestSearchMatchAll(t 
*testing.T) { 917 b := testIndexBuilder(t, nil, 918 Document{Name: "banzana", Content: []byte("x orange y")}, 919 Document{Name: "banana", Content: []byte("x apple y")}) 920 921 t.Run("LineMatches", func(t *testing.T) { 922 sres := searchForTest(t, b, &query.Const{Value: true}) 923 matches := sres.Files 924 if len(matches) != 2 { 925 t.Fatalf("got %v, want 2 matches", matches) 926 } 927 }) 928 929 t.Run("ChunkMatches", func(t *testing.T) { 930 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 931 matches := sres.Files 932 if len(matches) != 2 { 933 t.Fatalf("got %v, want 2 matches", matches) 934 } 935 }) 936} 937 938func TestSearchNewline(t *testing.T) { 939 b := testIndexBuilder(t, nil, 940 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 941 942 t.Run("LineMatches", func(t *testing.T) { 943 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 944 945 // Just check that we don't crash. 946 947 matches := sres.Files 948 if len(matches) != 1 { 949 t.Fatalf("got %v, want 1 matches", matches) 950 } 951 }) 952 953 t.Run("ChunkMatches", func(t *testing.T) { 954 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 955 956 // Just check that we don't crash. 
957 958 matches := sres.Files 959 if len(matches) != 1 { 960 t.Fatalf("got %v, want 1 matches", matches) 961 } 962 }) 963} 964 965func TestSearchMatchAllRegexp(t *testing.T) { 966 b := testIndexBuilder(t, nil, 967 Document{Name: "banzana", Content: []byte("abcd")}, 968 Document{Name: "banana", Content: []byte("pqrs")}) 969 970 t.Run("LineMatches", func(t *testing.T) { 971 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 972 973 matches := sres.Files 974 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 975 t.Fatalf("got %v, want 2 matches", matches) 976 } 977 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 978 t.Fatalf("want 4 chars in every file, got %#v", matches) 979 } 980 981 }) 982 983 t.Run("ChunkMatches", func(t *testing.T) { 984 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 985 986 matches := sres.Files 987 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 988 t.Fatalf("got %v, want 2 matches", matches) 989 } 990 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 991 t.Fatalf("want 4 chars in every file, got %#v", matches) 992 } 993 994 }) 995} 996 997func TestFileRestriction(t *testing.T) { 998 b := testIndexBuilder(t, nil, 999 Document{Name: "banana1", Content: []byte("x orange y")}, 1000 Document{Name: "banana2", Content: []byte("x apple y")}, 1001 Document{Name: "orange", Content: []byte("x apple z")}) 1002 1003 t.Run("LineMatches", func(t *testing.T) { 1004 sres := searchForTest(t, b, query.NewAnd( 1005 &query.Substring{ 1006 Pattern: "banana", 1007 FileName: true, 1008 }, 1009 &query.Substring{ 1010 Pattern: "apple", 1011 })) 1012 1013 matches := sres.Files 1014 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1015 t.Fatalf("got %v, want 1 match", matches) 1016 } 1017 1018 match := matches[0].LineMatches[0] 1019 got := string(match.Line) 1020 want := "x apple y" 1021 if got != want { 1022 
t.Errorf("got match %#v, want line %q", match, want) 1023 } 1024 }) 1025 1026 t.Run("ChunkMatches", func(t *testing.T) { 1027 sres := searchForTest(t, b, query.NewAnd( 1028 &query.Substring{ 1029 Pattern: "banana", 1030 FileName: true, 1031 }, 1032 &query.Substring{ 1033 Pattern: "apple", 1034 }), chunkOpts) 1035 1036 matches := sres.Files 1037 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1038 t.Fatalf("got %v, want 1 match", matches) 1039 } 1040 1041 match := matches[0].ChunkMatches[0] 1042 got := string(match.Content) 1043 want := "x apple y" 1044 if got != want { 1045 t.Errorf("got match %#v, want line %q", match, want) 1046 } 1047 }) 1048} 1049 1050func TestFileNameBoundary(t *testing.T) { 1051 b := testIndexBuilder(t, nil, 1052 Document{Name: "banana2", Content: []byte("x apple y")}, 1053 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1054 Document{Name: "foo", Content: []byte("x apple y")}) 1055 1056 t.Run("LineMatches", func(t *testing.T) { 1057 sres := searchForTest(t, b, &query.Substring{ 1058 Pattern: "helpers.go", 1059 FileName: true, 1060 }) 1061 1062 matches := sres.Files 1063 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1064 t.Fatalf("got %v, want 1 match", matches) 1065 } 1066 }) 1067 1068 t.Run("ChunkMatches", func(t *testing.T) { 1069 sres := searchForTest(t, b, &query.Substring{ 1070 Pattern: "helpers.go", 1071 FileName: true, 1072 }, chunkOpts) 1073 1074 matches := sres.Files 1075 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1076 t.Fatalf("got %v, want 1 match", matches) 1077 } 1078 }) 1079} 1080 1081func TestDocumentOrder(t *testing.T) { 1082 var docs []Document 1083 for i := 0; i < 3; i++ { 1084 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1085 } 1086 1087 b := testIndexBuilder(t, nil, docs...) 
1088 1089 t.Run("LineMatches", func(t *testing.T) { 1090 sres := searchForTest(t, b, query.NewAnd( 1091 &query.Substring{ 1092 Pattern: "needle", 1093 })) 1094 1095 want := []string{"f0", "f1", "f2"} 1096 var got []string 1097 for _, f := range sres.Files { 1098 got = append(got, f.FileName) 1099 } 1100 if !reflect.DeepEqual(got, want) { 1101 t.Fatalf("got %v, want %v", got, want) 1102 } 1103 }) 1104 1105 t.Run("ChunkMatches", func(t *testing.T) { 1106 sres := searchForTest(t, b, 1107 query.NewAnd(&query.Substring{ 1108 Pattern: "needle", 1109 }), 1110 chunkOpts, 1111 ) 1112 1113 want := []string{"f0", "f1", "f2"} 1114 var got []string 1115 for _, f := range sres.Files { 1116 got = append(got, f.FileName) 1117 } 1118 if !reflect.DeepEqual(got, want) { 1119 t.Fatalf("got %v, want %v", got, want) 1120 } 1121 }) 1122} 1123 1124func TestBranchMask(t *testing.T) { 1125 b := testIndexBuilder(t, &Repository{ 1126 Branches: []RepositoryBranch{ 1127 {"master", "v-master"}, 1128 {"stable", "v-stable"}, 1129 {"bonzai", "v-bonzai"}, 1130 }, 1131 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1132 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1133 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1134 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1135 ) 1136 1137 t.Run("LineMatches", func(t *testing.T) { 1138 sres := searchForTest(t, b, query.NewAnd( 1139 &query.Substring{ 1140 Pattern: "needle", 1141 }, 1142 &query.Branch{ 1143 Pattern: "table", 1144 })) 1145 1146 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1147 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1148 } 1149 1150 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1151 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1152 } 1153 }) 1154 1155 t.Run("ChunkMatches", 
func(t *testing.T) { 1156 sres := searchForTest(t, b, query.NewAnd( 1157 &query.Substring{ 1158 Pattern: "needle", 1159 }, 1160 &query.Branch{ 1161 Pattern: "table", 1162 }), 1163 chunkOpts, 1164 ) 1165 1166 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1167 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1168 } 1169 1170 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1171 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1172 } 1173 }) 1174} 1175 1176func TestBranchLimit(t *testing.T) { 1177 for limit := 64; limit <= 65; limit++ { 1178 r := &Repository{} 1179 for i := 0; i < limit; i++ { 1180 s := fmt.Sprintf("b%d", i) 1181 r.Branches = append(r.Branches, RepositoryBranch{ 1182 s, "v-" + s, 1183 }) 1184 } 1185 _, err := NewIndexBuilder(r) 1186 if limit == 64 && err != nil { 1187 t.Fatalf("NewIndexBuilder: %v", err) 1188 } else if limit == 65 && err == nil { 1189 t.Fatalf("NewIndexBuilder succeeded") 1190 } 1191 } 1192} 1193 1194func TestBranchReport(t *testing.T) { 1195 branches := []string{"stable", "master"} 1196 b := testIndexBuilder(t, &Repository{ 1197 Branches: []RepositoryBranch{ 1198 {"stable", "vs"}, 1199 {"master", "vm"}, 1200 }, 1201 }, 1202 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1203 1204 t.Run("LineMatches", func(t *testing.T) { 1205 sres := searchForTest(t, b, &query.Substring{ 1206 Pattern: "needle", 1207 }) 1208 if len(sres.Files) != 1 { 1209 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1210 } 1211 1212 f := sres.Files[0] 1213 if !reflect.DeepEqual(f.Branches, branches) { 1214 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1215 } 1216 }) 1217 1218 t.Run("ChunkMatches", func(t *testing.T) { 1219 sres := searchForTest(t, b, &query.Substring{ 1220 Pattern: "needle", 1221 }, chunkOpts) 1222 if len(sres.Files) != 1 { 1223 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1224 } 1225 1226 f := 
sres.Files[0] 1227 if !reflect.DeepEqual(f.Branches, branches) { 1228 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1229 } 1230 }) 1231 1232} 1233 1234func TestBranchVersions(t *testing.T) { 1235 b := testIndexBuilder(t, &Repository{ 1236 Branches: []RepositoryBranch{ 1237 {"stable", "v-stable"}, 1238 {"master", "v-master"}, 1239 }, 1240 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1241 1242 t.Run("LineMatches", func(t *testing.T) { 1243 sres := searchForTest(t, b, &query.Substring{ 1244 Pattern: "needle", 1245 }) 1246 if len(sres.Files) != 1 { 1247 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1248 } 1249 1250 f := sres.Files[0] 1251 if f.Version != "v-master" { 1252 t.Fatalf("got file %#v, want version 'v-master'", f) 1253 } 1254 }) 1255 1256 t.Run("ChunkMatches", func(t *testing.T) { 1257 sres := searchForTest(t, b, &query.Substring{ 1258 Pattern: "needle", 1259 }, chunkOpts) 1260 if len(sres.Files) != 1 { 1261 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1262 } 1263 1264 f := sres.Files[0] 1265 if f.Version != "v-master" { 1266 t.Fatalf("got file %#v, want version 'v-master'", f) 1267 } 1268 }) 1269} 1270 1271func mustParseRE(s string) *syntax.Regexp { 1272 r, err := syntax.Parse(s, syntax.Perl) 1273 if err != nil { 1274 panic(err) 1275 } 1276 1277 return r 1278} 1279 1280func TestRegexp(t *testing.T) { 1281 content := []byte("needle the bla") 1282 // ----------------01234567890123 1283 1284 b := testIndexBuilder(t, nil, 1285 Document{ 1286 Name: "f1", 1287 Content: content, 1288 }) 1289 1290 t.Run("LineMatches", func(t *testing.T) { 1291 sres := searchForTest(t, b, 1292 &query.Regexp{ 1293 Regexp: mustParseRE("dle.*bla"), 1294 }) 1295 1296 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1297 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1298 } 1299 1300 got := sres.Files[0].LineMatches[0] 1301 want := LineMatch{ 1302 LineFragments: []LineFragmentMatch{{ 1303 
LineOffset: 3, 1304 Offset: 3, 1305 MatchLength: 11, 1306 }}, 1307 Line: content, 1308 FileName: false, 1309 LineNumber: 1, 1310 LineStart: 0, 1311 LineEnd: 14, 1312 } 1313 1314 if !reflect.DeepEqual(got, want) { 1315 t.Errorf("got %#v, want %#v", got, want) 1316 } 1317 }) 1318 1319 t.Run("ChunkMatches", func(t *testing.T) { 1320 sres := searchForTest(t, b, 1321 &query.Regexp{ 1322 Regexp: mustParseRE("dle.*bla"), 1323 }, chunkOpts) 1324 1325 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1326 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1327 } 1328 1329 got := sres.Files[0].ChunkMatches[0] 1330 want := ChunkMatch{ 1331 Content: content, 1332 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1333 Ranges: []Range{{ 1334 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1335 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1336 }}, 1337 } 1338 1339 if diff := cmp.Diff(want, got); diff != "" { 1340 t.Fatal(diff) 1341 } 1342 }) 1343} 1344 1345func TestRegexpFile(t *testing.T) { 1346 content := []byte("needle the bla") 1347 1348 name := "let's play: find the mussel" 1349 b := testIndexBuilder(t, nil, 1350 Document{Name: name, Content: content}, 1351 Document{Name: "play.txt", Content: content}) 1352 1353 t.Run("LineMatches", func(t *testing.T) { 1354 sres := searchForTest(t, b, 1355 &query.Regexp{ 1356 Regexp: mustParseRE("play.*mussel"), 1357 FileName: true, 1358 }) 1359 1360 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1361 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1362 } 1363 1364 if sres.Files[0].FileName != name { 1365 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1366 } 1367 }) 1368 1369 t.Run("ChunkMatches", func(t *testing.T) { 1370 sres := searchForTest(t, b, 1371 &query.Regexp{ 1372 Regexp: mustParseRE("play.*mussel"), 1373 FileName: true, 1374 }, chunkOpts) 1375 1376 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1377 t.Fatalf("got 
%v, want 1 match in 1 file", sres.Files) 1378 } 1379 1380 if sres.Files[0].FileName != name { 1381 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1382 } 1383 }) 1384} 1385 1386func TestRegexpOrder(t *testing.T) { 1387 content := []byte("bla the needle") 1388 // ----------------01234567890123 1389 1390 b := testIndexBuilder(t, nil, 1391 Document{Name: "f1", Content: content}) 1392 1393 t.Run("LineMatches", func(t *testing.T) { 1394 sres := searchForTest(t, b, 1395 &query.Regexp{ 1396 Regexp: mustParseRE("dle.*bla"), 1397 }) 1398 1399 if len(sres.Files) != 0 { 1400 t.Fatalf("got %v, want 0 matches", sres.Files) 1401 } 1402 }) 1403 1404 t.Run("ChunkMatches", func(t *testing.T) { 1405 sres := searchForTest(t, b, 1406 &query.Regexp{ 1407 Regexp: mustParseRE("dle.*bla"), 1408 }) 1409 1410 if len(sres.Files) != 0 { 1411 t.Fatalf("got %v, want 0 matches", sres.Files) 1412 } 1413 }) 1414} 1415 1416func TestRepoName(t *testing.T) { 1417 content := []byte("bla the needle") 1418 // ----------------01234567890123 1419 1420 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1421 Document{Name: "f1", Content: content}) 1422 1423 t.Run("LineMatches", func(t *testing.T) { 1424 sres := searchForTest(t, b, 1425 query.NewAnd( 1426 &query.Substring{Pattern: "needle"}, 1427 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1428 )) 1429 1430 if len(sres.Files) != 0 { 1431 t.Fatalf("got %v, want 0 matches", sres.Files) 1432 } 1433 1434 if sres.Stats.FilesConsidered > 0 { 1435 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1436 } 1437 1438 sres = searchForTest(t, b, 1439 query.NewAnd( 1440 &query.Substring{Pattern: "needle"}, 1441 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1442 )) 1443 if len(sres.Files) != 1 { 1444 t.Fatalf("got %v, want 1 match", sres.Files) 1445 } 1446 }) 1447 1448 t.Run("ChunkMatches", func(t *testing.T) { 1449 sres := searchForTest(t, b, 1450 query.NewAnd( 1451 &query.Substring{Pattern: "needle"}, 1452 
&query.Repo{Regexp: regexp.MustCompile("foo")}, 1453 ), 1454 chunkOpts, 1455 ) 1456 1457 if len(sres.Files) != 0 { 1458 t.Fatalf("got %v, want 0 matches", sres.Files) 1459 } 1460 1461 if sres.Stats.FilesConsidered > 0 { 1462 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1463 } 1464 1465 sres = searchForTest(t, b, 1466 query.NewAnd( 1467 &query.Substring{Pattern: "needle"}, 1468 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1469 )) 1470 if len(sres.Files) != 1 { 1471 t.Fatalf("got %v, want 1 match", sres.Files) 1472 } 1473 }) 1474} 1475 1476func TestMergeMatches(t *testing.T) { 1477 content := []byte("blablabla") 1478 b := testIndexBuilder(t, nil, 1479 Document{Name: "f1", Content: content}) 1480 1481 t.Run("LineMatches", func(t *testing.T) { 1482 sres := searchForTest(t, b, 1483 &query.Substring{Pattern: "bla"}) 1484 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1485 t.Fatalf("got %v, want 1 match", sres.Files) 1486 } 1487 }) 1488 1489 t.Run("ChunkMatches", func(t *testing.T) { 1490 sres := searchForTest(t, b, 1491 &query.Substring{Pattern: "bla"}, 1492 chunkOpts, 1493 ) 1494 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1495 t.Fatalf("got %v, want 1 match", sres.Files) 1496 } 1497 }) 1498} 1499 1500func TestRepoURL(t *testing.T) { 1501 content := []byte("blablabla") 1502 b := testIndexBuilder(t, &Repository{ 1503 Name: "name", 1504 URL: "URL", 1505 CommitURLTemplate: "commit", 1506 FileURLTemplate: "file-url", 1507 LineFragmentTemplate: "fragment", 1508 }, Document{Name: "f1", Content: content}) 1509 1510 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1511 1512 if sres.RepoURLs["name"] != "file-url" { 1513 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1514 } 1515 if sres.LineFragments["name"] != "fragment" { 1516 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1517 } 1518} 1519 1520func TestRegexpCaseSensitive(t *testing.T) { 1521 
content := []byte("bla\nfunc unmarshalGitiles\n") 1522 b := testIndexBuilder(t, nil, Document{ 1523 Name: "f1", 1524 Content: content, 1525 }) 1526 1527 t.Run("LineMatches", func(t *testing.T) { 1528 res := searchForTest(t, b, 1529 &query.Regexp{ 1530 Regexp: mustParseRE("func.*Gitiles"), 1531 CaseSensitive: true, 1532 }) 1533 1534 if len(res.Files) != 1 { 1535 t.Fatalf("got %v, want one match", res.Files) 1536 } 1537 }) 1538 1539 t.Run("ChunkMatches", func(t *testing.T) { 1540 res := searchForTest(t, b, 1541 &query.Regexp{ 1542 Regexp: mustParseRE("func.*Gitiles"), 1543 CaseSensitive: true, 1544 }, 1545 chunkOpts, 1546 ) 1547 1548 if len(res.Files) != 1 { 1549 t.Fatalf("got %v, want one match", res.Files) 1550 } 1551 }) 1552} 1553 1554func TestRegexpCaseFolding(t *testing.T) { 1555 content := []byte("bla\nfunc unmarshalGitiles\n") 1556 1557 b := testIndexBuilder(t, nil, 1558 Document{Name: "f1", Content: content}) 1559 res := searchForTest(t, b, 1560 &query.Regexp{ 1561 Regexp: mustParseRE("func.*GITILES"), 1562 CaseSensitive: false, 1563 }) 1564 1565 if len(res.Files) != 1 { 1566 t.Fatalf("got %v, want one match", res.Files) 1567 } 1568} 1569 1570func TestCaseRegexp(t *testing.T) { 1571 content := []byte("BLABLABLA") 1572 b := testIndexBuilder(t, nil, 1573 Document{Name: "f1", Content: content}) 1574 1575 t.Run("LineMatches", func(t *testing.T) { 1576 res := searchForTest(t, b, 1577 &query.Regexp{ 1578 Regexp: mustParseRE("[xb][xl][xa]"), 1579 CaseSensitive: true, 1580 }) 1581 1582 if len(res.Files) > 0 { 1583 t.Fatalf("got %v, want no matches", res.Files) 1584 } 1585 }) 1586 1587 t.Run("ChunkMatches", func(t *testing.T) { 1588 res := searchForTest(t, b, 1589 &query.Regexp{ 1590 Regexp: mustParseRE("[xb][xl][xa]"), 1591 CaseSensitive: true, 1592 }, 1593 chunkOpts, 1594 ) 1595 1596 if len(res.Files) > 0 { 1597 t.Fatalf("got %v, want no matches", res.Files) 1598 } 1599 }) 1600} 1601 1602func TestNegativeRegexp(t *testing.T) { 1603 content := []byte("BLABLABLA 
needle bla") 1604 b := testIndexBuilder(t, nil, 1605 Document{Name: "f1", Content: content}) 1606 1607 t.Run("LineMatches", func(t *testing.T) { 1608 res := searchForTest(t, b, 1609 query.NewAnd( 1610 &query.Substring{ 1611 Pattern: "needle", 1612 }, 1613 &query.Not{ 1614 Child: &query.Regexp{ 1615 Regexp: mustParseRE(".cs"), 1616 }, 1617 })) 1618 1619 if len(res.Files) != 1 { 1620 t.Fatalf("got %v, want 1 match", res.Files) 1621 } 1622 }) 1623 1624 t.Run("ChunkMatches", func(t *testing.T) { 1625 res := searchForTest(t, b, 1626 query.NewAnd( 1627 &query.Substring{ 1628 Pattern: "needle", 1629 }, 1630 &query.Not{ 1631 Child: &query.Regexp{ 1632 Regexp: mustParseRE(".cs"), 1633 }, 1634 }, 1635 ), 1636 chunkOpts) 1637 1638 if len(res.Files) != 1 { 1639 t.Fatalf("got %v, want 1 match", res.Files) 1640 } 1641 }) 1642} 1643 1644func TestSymbolRank(t *testing.T) { 1645 t.Skip() 1646 1647 content := []byte("func bla() blubxxxxx") 1648 // ----------------01234567890123456789 1649 b := testIndexBuilder(t, nil, 1650 Document{ 1651 Name: "f1", 1652 Content: content, 1653 }, Document{ 1654 Name: "f2", 1655 Content: content, 1656 Symbols: []DocumentSection{{5, 8}}, 1657 }, Document{ 1658 Name: "f3", 1659 Content: content, 1660 }) 1661 1662 t.Run("LineMatches", func(t *testing.T) { 1663 res := searchForTest(t, b, 1664 &query.Substring{ 1665 CaseSensitive: false, 1666 Pattern: "bla", 1667 }) 1668 1669 if len(res.Files) != 3 { 1670 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1671 } 1672 if res.Files[0].FileName != "f2" { 1673 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1674 } 1675 }) 1676 1677 t.Run("ChunkMatches", func(t *testing.T) { 1678 res := searchForTest(t, b, 1679 &query.Substring{ 1680 CaseSensitive: false, 1681 Pattern: "bla", 1682 }, chunkOpts) 1683 1684 if len(res.Files) != 3 { 1685 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1686 } 1687 if res.Files[0].FileName != "f2" { 1688 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1689 } 1690 }) 1691} 1692 1693func TestSymbolRankRegexpUTF8(t *testing.T) { 1694 t.Skip() 1695 1696 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1697 content := []byte(prefix + 1698 "func bla() blub") 1699 // ------012345678901234 1700 b := testIndexBuilder(t, nil, 1701 Document{ 1702 Name: "f1", 1703 Content: content, 1704 }, Document{ 1705 Name: "f2", 1706 Content: content, 1707 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1708 }, Document{ 1709 Name: "f3", 1710 Content: content, 1711 }) 1712 1713 t.Run("LineMatches", func(t *testing.T) { 1714 res := searchForTest(t, b, 1715 &query.Regexp{ 1716 Regexp: mustParseRE("b.a"), 1717 }) 1718 1719 if len(res.Files) != 3 { 1720 t.Fatalf("got %#v, want 3 files", res.Files) 1721 } 1722 if res.Files[0].FileName != "f2" { 1723 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1724 } 1725 }) 1726 1727 t.Run("ChunjkMatches", func(t *testing.T) { 1728 res := searchForTest(t, b, 1729 &query.Regexp{ 1730 Regexp: mustParseRE("b.a"), 1731 }, chunkOpts) 1732 1733 if len(res.Files) != 3 { 1734 t.Fatalf("got %#v, want 3 files", res.Files) 1735 } 1736 if res.Files[0].FileName != "f2" { 1737 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1738 } 1739 }) 1740} 1741 1742func TestPartialSymbolRank(t *testing.T) { 1743 t.Skip() 1744 1745 content := []byte("func bla() blub") 1746 // ----------------012345678901234 1747 1748 b := testIndexBuilder(t, nil, 1749 Document{ 1750 Name: "f1", 1751 Content: content, 1752 Symbols: []DocumentSection{{4, 9}}, 1753 }, Document{ 1754 Name: "f2", 1755 Content: content, 1756 Symbols: []DocumentSection{{4, 8}}, 1757 }, Document{ 1758 Name: "f3", 1759 Content: content, 1760 Symbols: []DocumentSection{{4, 9}}, 1761 }) 1762 1763 t.Run("LineMatches", func(t *testing.T) { 1764 res := 
searchForTest(t, b, 1765 &query.Substring{ 1766 Pattern: "bla", 1767 }) 1768 1769 if len(res.Files) != 3 { 1770 t.Fatalf("got %#v, want 3 files", res.Files) 1771 } 1772 if res.Files[0].FileName != "f2" { 1773 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1774 } 1775 }) 1776 1777 t.Run("ChunkMatches", func(t *testing.T) { 1778 res := searchForTest(t, b, 1779 &query.Substring{ 1780 Pattern: "bla", 1781 }, chunkOpts) 1782 1783 if len(res.Files) != 3 { 1784 t.Fatalf("got %#v, want 3 files", res.Files) 1785 } 1786 if res.Files[0].FileName != "f2" { 1787 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1788 } 1789 }) 1790} 1791 1792func TestNegativeRepo(t *testing.T) { 1793 content := []byte("bla the needle") 1794 // ----------------01234567890123 1795 b := testIndexBuilder(t, &Repository{ 1796 Name: "bla", 1797 }, Document{Name: "f1", Content: content}) 1798 1799 t.Run("LineMatches", func(t *testing.T) { 1800 sres := searchForTest(t, b, 1801 query.NewAnd( 1802 &query.Substring{Pattern: "needle"}, 1803 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1804 )) 1805 1806 if len(sres.Files) != 0 { 1807 t.Fatalf("got %v, want 0 matches", sres.Files) 1808 } 1809 }) 1810 1811 t.Run("ChunkMatches", func(t *testing.T) { 1812 sres := searchForTest(t, b, 1813 query.NewAnd( 1814 &query.Substring{Pattern: "needle"}, 1815 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1816 ), chunkOpts) 1817 1818 if len(sres.Files) != 0 { 1819 t.Fatalf("got %v, want 0 matches", sres.Files) 1820 } 1821 }) 1822} 1823 1824func TestListRepos(t *testing.T) { 1825 content := []byte("bla the needle\n") 1826 // ----------------012345678901234- 1827 1828 t.Run("default and minimal fallback", func(t *testing.T) { 1829 repo := &Repository{ 1830 Name: "reponame", 1831 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1832 } 1833 b := testIndexBuilder(t, repo, 1834 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1835 
Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1836 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1837 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1838 1839 searcher := searcherForTest(t, b) 1840 1841 for _, opts := range []*ListOptions{ 1842 nil, 1843 {}, 1844 {Field: RepoListFieldRepos}, 1845 {Field: RepoListFieldReposMap}, 1846 } { 1847 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1848 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1849 1850 res, err := searcher.List(context.Background(), q, opts) 1851 if err != nil { 1852 t.Fatalf("List(%v): %v", q, err) 1853 } 1854 1855 want := &RepoList{ 1856 Repos: []*RepoListEntry{{ 1857 Repository: *repo, 1858 Stats: RepoStats{ 1859 Documents: 4, 1860 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1861 Shards: 1, 1862 1863 NewLinesCount: 4, 1864 DefaultBranchNewLinesCount: 2, 1865 OtherBranchesNewLinesCount: 3, 1866 }, 1867 }}, 1868 Stats: RepoStats{ 1869 Repos: 1, 1870 Documents: 4, 1871 ContentBytes: 68, 1872 Shards: 1, 1873 1874 NewLinesCount: 4, 1875 DefaultBranchNewLinesCount: 2, 1876 OtherBranchesNewLinesCount: 3, 1877 }, 1878 } 1879 ignored := []cmp.Option{ 1880 cmpopts.EquateEmpty(), 1881 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1882 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1883 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1884 cmpopts.IgnoreFields(Repository{}, "priority"), 1885 } 1886 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1887 t.Fatalf("mismatch (-want +got):\n%s", diff) 1888 } 1889 1890 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1891 res, err = searcher.List(context.Background(), q, nil) 1892 if err != nil { 1893 t.Fatalf("List(%v): %v", q, err) 1894 } 1895 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1896 t.Fatalf("got %v, want 0 matches", res) 1897 } 1898 }) 1899 } 1900 }) 1901 1902 t.Run("minimal", func(t *testing.T) { 1903 repo := &Repository{ 1904 ID: 
1234, 1905 Name: "reponame", 1906 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1907 RawConfig: map[string]string{"repoid": "1234"}, 1908 } 1909 b := testIndexBuilder(t, repo, 1910 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1911 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1912 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1913 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1914 1915 searcher := searcherForTest(t, b) 1916 1917 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1918 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1919 if err != nil { 1920 t.Fatalf("List(%v): %v", q, err) 1921 } 1922 1923 want := &RepoList{ 1924 ReposMap: ReposMap{ 1925 repo.ID: { 1926 HasSymbols: repo.HasSymbols, 1927 Branches: repo.Branches, 1928 }, 1929 }, 1930 Stats: RepoStats{ 1931 Repos: 1, 1932 Shards: 1, 1933 Documents: 4, 1934 IndexBytes: 412, 1935 ContentBytes: 68, 1936 NewLinesCount: 4, 1937 DefaultBranchNewLinesCount: 2, 1938 OtherBranchesNewLinesCount: 3, 1939 }, 1940 } 1941 1942 ignored := []cmp.Option{ 1943 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1944 } 1945 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1946 t.Fatalf("mismatch (-want +got):\n%s", diff) 1947 } 1948 1949 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1950 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1951 if err != nil { 1952 t.Fatalf("List(%v): %v", q, err) 1953 } 1954 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1955 t.Fatalf("got %v, want 0 matches", res) 1956 } 1957 }) 1958} 1959 1960func TestListReposByContent(t *testing.T) { 1961 content := []byte("bla the needle") 1962 1963 b := testIndexBuilder(t, &Repository{ 1964 Name: "reponame", 1965 }, 1966 Document{Name: "f1", Content: content}, 1967 Document{Name: "f2", Content: content}) 1968 1969 searcher := 
searcherForTest(t, b) 1970 q := &query.Substring{Pattern: "needle"} 1971 res, err := searcher.List(context.Background(), q, nil) 1972 if err != nil { 1973 t.Fatalf("List(%v): %v", q, err) 1974 } 1975 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1976 t.Fatalf("got %v, want 1 matches", res) 1977 } 1978 if got := res.Repos[0].Stats.Shards; got != 1 { 1979 t.Fatalf("got %d, want 1 shard", got) 1980 } 1981 q = &query.Substring{Pattern: "foo"} 1982 res, err = searcher.List(context.Background(), q, nil) 1983 if err != nil { 1984 t.Fatalf("List(%v): %v", q, err) 1985 } 1986 if len(res.Repos) != 0 { 1987 t.Fatalf("got %v, want 0 matches", res) 1988 } 1989} 1990 1991func TestMetadata(t *testing.T) { 1992 content := []byte("bla the needle") 1993 1994 b := testIndexBuilder(t, &Repository{ 1995 Name: "reponame", 1996 }, Document{Name: "f1", Content: content}, 1997 Document{Name: "f2", Content: content}) 1998 1999 var buf bytes.Buffer 2000 if err := b.Write(&buf); err != nil { 2001 t.Fatal(err) 2002 } 2003 f := &memSeeker{buf.Bytes()} 2004 2005 rd, _, err := ReadMetadata(f) 2006 if err != nil { 2007 t.Fatalf("ReadMetadata: %v", err) 2008 } 2009 2010 if got, want := rd[0].Name, "reponame"; got != want { 2011 t.Fatalf("got %q want %q", got, want) 2012 } 2013} 2014 2015func TestOr(t *testing.T) { 2016 b := testIndexBuilder(t, nil, 2017 Document{Name: "f1", Content: []byte("needle")}, 2018 Document{Name: "f2", Content: []byte("banana")}) 2019 t.Run("LineMatches", func(t *testing.T) { 2020 sres := searchForTest(t, b, query.NewOr( 2021 &query.Substring{Pattern: "needle"}, 2022 &query.Substring{Pattern: "banana"})) 2023 2024 if len(sres.Files) != 2 { 2025 t.Fatalf("got %v, want 2 files", sres.Files) 2026 } 2027 }) 2028 2029 t.Run("ChunkMatches", func(t *testing.T) { 2030 sres := searchForTest(t, b, query.NewOr( 2031 &query.Substring{Pattern: "needle"}, 2032 &query.Substring{Pattern: "banana"})) 2033 2034 if len(sres.Files) != 2 { 2035 t.Fatalf("got %v, want 2 
files", sres.Files) 2036 } 2037 }) 2038} 2039 2040func TestFrequency(t *testing.T) { 2041 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2042 2043 b := testIndexBuilder(t, nil, 2044 Document{ 2045 Name: "f1", 2046 Content: content, 2047 }) 2048 2049 t.Run("LineMatches", func(t *testing.T) { 2050 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 2051 if len(sres.Files) != 0 { 2052 t.Errorf("got %v, wanted 0 matches", sres.Files) 2053 } 2054 }) 2055 2056 t.Run("ChunkMatches", func(t *testing.T) { 2057 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 2058 if len(sres.Files) != 0 { 2059 t.Errorf("got %v, wanted 0 matches", sres.Files) 2060 } 2061 }) 2062} 2063 2064func TestMatchNewline(t *testing.T) { 2065 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 2066 if err != nil { 2067 t.Fatalf("syntax.Parse: %v", err) 2068 } 2069 2070 content := []byte("pqr\nalex") 2071 2072 b := testIndexBuilder(t, nil, 2073 Document{ 2074 Name: "f1", 2075 Content: content, 2076 }) 2077 2078 t.Run("LineMatches", func(t *testing.T) { 2079 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 2080 if len(sres.Files) != 1 { 2081 t.Errorf("got %v, wanted 1 matches", sres.Files) 2082 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 2083 t.Errorf("got match line %q, want %q", l, content) 2084 } 2085 }) 2086 2087 t.Run("ChunkMatches", func(t *testing.T) { 2088 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 2089 if len(sres.Files) != 1 { 2090 t.Errorf("got %v, wanted 1 matches", sres.Files) 2091 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 2092 t.Errorf("got match line %q, want %q", c, content) 2093 } 2094 }) 2095} 2096 2097func TestSubRepo(t *testing.T) { 2098 subRepos := map[string]*Repository{ 2099 "sub": { 2100 Name: "sub-name", 2101 LineFragmentTemplate: "sub-line", 2102 }, 2103 } 
2104 2105 content := []byte("pqr\nalex") 2106 2107 b := testIndexBuilder(t, &Repository{ 2108 SubRepoMap: subRepos, 2109 }, Document{ 2110 Name: "sub/f1", 2111 Content: content, 2112 SubRepositoryPath: "sub", 2113 }) 2114 2115 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 2116 if len(sres.Files) != 1 { 2117 t.Fatalf("got %v, wanted 1 matches", sres.Files) 2118 } 2119 2120 f := sres.Files[0] 2121 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 2122 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 2123 } 2124 2125 if sres.LineFragments["sub-name"] != "sub-line" { 2126 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 2127 } 2128} 2129 2130func TestSearchEither(t *testing.T) { 2131 b := testIndexBuilder(t, nil, 2132 Document{Name: "f1", Content: []byte("bla needle bla")}, 2133 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 2134 2135 t.Run("LineMatches", func(t *testing.T) { 2136 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 2137 if len(sres.Files) != 2 { 2138 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2139 } 2140 2141 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2142 if len(sres.Files) != 1 { 2143 t.Fatalf("got %v, wanted 1 match", sres.Files) 2144 } 2145 2146 if got, want := sres.Files[0].FileName, "f1"; got != want { 2147 t.Errorf("got %q, want %q", got, want) 2148 } 2149 }) 2150 2151 t.Run("ChunkMatches", func(t *testing.T) { 2152 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 2153 if len(sres.Files) != 2 { 2154 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2155 } 2156 2157 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2158 if len(sres.Files) != 1 { 2159 t.Fatalf("got %v, wanted 1 match", sres.Files) 2160 } 2161 2162 if got, want := sres.Files[0].FileName, "f1"; got != want { 2163 t.Errorf("got %q, want %q", 
got, want) 2164 } 2165 }) 2166} 2167 2168func TestUnicodeExactMatch(t *testing.T) { 2169 needle := "néédlÉ" 2170 content := []byte("blá blá " + needle + " blâ") 2171 2172 b := testIndexBuilder(t, nil, 2173 Document{Name: "f1", Content: content}) 2174 2175 t.Run("LineMatches", func(t *testing.T) { 2176 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2177 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2178 } 2179 }) 2180 2181 t.Run("ChunkMatches", func(t *testing.T) { 2182 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2183 if len(res.Files) != 1 { 2184 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2185 } 2186 }) 2187} 2188 2189func TestUnicodeCoverContent(t *testing.T) { 2190 needle := "néédlÉ" 2191 content := []byte("blá blá " + needle + " blâ") 2192 2193 b := testIndexBuilder(t, nil, 2194 Document{Name: "f1", Content: content}) 2195 2196 t.Run("LineMatches", func(t *testing.T) { 2197 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2198 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2199 } 2200 2201 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2202 if len(res.Files) != 1 { 2203 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2204 } 2205 2206 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2207 t.Errorf("got %d want %d", got, want) 2208 } 2209 }) 2210 2211 t.Run("ChunkMatches", func(t *testing.T) { 2212 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2213 if len(res.Files) != 0 { 2214 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2215 } 2216 2217 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2218 if len(res.Files) != 1 { 2219 t.Fatalf("case 
insensitive: got %v, wanted 1 match", res.Files) 2220 } 2221 2222 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2223 want := uint32(strings.Index(string(content), needle)) 2224 if got != want { 2225 t.Errorf("got %d want %d", got, want) 2226 } 2227 }) 2228} 2229 2230func TestUnicodeNonCoverContent(t *testing.T) { 2231 needle := "nééáádlÉ" 2232 content := []byte("blá blá " + needle + " blâ") 2233 2234 b := testIndexBuilder(t, nil, 2235 Document{Name: "f1", Content: content}) 2236 2237 t.Run("LineMatches", func(t *testing.T) { 2238 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2239 if len(res.Files) != 1 { 2240 t.Fatalf("got %v, wanted 1 match", res.Files) 2241 } 2242 2243 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2244 t.Errorf("got %d want %d", got, want) 2245 } 2246 }) 2247 2248 t.Run("ChunkMatches", func(t *testing.T) { 2249 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2250 if len(res.Files) != 1 { 2251 t.Fatalf("got %v, wanted 1 match", res.Files) 2252 } 2253 2254 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2255 want := uint32(strings.Index(string(content), needle)) 2256 if got != want { 2257 t.Errorf("got %d want %d", got, want) 2258 } 2259 }) 2260} 2261 2262const kelvinCodePoint = 8490 2263 2264func TestUnicodeVariableLength(t *testing.T) { 2265 lower := 'k' 2266 upper := rune(kelvinCodePoint) 2267 2268 needle := "nee" + string([]rune{lower}) + "eed" 2269 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2270 " ee" + string([]rune{lower}) + "ee" + 2271 " ee" + string([]rune{upper}) + "ee") 2272 2273 t.Run("LineMatches", func(t *testing.T) { 2274 b := testIndexBuilder(t, nil, 2275 Document{Name: "f1", Content: []byte(corpus)}) 2276 2277 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2278 if len(res.Files) != 1 { 2279 
t.Fatalf("got %v, wanted 1 match", res.Files) 2280 } 2281 }) 2282 2283 t.Run("ChunkMatches", func(t *testing.T) { 2284 b := testIndexBuilder(t, nil, 2285 Document{Name: "f1", Content: []byte(corpus)}) 2286 2287 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2288 if len(res.Files) != 1 { 2289 t.Fatalf("got %v, wanted 1 match", res.Files) 2290 } 2291 }) 2292}

// TestUnicodeFileStartOffsets indexes a document with multi-byte UTF-8
// content followed by an ASCII document, and checks that a content search for
// the ASCII text returns exactly one file.
func TestUnicodeFileStartOffsets(t *testing.T) {
	unicode := "世界"
	wat := "waaaaaat"
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(unicode),
		},
		Document{
			Name:    "f2",
			Content: []byte(wat),
		},
	)
	q := &query.Substring{Pattern: wat, Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}
}

// TestLongFileUTF8 checks that a needle placed after 100 repetitions of a
// multi-byte string is found in exactly one file, for both line and chunk
// matches.
func TestLongFileUTF8(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	content := []byte(strings.Repeat(unicode, 100) + needle)
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("a", 50)),
		},
		Document{
			Name:    "f2",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestEstimateDocCount runs searches with EstimateDocCount set and checks
// Stats.ShardFilesConsidered: 2 when the repo regexp matches the indexed
// repository name, 0 when it does not.
func TestEstimateDocCount(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	t.Run("LineMatches", func(t *testing.T) {
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			// Report the same field the condition checks.
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})
}

// TestUTF8CorrectCorpus checks that a file-name substring search returns
// exactly one file when another document holds a long run of multi-byte
// content.
func TestUTF8CorrectCorpus(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat(unicode, 100)),
		},
		Document{
			Name:    "xxxxxneeedle",
			Content: []byte("hello"),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestBuilderStats writes an index holding one 4096-byte document and checks
// that the builder's ContentSize is 2+4*1024.
func TestBuilderStats(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})
	var buf bytes.Buffer
	if err := b.Write(&buf); err != nil {
		t.Fatal(err)
	}

	if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}

// TestIOStats checks the ContentBytesLoaded and IndexBytesLoaded stats
// reported for a case-sensitive content search, for both line and chunk
// matches.
func TestIOStats(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, chunkOpts)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})
}

// TestStartLineAnchor checks that "^start" matches a file whose second line
// begins with "start", and that "^middle" matches nothing because "middle"
// only occurs inside a line.
func TestStartLineAnchor(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name: "f1",
			Content: []byte(
				`hello
start of middle of line
`),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			// Stop here: searching with the result of a failed parse is meaningless.
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})
}

// TestAndOrUnicode combines a parsed regexp query with nested And/Or
// repo and branch atoms and checks that it matches a document whose content
// has a multi-byte rune between the two regexp literals.
func TestAndOrUnicode(t *testing.T) {
	q, err := query.Parse("orange.*apple")
	if err != nil {
		// Stop here: building finalQ from a failed parse is meaningless.
		t.Fatalf("parse: %v", err)
	}
	finalQ := query.NewAnd(q,
		query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
			query.NewOr(&query.Branch{Pattern: "master"}))))

	b := testIndexBuilder(t, &Repository{
		Name:     "name",
		Branches: []RepositoryBranch{{"master", "master-version"}},
	}, Document{
		Name:    "f2",
		Content: []byte("orange\u2318apple"),
		// --------------0123456 78901
		Branches: []string{"master"},
	})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestAndShort checks that an And of a two-character substring ("at") and a
// longer substring ("orange") only matches the one file containing both.
func TestAndShort(t *testing.T) {
	content := []byte("bla needle at orange bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: []byte("xx at xx")},
		Document{Name: "f3", Content: []byte("yy orange xx")},
	)

	q := query.NewAnd(&query.Substring{Pattern: "at"},
		&query.Substring{Pattern: "orange"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestNoCollectRegexpSubstring checks that the regexp "final[,.]" yields a
// single match in content where the bare substring "final" appears on two
// lines but only one occurrence is followed by punctuation.
func TestNoCollectRegexpSubstring(t *testing.T) {
	content := []byte("bla final bla\nfoo final, foo")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
	)

	q := &query.Regexp{
		Regexp: mustParseRE("final[,.]"),
	}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.LineMatches) != 1 {
			t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.ChunkMatches) != 1 {
			// Report the chunk matches that were actually checked.
			t.Fatalf("got %d chunk matches, want 1 chunk match", len(f.ChunkMatches))
		}
	})
}

// printLineMatches renders each line match as `number:"line" fragments` and
// joins the entries with ", " for use in test failure messages.
func printLineMatches(ms []LineMatch) string {
	ss := make([]string, 0, len(ms))
	for _, m := range ms {
		ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
	}

	return strings.Join(ss, ", ")
}

// TestLang checks that combining a substring with a Language atom only
// returns the document indexed with that language.
func TestLang(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "cpp"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut checks that a query for a language no document uses
// returns no files and reports zero IndexBytesLoaded.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms checks that a query made only of a Language atom (no
// text atoms) still returns the single document indexed as java (f2).
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms checks that a query consisting of a negated substring
// plus a matching repo atom returns both indexed documents.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
}

// TestSymbolBoundaryStart checks that a symbol query matches a symbol section
// starting at byte offset 0 of the document.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd checks that a symbol query matches a symbol section
// that ends at the last byte of the document (offset 14 to 17).
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring checks that a symbol substring query reports the match
// inside the symbol section (offset 7, length 3) rather than occurrences of
// the same text outside it.
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact checks that a symbol substring query matching a
// whole symbol section reports that section's offset (4), ignoring later
// occurrences of the text that are not marked as symbols.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact checks that the anchored regexp "^bla$" inside a
// symbol query only matches the symbol section that is exactly "bla"
// (offset 5), not "blah" or "bl".
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 5 {
			t.Fatalf("got offset %d, want 5", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 5 {
			t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpPartial checks that a regexp matching only part of a symbol
// section reports the partial range (offset 1, length 3).
func TestSymbolRegexpPartial(t *testing.T) {
	content := []byte("abcdef")
	// ----------------012345

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 6}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 1 {
			t.Fatalf("got offset %d, want 1", m.Offset)
		}
		if m.MatchLength != 3 {
			t.Fatalf("got match length %d, want 3", m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 1 {
			t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
		}
		if m.End.ByteOffset != 4 {
			t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
		}
	})
}

// TestSymbolRegexpAll checks that the regexp ".*" inside a symbol query
// yields one match per symbol section, each starting at the section's start
// offset, for every indexed document.
func TestSymbolRegexpAll(t *testing.T) {
	docs := []Document{
		{
			Name:    "f1",
			Content: []byte("Hello Zoekt"),
			// --------------01234567890
			Symbols: []DocumentSection{{0, 5}, {6, 11}},
		},
		{
			Name:    "f2",
			Content: []byte("Second Zoekt Third"),
			// --------------012345678901234567
			Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
		},
	}

	b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE(".*")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			// Fail with a message instead of panicking on the index below.
			if len(res.Files[i].LineMatches) == 0 {
				t.Fatalf("got no line matches in doc %s", want.Name)
			}
			got := res.Files[i].LineMatches[0].LineFragments
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != got[j].Offset {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
				}
			}
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			// Fail with a message instead of panicking on the index below.
			if len(res.Files[i].ChunkMatches) == 0 {
				t.Fatalf("got no chunk matches in doc %s", want.Name)
			}
			got := res.Files[i].ChunkMatches[0].Ranges
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != uint32(got[j].Start.ByteOffset) {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
				}
			}
		}
	})
}

// TestHitIterTerminate runs a substring search that has no match; it makes no
// assertion on the result and only requires that the search completes.
func TestHitIterTerminate(t *testing.T) {
	// contrived input: trigram frequencies forces selecting abc +
	// def for the distance iteration. There is no match, so this
	// will advance the compressedPostingIterator to beyond the
	// end.
	content := []byte("abc bcdbcd cdecde abcabc def efg")
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
	})
}

// TestDistanceHitIterBailLast checks that searching for "UAST" returns no
// files for content that contains "AST" and "UASH" but never "UAST".
func TestDistanceHitIterBailLast(t *testing.T) {
	content := []byte("AST AST AST UASH")
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})

	// This subtest runs with chunkOpts, so it is named like the other
	// chunk-match subtests in this file.
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})
}

// TestDocumentSectionRuneBoundary checks that Add returns an error for the
// symbol sections {2, 6} and {3, 7} on content built from three
// kelvinCodePoint runes.
func TestDocumentSectionRuneBoundary(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b, err := NewIndexBuilder(nil)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	for i, sec := range []DocumentSection{
		{2, 6},
		{3, 7},
	} {
		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
			Symbols: []DocumentSection{sec},
		}); err == nil {
			t.Errorf("%d: Add succeeded", i)
		}
	}
}

// TestUnicodeQuery searches for a pattern made of three kelvinCodePoint runes
// and checks that the single reported match spans len(content) bytes.
func TestUnicodeQuery(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(content),
		},
	)

	q := &query.Substring{Pattern: content}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Report the chunk matches that were actually checked.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 range, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks
// that its original text is not searchable while the pattern "NOT-INDEXED"
// matches one file.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {
		b, err := NewIndexBuilder(nil)
		if err != nil {
			t.Fatalf("NewIndexBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with accepted and rejected
// inputs. NOTE(review): the second argument looks like a size/trigram limit
// and the third like an "allow large file" switch, per the inline comments;
// confirm against DocChecker.Check.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if err := docChecker.Check([]byte(text), 20000, false); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if err := docChecker.Check([]byte(text), 15, false); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if err := docChecker.Check([]byte(text), 15, true); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx"} {
		if err := docChecker.Check([]byte(text), 15, true); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd checks that a content regexp requiring "apple" and "banana" on
// the same line only matches f1, and that RegexpsConsidered is 1.
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName checks that the same regexp restricted to file names
// only matches the document named "apple banana", and that RegexpsConsidered
// is 1.
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestMultiLineRegex checks that a regexp spanning a newline matches only f1,
// producing 2 line matches or 1 chunk match with 1 range, and that
// RegexpsConsidered is 2.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp: r,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: res.Files[0] is indexed below.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].LineMatches); l != 2 {
			t.Errorf("got %v, want 2 line matches", l)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: res.Files[0] and ChunkMatches[0] are indexed below.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].ChunkMatches); l != 1 {
			t.Fatalf("got %v, want 1 chunk matches", l)
		}
		if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
			t.Errorf("got %v, want 1 chunk ranges", l)
		}
	})
}

// TestSearchTypeFileName checks query.Type with TypeFileName: when combined
// with a second substring only that substring's match is reported ("f2:8"),
// and on its own it yields a file-name-only result ("f2").
func TestSearchTypeFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	},
		Document{Name: "f1", Content: []byte("bla the needle")},
		Document{Name: "f2", Content: []byte("another file another\nneedle")},
		// -----------------------------------012345678901234567890-123456
	)

	t.Run("LineMatches", func(t *testing.T) {
		// wantSingleMatch expects one file with one line match and compares
		// either the file name (file-name match) or "name:offset" with want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}))
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			})
		wantSingleMatch(res, "f2")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// wantSingleMatch expects one file with one chunk match and compares
		// either the file name (file-name match) or "name:offset" with want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}),
			chunkOpts,
		)
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			},
			chunkOpts,
		)
		wantSingleMatch(res, "f2")
	})
}

3449func TestSearchTypeLanguage(t *testing.T) { 3450 b := testIndexBuilder(t, &Repository{ 3451 Name: "reponame", 3452 }, 3453 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3454 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3455 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3456 ) 3457 3458 t.Log(b.languageMap) 3459 3460 t.Run("LineMatches", func(t *testing.T) { 3461 wantSingleMatch := func(res *SearchResult, want string) { 3462 t.Helper() 3463 fmatches := res.Files 3464 if len(fmatches) != 1 { 3465 t.Errorf("got %v, want 1 matches", len(fmatches)) 3466 return 3467 } 3468 if len(fmatches[0].LineMatches) != 1 { 3469 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3470 return 3471 } 3472 var got string 3473 if fmatches[0].LineMatches[0].FileName { 3474 got = fmatches[0].FileName 3475 } else { 3476 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3477 } 3478 3479 if got != want { 3480 t.Errorf("got %s, want %s", got, want) 3481 } 3482 } 3483 3484 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3485 wantSingleMatch(res, "apex.cls") 3486 3487 res = searchForTest(t, b, &query.Language{Language:
"TeX"}) 3488 wantSingleMatch(res, "tex.cls") 3489 3490 res = searchForTest(t, b, &query.Language{Language: "C"}) 3491 wantSingleMatch(res, "hello.h") 3492 3493 // test fallback language search by pretending it's an older index version 3494 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3495 if len(res.Files) != 0 { 3496 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3497 } 3498 3499 b.featureVersion = 11 // force fallback 3500 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3501 wantSingleMatch(res, "hello.h") 3502 }) 3503 3504 t.Run("ChunkMatches", func(t *testing.T) { 3505 wantSingleMatch := func(res *SearchResult, want string) { 3506 t.Helper() 3507 fmatches := res.Files 3508 if len(fmatches) != 1 { 3509 t.Errorf("got %v, want 1 matches", len(fmatches)) 3510 return 3511 } 3512 if len(fmatches[0].ChunkMatches) != 1 { 3513 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3514 return 3515 } 3516 var got string 3517 if fmatches[0].ChunkMatches[0].FileName { 3518 got = fmatches[0].FileName 3519 } else { 3520 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3521 } 3522 3523 if got != want { 3524 t.Errorf("got %s, want %s", got, want) 3525 } 3526 } 3527 3528 b.featureVersion = FeatureVersion // reset feature version 3529 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3530 wantSingleMatch(res, "apex.cls") 3531 3532 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3533 wantSingleMatch(res, "tex.cls") 3534 3535 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3536 wantSingleMatch(res, "hello.h") 3537 3538 // test fallback language search by pretending it's an older index version 3539 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3540 if len(res.Files) != 0 { 3541 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3542 } 3543 3544 b.featureVersion = 11 // force 
fallback 3545 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3546 wantSingleMatch(res, "hello.h") 3547 }) 3548} 3549 3550func TestStats(t *testing.T) { 3551 ignored := []cmp.Option{ 3552 cmpopts.EquateEmpty(), 3553 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3554 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3555 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3556 } 3557 3558 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3559 searcher := searcherForTest(t, b) 3560 indexdata := searcher.(*indexData) 3561 return indexdata.repoListEntry 3562 } 3563 3564 t.Run("one empty repo", func(t *testing.T) { 3565 b := testIndexBuilder(t, nil) 3566 got := repoListEntries(b) 3567 want := []RepoListEntry{ 3568 { 3569 Stats: RepoStats{ 3570 Repos: 0, 3571 Shards: 1, 3572 Documents: 0, 3573 IndexBytes: 20, 3574 ContentBytes: 0, 3575 NewLinesCount: 0, 3576 DefaultBranchNewLinesCount: 0, 3577 OtherBranchesNewLinesCount: 0, 3578 }, 3579 }, 3580 } 3581 3582 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3583 t.Fatalf("mismatch (-want +got):\n%s", diff) 3584 } 3585 3586 }) 3587 3588 t.Run("one simple shard", func(t *testing.T) { 3589 b := testIndexBuilder(t, nil, 3590 Document{Name: "doc 0", Content: []byte("content 0")}, 3591 Document{Name: "doc 1", Content: []byte("content 1")}, 3592 ) 3593 got := repoListEntries(b) 3594 want := []RepoListEntry{ 3595 { 3596 Stats: RepoStats{ 3597 Repos: 0, 3598 Shards: 1, 3599 Documents: 2, 3600 IndexBytes: 224, 3601 ContentBytes: 28, 3602 NewLinesCount: 0, 3603 DefaultBranchNewLinesCount: 0, 3604 OtherBranchesNewLinesCount: 0, 3605 }, 3606 }, 3607 } 3608 3609 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3610 t.Fatalf("mismatch (-want +got):\n%s", diff) 3611 } 3612 3613 }) 3614 3615 t.Run("one compound shard", func(t *testing.T) { 3616 b := testIndexBuilderCompound(t, 3617 []*Repository{ 3618 {Name: "repo 0"}, 3619 {Name: "repo 1"}, 3620 }, 3621 [][]Document{ 3622 { 3623 
{Name: "doc 0", Content: []byte("content 0")}, 3624 {Name: "doc 1", Content: []byte("content 1")}, 3625 }, 3626 { 3627 {Name: "doc 2", Content: []byte("content 2")}, 3628 {Name: "doc 3", Content: []byte("content 3")}, 3629 }, 3630 }, 3631 ) 3632 got := repoListEntries(b) 3633 want := []RepoListEntry{ 3634 { 3635 Stats: RepoStats{ 3636 Repos: 0, 3637 Shards: 1, 3638 Documents: 2, 3639 IndexBytes: 180, 3640 ContentBytes: 28, 3641 NewLinesCount: 0, 3642 DefaultBranchNewLinesCount: 0, 3643 OtherBranchesNewLinesCount: 0, 3644 }, 3645 }, 3646 { 3647 Stats: RepoStats{ 3648 Repos: 0, 3649 Shards: 1, 3650 Documents: 2, 3651 IndexBytes: 180, 3652 ContentBytes: 28, 3653 NewLinesCount: 0, 3654 DefaultBranchNewLinesCount: 0, 3655 OtherBranchesNewLinesCount: 0, 3656 }, 3657 }, 3658 } 3659 3660 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3661 t.Fatalf("mismatch (-want +got):\n%s", diff) 3662 } 3663 }) 3664 3665 t.Run("compound shard with empty repos", func(t *testing.T) { 3666 b := testIndexBuilderCompound(t, 3667 []*Repository{ 3668 {Name: "repo 0"}, 3669 {Name: "repo 1"}, 3670 {Name: "repo 2"}, 3671 {Name: "repo 3"}, 3672 {Name: "repo 4"}, 3673 }, 3674 [][]Document{ 3675 {{Name: "doc 0", Content: []byte("content 0")}}, 3676 nil, 3677 {{Name: "doc 1", Content: []byte("content 1")}}, 3678 nil, 3679 nil, 3680 }, 3681 ) 3682 got := repoListEntries(b) 3683 3684 entryEmpty := RepoListEntry{Stats: RepoStats{ 3685 Shards: 1, 3686 Documents: 0, 3687 ContentBytes: 0, 3688 }} 3689 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3690 Shards: 1, 3691 Documents: 1, 3692 ContentBytes: 14, 3693 }} 3694 3695 want := []RepoListEntry{ 3696 entryNonEmpty, 3697 entryEmpty, 3698 entryNonEmpty, 3699 entryEmpty, 3700 entryEmpty, 3701 } 3702 3703 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3704 t.Fatalf("mismatch (-want +got):\n%s", diff) 3705 } 3706 3707 }) 3708} 3709 3710// This tests the frequent pattern "\bLITERAL\b". 
3711func TestWordSearch(t *testing.T) { 3712 content := []byte("needle the bla") 3713 // ----------------01234567890123 3714 3715 b := testIndexBuilder(t, nil, 3716 Document{ 3717 Name: "f1", 3718 Content: content, 3719 }) 3720 3721 t.Run("LineMatches", func(t *testing.T) { 3722 sres := searchForTest(t, b, 3723 &query.Regexp{ 3724 Regexp: mustParseRE("\\bthe\\b"), 3725 CaseSensitive: true, 3726 Content: true, 3727 }) 3728 3729 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3730 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3731 } 3732 3733 if sres.Stats.RegexpsConsidered != 0 { 3734 t.Fatal("expected regexp to be skipped") 3735 } 3736 3737 got := sres.Files[0].LineMatches[0] 3738 want := LineMatch{ 3739 LineFragments: []LineFragmentMatch{{ 3740 LineOffset: 7, 3741 Offset: 7, 3742 MatchLength: 3, 3743 }}, 3744 Line: content, 3745 FileName: false, 3746 LineNumber: 1, 3747 LineStart: 0, 3748 LineEnd: 14, 3749 } 3750 3751 if !reflect.DeepEqual(got, want) { 3752 t.Errorf("got %#v, want %#v", got, want) 3753 } 3754 }) 3755 3756 t.Run("ChunkMatches", func(t *testing.T) { 3757 sres := searchForTest(t, b, 3758 &query.Regexp{ 3759 Regexp: mustParseRE("\\bthe\\b"), 3760 CaseSensitive: true, 3761 }, chunkOpts) 3762 3763 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3764 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3765 } 3766 3767 if sres.Stats.RegexpsConsidered != 0 { 3768 t.Fatal("expected regexp to be skipped") 3769 } 3770 3771 got := sres.Files[0].ChunkMatches[0] 3772 want := ChunkMatch{ 3773 Content: content, 3774 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3775 Ranges: []Range{{ 3776 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3777 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3778 }}, 3779 } 3780 3781 if diff := cmp.Diff(want, got); diff != "" { 3782 t.Fatal(diff) 3783 } 3784 }) 3785}