fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 } 42 r.Files[i].Checksum = nil 43 r.Files[i].Debug = "" 44 } 45} 46 47func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 48 t.Helper() 49 50 b, err := NewIndexBuilder(repo) 51 if err != nil { 52 t.Fatalf("NewIndexBuilder: %v", err) 53 } 54 55 for i, d := range docs { 56 if err := b.Add(d); err != nil { 57 t.Fatalf("Add %d: %v", i, err) 58 } 59 } 60 61 return b 62} 63 64func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 65 t.Helper() 66 67 b := newIndexBuilder() 68 b.indexFormatVersion = NextIndexFormatVersion 69 70 if len(repos) != len(docs) { 71 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 72 } 73 74 for i, repo := range repos { 75 if err := 
b.setRepository(repo); err != nil { 76 t.Fatal(err) 77 } 78 for j, d := range docs[i] { 79 if err := b.Add(d); err != nil { 80 t.Fatalf("Add %d %d: %v", i, j, err) 81 } 82 } 83 } 84 85 return b 86} 87 88func TestBoundary(t *testing.T) { 89 b := testIndexBuilder(t, nil, 90 Document{Name: "f1", Content: []byte("x the")}, 91 Document{Name: "f1", Content: []byte("reader")}) 92 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 93 if len(res.Files) > 0 { 94 t.Fatalf("got %v, want no matches", res.Files) 95 } 96} 97 98func TestDocSectionInvalid(t *testing.T) { 99 b, err := NewIndexBuilder(nil) 100 if err != nil { 101 t.Fatalf("NewIndexBuilder: %v", err) 102 } 103 doc := Document{ 104 Name: "f1", 105 Content: []byte("01234567890123"), 106 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 107 } 108 109 if err := b.Add(doc); err == nil { 110 t.Errorf("overlapping doc sections should fail") 111 } 112 113 doc = Document{ 114 Name: "f1", 115 Content: []byte("01234567890123"), 116 Symbols: []DocumentSection{{0, 20}}, 117 } 118 119 if err := b.Add(doc); err == nil { 120 t.Errorf("doc sections beyond EOF should fail") 121 } 122} 123 124func TestBasic(t *testing.T) { 125 b := testIndexBuilder(t, nil, 126 Document{ 127 Name: "f2", 128 Content: []byte("to carry water in the no later bla"), 129 // --------------0123456789012345678901234567890123 130 }) 131 132 t.Run("LineMatch", func(t *testing.T) { 133 res := searchForTest(t, b, &query.Substring{ 134 Pattern: "water", 135 CaseSensitive: true, 136 }) 137 fmatches := res.Files 138 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 139 t.Fatalf("got %v, want 1 matches", fmatches) 140 } 141 142 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 143 want := "f2:9" 144 if got != want { 145 t.Errorf("1: got %s, want %s", got, want) 146 } 147 }) 148 149 t.Run("ChunkMatch", func(t *testing.T) { 150 res := searchForTest(t, b, &query.Substring{ 151 Pattern: "water", 152 
// memSeeker is an in-memory index file used by the tests in this file: it
// serves reads straight out of a byte slice.
type memSeeker struct {
	data []byte
}

// Name returns a fixed identifier for the in-memory file.
func (s *memSeeker) Name() string {
	return "memseeker"
}

// Close is a no-op; there is no underlying resource to release.
func (s *memSeeker) Close() {}

// Read returns the sz bytes of the file starting at offset off. The returned
// slice aliases the backing data; it is not a copy.
//
// A request that extends past the end of the data yields an error instead of
// panicking or exposing bytes that lie beyond len(data) in spare capacity.
func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
	// Compute the end in 64 bits so off+sz cannot wrap around uint32.
	end := uint64(off) + uint64(sz)
	if end > uint64(len(s.data)) {
		return nil, fmt.Errorf("memseeker: read [%d, %d) out of range for %d bytes", off, end, len(s.data))
	}
	return s.data[off:end], nil
}

// Size returns the length of the backing data in bytes.
func (s *memSeeker) Size() (uint32, error) {
	return uint32(len(s.data)), nil
}
2, 223 }}, 224 }} 225 226 if !reflect.DeepEqual(matches, want) { 227 t.Errorf("got %v, want %v", matches, want) 228 } 229 }) 230 231 t.Run("ChunkMatches", func(t *testing.T) { 232 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 233 234 matches := sres.Files 235 want := []FileMatch{{ 236 FileName: "filename", 237 ChunkMatches: []ChunkMatch{{ 238 Content: []byte("line2"), 239 ContentStart: Location{ 240 ByteOffset: 6, 241 LineNumber: 2, 242 Column: 1, 243 }, 244 Ranges: []Range{{ 245 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 246 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 247 }}, 248 }}, 249 }} 250 251 if diff := cmp.Diff(want, matches); diff != "" { 252 t.Fatal(diff) 253 } 254 }) 255} 256 257// A result spanning multiple lines should have LineMatches that only cover 258// single lines. 259func TestQueryNewlines(t *testing.T) { 260 text := "line1\nline2\nbla" 261 b := testIndexBuilder(t, nil, 262 Document{Name: "filename", Content: []byte(text)}) 263 264 t.Run("LineMatches", func(t *testing.T) { 265 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 266 matches := sres.Files 267 if len(matches) != 1 { 268 t.Fatalf("got %d file matches, want exactly one", len(matches)) 269 } 270 m := matches[0] 271 if len(m.LineMatches) != 2 { 272 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 273 } 274 }) 275 276 t.Run("ChunkMatches", func(t *testing.T) { 277 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 278 matches := sres.Files 279 if len(matches) != 1 { 280 t.Fatalf("got %d file matches, want exactly one", len(matches)) 281 } 282 m := matches[0] 283 if len(m.ChunkMatches) != 1 { 284 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 285 } 286 }) 287} 288 289var chunkOpts = SearchOptions{ChunkMatches: true} 290 291func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 292 searcher := 
searcherForTest(t, b) 293 var opts SearchOptions 294 if len(o) > 0 { 295 opts = o[0] 296 } 297 res, err := searcher.Search(context.Background(), q, &opts) 298 if err != nil { 299 t.Fatalf("Search(%s): %v", q, err) 300 } 301 clearScores(res) 302 return res 303} 304 305func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 306 var buf bytes.Buffer 307 if err := b.Write(&buf); err != nil { 308 t.Fatal(err) 309 } 310 f := &memSeeker{buf.Bytes()} 311 312 searcher, err := NewSearcher(f) 313 if err != nil { 314 t.Fatalf("NewSearcher: %v", err) 315 } 316 317 return searcher 318} 319 320func TestCaseFold(t *testing.T) { 321 b := testIndexBuilder(t, nil, 322 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 323 // -----------------------------------012345678901234 324 ) 325 t.Run("LineMatches", func(t *testing.T) { 326 sres := searchForTest(t, b, &query.Substring{ 327 Pattern: "bananas", 328 CaseSensitive: true, 329 }) 330 matches := sres.Files 331 if len(matches) != 0 { 332 t.Errorf("foldcase: got %#v, want 0 matches", matches) 333 } 334 335 sres = searchForTest(t, b, 336 &query.Substring{ 337 Pattern: "BaNaNAS", 338 CaseSensitive: true, 339 }) 340 matches = sres.Files 341 if len(matches) != 1 { 342 t.Errorf("no foldcase: got %v, want 1 matches", matches) 343 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 344 t.Errorf("foldcase: got %v, want offsets 7", matches) 345 } 346 }) 347 348 t.Run("ChunkMatches", func(t *testing.T) { 349 sres := searchForTest(t, b, &query.Substring{ 350 Pattern: "bananas", 351 CaseSensitive: true, 352 }, chunkOpts) 353 matches := sres.Files 354 if len(matches) != 0 { 355 t.Errorf("foldcase: got %#v, want 0 matches", matches) 356 } 357 358 sres = searchForTest(t, b, 359 &query.Substring{ 360 Pattern: "BaNaNAS", 361 CaseSensitive: true, 362 }) 363 matches = sres.Files 364 if len(matches) != 1 { 365 t.Errorf("no foldcase: got %v, want 1 matches", matches) 366 } else if 
matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 367 t.Errorf("foldcase: got %v, want offsets 7", matches) 368 } 369 }) 370} 371 372// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 373// chars. Those are then set as symbols. 374func wordsAsSymbols(doc Document) Document { 375 re := regexp.MustCompile(`\b\w{2,}\b`) 376 var symbols []DocumentSection 377 for _, match := range re.FindAllIndex(doc.Content, -1) { 378 symbols = append(symbols, DocumentSection{ 379 Start: uint32(match[0]), 380 End: uint32(match[1]), 381 }) 382 } 383 doc.Symbols = symbols 384 return doc 385} 386 387func TestSearchStats(t *testing.T) { 388 ctx := context.Background() 389 searcher := searcherForTest(t, testIndexBuilder(t, nil, 390 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 391 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 392 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 393 // --------------------------------------------------0123456789012345 394 )) 395 396 andQuery := query.NewAnd( 397 &query.Substring{ 398 Pattern: "banana", 399 }, 400 &query.Substring{ 401 Pattern: "apple", 402 }, 403 ) 404 405 t.Run("LineMatches", func(t *testing.T) { 406 sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 407 if err != nil { 408 t.Fatal(err) 409 } 410 matches := sres.Files 411 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 412 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 413 } 414 415 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 416 t.Fatalf("got %#v, want offsets 2,9", matches) 417 } 418 }) 419 t.Run("ChunkMatches", func(t *testing.T) { 420 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 421 if err != nil { 422 t.Fatal(err) 423 } 424 matches := sres.Files 425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || 
len(matches[0].ChunkMatches[0].Ranges) != 2 { 426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 427 } 428 429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 430 t.Fatalf("got %#v, want offsets 2,9", matches) 431 } 432 }) 433 t.Run("Stats", func(t *testing.T) { 434 cases := []struct { 435 Name string 436 Q query.Q 437 Want Stats 438 }{{ 439 Name: "and-query", 440 Q: andQuery, 441 Want: Stats{ 442 FilesLoaded: 1, 443 ContentBytesLoaded: 22, 444 IndexBytesLoaded: 8, 445 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 446 NgramLookups: 104, 447 MatchCount: 2, 448 FileCount: 1, 449 FilesConsidered: 2, 450 ShardsScanned: 1, 451 }, 452 }, { 453 Name: "one-trigram", 454 Q: &query.Substring{ 455 Pattern: "a y", 456 Content: true, 457 CaseSensitive: true, 458 }, 459 Want: Stats{ 460 ContentBytesLoaded: 14, 461 IndexBytesLoaded: 1, 462 FileCount: 1, 463 FilesConsidered: 1, 464 FilesLoaded: 1, 465 ShardsScanned: 1, 466 MatchCount: 1, 467 NgramMatches: 1, 468 NgramLookups: 2, // once to lookup frequency then again to access posting list. 469 }, 470 }, { 471 Name: "one-trigram-case-insensitive", 472 Q: &query.Substring{ 473 Pattern: "a y", 474 Content: true, 475 }, 476 Want: Stats{ 477 ContentBytesLoaded: 14, 478 IndexBytesLoaded: 1, 479 FileCount: 1, 480 FilesConsidered: 1, 481 FilesLoaded: 1, 482 ShardsScanned: 1, 483 MatchCount: 1, 484 NgramMatches: 1, 485 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
486 }, 487 }, { 488 Name: "one-trigram-pruned", 489 Q: &query.Substring{ 490 Pattern: "foo", 491 Content: true, 492 CaseSensitive: true, 493 }, 494 Want: Stats{ 495 ShardsSkippedFilter: 1, 496 NgramLookups: 1, // only had to lookup once 497 }, 498 }, { 499 Name: "one-trigram-branch-pruned", 500 Q: query.NewAnd( 501 &query.Substring{ 502 Pattern: "foo", 503 Content: true, 504 CaseSensitive: true, 505 }, 506 &query.Substring{ 507 Pattern: "a y", 508 Content: true, 509 CaseSensitive: true, 510 }, 511 ), 512 Want: Stats{ 513 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 514 ShardsSkippedFilter: 1, 515 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 516 }, 517 }, { 518 Name: "symbol-substr-nomatch", 519 Q: &query.Symbol{Expr: &query.Substring{ 520 Pattern: "banana apple", 521 Content: true, 522 CaseSensitive: true, 523 }}, 524 Want: Stats{ 525 IndexBytesLoaded: 3, 526 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 527 MatchCount: 0, // even though there is a match it doesn't align with a symbol 528 ShardsScanned: 1, 529 NgramMatches: 1, 530 NgramLookups: 12, 531 }, 532 }, { 533 Name: "symbol-substr", 534 Q: &query.Symbol{Expr: &query.Substring{ 535 Pattern: "apple", 536 Content: true, 537 CaseSensitive: true, 538 }}, 539 Want: Stats{ 540 ContentBytesLoaded: 35, 541 IndexBytesLoaded: 4, 542 FileCount: 2, 543 FilesConsidered: 2, // must be 2 to ensure we used the index 544 FilesLoaded: 2, 545 MatchCount: 2, // apple symbols is in two files 546 ShardsScanned: 1, 547 NgramMatches: 2, 548 NgramLookups: 5, 549 }, 550 }, { 551 Name: "symbol-regexp-nomatch", 552 Q: &query.Symbol{Expr: &query.Regexp{ 553 Regexp: mustParseRE("^apple.banana$"), 554 Content: true, 555 CaseSensitive: true, 556 }}, 557 Want: Stats{ 558 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 559 IndexBytesLoaded: 8, 560 FilesConsidered: 2, // important that we 
don't check 3 to ensure we are using the index 561 FilesLoaded: 2, 562 MatchCount: 0, // even though there is a match it doesn't align with a symbol 563 ShardsScanned: 1, 564 NgramMatches: 3, 565 NgramLookups: 11, 566 }, 567 }, { 568 Name: "symbol-regexp", 569 Q: &query.Symbol{Expr: &query.Regexp{ 570 Regexp: mustParseRE("^app.e$"), 571 Content: true, 572 CaseSensitive: true, 573 }}, 574 Want: Stats{ 575 ContentBytesLoaded: 35, 576 IndexBytesLoaded: 2, 577 FileCount: 2, 578 FilesConsidered: 2, // must be 2 to ensure we used the index 579 FilesLoaded: 2, 580 MatchCount: 2, // apple symbols is in two files 581 ShardsScanned: 1, 582 NgramMatches: 2, 583 NgramLookups: 2, 584 }, 585 }} 586 587 for _, tc := range cases { 588 t.Run(tc.Name, func(t *testing.T) { 589 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 590 if err != nil { 591 t.Fatal(err) 592 } 593 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 594 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 595 } 596 }) 597 } 598 }) 599} 600 601func TestAndNegateSearch(t *testing.T) { 602 b := testIndexBuilder(t, nil, 603 Document{Name: "f1", Content: []byte("x banana y")}, 604 // -----------------------------------0123456789 605 Document{Name: "f4", Content: []byte("x banana apple y")}) 606 607 t.Run("LineMatches", func(t *testing.T) { 608 sres := searchForTest(t, b, query.NewAnd( 609 &query.Substring{ 610 Pattern: "banana", 611 }, 612 &query.Not{Child: &query.Substring{ 613 Pattern: "apple", 614 }})) 615 616 matches := sres.Files 617 618 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 619 t.Fatalf("got %v, want 1 match", matches) 620 } 621 if matches[0].FileName != "f1" { 622 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 623 } 624 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 625 t.Fatalf("got %v, want offset 2", matches) 626 } 627 }) 628 629 t.Run("ChunkMatches", func(t *testing.T) { 630 sres := 
searchForTest(t, b, 631 query.NewAnd( 632 &query.Substring{ 633 Pattern: "banana", 634 }, 635 &query.Not{Child: &query.Substring{ 636 Pattern: "apple", 637 }}, 638 ), 639 chunkOpts, 640 ) 641 642 matches := sres.Files 643 644 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 645 t.Fatalf("got %v, want 1 match", matches) 646 } 647 if matches[0].FileName != "f1" { 648 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 649 } 650 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 651 t.Fatalf("got %v, want offset 2", matches) 652 } 653 }) 654} 655 656func TestNegativeMatchesOnlyShortcut(t *testing.T) { 657 b := testIndexBuilder(t, nil, 658 Document{Name: "f1", Content: []byte("x banana y")}, 659 Document{Name: "f2", Content: []byte("x appelmoes y")}, 660 Document{Name: "f3", Content: []byte("x appelmoes y")}, 661 Document{Name: "f3", Content: []byte("x appelmoes y")}) 662 663 t.Run("LineMatches", func(t *testing.T) { 664 sres := searchForTest(t, b, query.NewAnd( 665 &query.Substring{ 666 Pattern: "banana", 667 }, 668 &query.Not{Child: &query.Substring{ 669 Pattern: "appel", 670 }})) 671 672 if sres.Stats.FilesConsidered != 1 { 673 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 674 } 675 }) 676 677 t.Run("ChunkMatches", func(t *testing.T) { 678 sres := searchForTest(t, b, query.NewAnd( 679 &query.Substring{ 680 Pattern: "banana", 681 }, 682 &query.Not{Child: &query.Substring{ 683 Pattern: "appel", 684 }}), chunkOpts) 685 686 if sres.Stats.FilesConsidered != 1 { 687 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 688 } 689 }) 690} 691 692func TestFileSearch(t *testing.T) { 693 b := testIndexBuilder(t, nil, 694 Document{Name: "banzana", Content: []byte("x orange y")}, 695 // -------------0123456 696 Document{Name: "banana", Content: []byte("x apple y")}, 697 // -------------012345 698 ) 699 700 t.Run("LineMatches", func(t *testing.T) { 701 sres := searchForTest(t, b, &query.Substring{ 702 Pattern: "anan", 703 FileName: true, 
704 }) 705 706 matches := sres.Files 707 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 708 t.Fatalf("got %v, want 1 match", matches) 709 } 710 711 got := matches[0].LineMatches[0] 712 want := LineMatch{ 713 Line: []byte("banana"), 714 LineFragments: []LineFragmentMatch{{ 715 Offset: 1, 716 LineOffset: 1, 717 MatchLength: 4, 718 }}, 719 FileName: true, 720 } 721 722 if !reflect.DeepEqual(got, want) { 723 t.Errorf("got %#v, want %#v", got, want) 724 } 725 }) 726 727 t.Run("ChunkMatches", func(t *testing.T) { 728 sres := searchForTest(t, b, &query.Substring{ 729 Pattern: "anan", 730 FileName: true, 731 }, chunkOpts) 732 733 matches := sres.Files 734 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 735 t.Fatalf("got %v, want 1 match", matches) 736 } 737 738 got := matches[0].ChunkMatches[0] 739 want := ChunkMatch{ 740 Content: []byte("banana"), 741 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 742 Ranges: []Range{{ 743 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 744 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 745 }}, 746 FileName: true, 747 } 748 749 if diff := cmp.Diff(want, got); diff != "" { 750 t.Fatal(diff) 751 } 752 }) 753 754 t.Run("FileNameSet", func(t *testing.T) { 755 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 756 757 matches := sres.Files 758 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 759 t.Fatalf("got %v, want 1 match", matches) 760 } 761 762 got := matches[0].ChunkMatches[0] 763 want := ChunkMatch{ 764 Content: []byte("banana"), 765 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 766 Ranges: []Range{{ 767 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 768 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 769 }}, 770 FileName: true, 771 } 772 773 if diff := cmp.Diff(want, got); diff != "" { 774 t.Fatal(diff) 775 } 776 }) 777} 778 779func TestFileCase(t *testing.T) { 780 b := testIndexBuilder(t, nil, 781 
Document{Name: "BANANA", Content: []byte("x orange y")}) 782 783 t.Run("LineMatches", func(t *testing.T) { 784 sres := searchForTest(t, b, &query.Substring{ 785 Pattern: "banana", 786 FileName: true, 787 }) 788 789 matches := sres.Files 790 if len(matches) != 1 || matches[0].FileName != "BANANA" { 791 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 792 } 793 }) 794 795 t.Run("ChunkMatches", func(t *testing.T) { 796 sres := searchForTest(t, b, &query.Substring{ 797 Pattern: "banana", 798 FileName: true, 799 }, chunkOpts) 800 801 matches := sres.Files 802 if len(matches) != 1 || matches[0].FileName != "BANANA" { 803 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 804 } 805 }) 806} 807 808func TestFileRegexpSearchBruteForce(t *testing.T) { 809 b := testIndexBuilder(t, nil, 810 Document{Name: "banzana", Content: []byte("x orange y")}, 811 Document{Name: "banana", Content: []byte("x apple y")}, 812 ) 813 t.Run("LineMatches", func(t *testing.T) { 814 sres := searchForTest(t, b, &query.Regexp{ 815 Regexp: mustParseRE("[qn][zx]"), 816 FileName: true, 817 }) 818 819 matches := sres.Files 820 if len(matches) != 1 || matches[0].FileName != "banzana" { 821 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 822 } 823 }) 824 t.Run("LineMatches", func(t *testing.T) { 825 sres := searchForTest(t, b, &query.Regexp{ 826 Regexp: mustParseRE("[qn][zx]"), 827 FileName: true, 828 }, chunkOpts) 829 830 matches := sres.Files 831 if len(matches) != 1 || matches[0].FileName != "banzana" { 832 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 833 } 834 }) 835} 836 837func TestFileRegexpSearchShortString(t *testing.T) { 838 b := testIndexBuilder(t, nil, 839 Document{Name: "banana.py", Content: []byte("x orange y")}) 840 841 t.Run("LineMatches", func(t *testing.T) { 842 sres := searchForTest(t, b, &query.Regexp{ 843 Regexp: mustParseRE("ana.py"), 844 FileName: true, 845 }) 846 847 matches := sres.Files 848 if len(matches) != 1 || matches[0].FileName != "banana.py" { 849 
t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 850 } 851 }) 852 853 t.Run("ChunkMatches", func(t *testing.T) { 854 sres := searchForTest(t, b, &query.Regexp{ 855 Regexp: mustParseRE("ana.py"), 856 FileName: true, 857 }, chunkOpts) 858 859 matches := sres.Files 860 if len(matches) != 1 || matches[0].FileName != "banana.py" { 861 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 862 } 863 }) 864} 865 866func TestFileSubstringSearchBruteForce(t *testing.T) { 867 b := testIndexBuilder(t, nil, 868 Document{Name: "BANZANA", Content: []byte("x orange y")}, 869 Document{Name: "banana", Content: []byte("x apple y")}) 870 871 q := &query.Substring{ 872 Pattern: "z", 873 FileName: true, 874 } 875 876 t.Run("LineMatches", func(t *testing.T) { 877 res := searchForTest(t, b, q) 878 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 879 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 880 } 881 }) 882 883 t.Run("ChunkMatches", func(t *testing.T) { 884 res := searchForTest(t, b, q, chunkOpts) 885 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 886 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 887 } 888 }) 889} 890 891func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 892 b := testIndexBuilder(t, nil, 893 Document{Name: "BANZANA", Content: []byte("x orange y")}, 894 Document{Name: "bananaq", Content: []byte("x apple y")}) 895 896 q := &query.Substring{ 897 Pattern: "q", 898 FileName: true, 899 } 900 t.Run("LineMatches", func(t *testing.T) { 901 res := searchForTest(t, b, q) 902 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 903 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 904 } 905 }) 906 907 t.Run("LineMatches", func(t *testing.T) { 908 res := searchForTest(t, b, q, chunkOpts) 909 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 910 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 911 } 912 }) 913} 914 915func TestSearchMatchAll(t 
*testing.T) { 916 b := testIndexBuilder(t, nil, 917 Document{Name: "banzana", Content: []byte("x orange y")}, 918 Document{Name: "banana", Content: []byte("x apple y")}) 919 920 t.Run("LineMatches", func(t *testing.T) { 921 sres := searchForTest(t, b, &query.Const{Value: true}) 922 matches := sres.Files 923 if len(matches) != 2 { 924 t.Fatalf("got %v, want 2 matches", matches) 925 } 926 }) 927 928 t.Run("ChunkMatches", func(t *testing.T) { 929 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 930 matches := sres.Files 931 if len(matches) != 2 { 932 t.Fatalf("got %v, want 2 matches", matches) 933 } 934 }) 935} 936 937func TestSearchNewline(t *testing.T) { 938 b := testIndexBuilder(t, nil, 939 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 940 941 t.Run("LineMatches", func(t *testing.T) { 942 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 943 944 // Just check that we don't crash. 945 946 matches := sres.Files 947 if len(matches) != 1 { 948 t.Fatalf("got %v, want 1 matches", matches) 949 } 950 }) 951 952 t.Run("ChunkMatches", func(t *testing.T) { 953 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 954 955 // Just check that we don't crash. 
956 957 matches := sres.Files 958 if len(matches) != 1 { 959 t.Fatalf("got %v, want 1 matches", matches) 960 } 961 }) 962} 963 964func TestSearchMatchAllRegexp(t *testing.T) { 965 b := testIndexBuilder(t, nil, 966 Document{Name: "banzana", Content: []byte("abcd")}, 967 Document{Name: "banana", Content: []byte("pqrs")}) 968 969 t.Run("LineMatches", func(t *testing.T) { 970 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 971 972 matches := sres.Files 973 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 974 t.Fatalf("got %v, want 2 matches", matches) 975 } 976 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 977 t.Fatalf("want 4 chars in every file, got %#v", matches) 978 } 979 }) 980 981 t.Run("ChunkMatches", func(t *testing.T) { 982 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 983 984 matches := sres.Files 985 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 986 t.Fatalf("got %v, want 2 matches", matches) 987 } 988 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 989 t.Fatalf("want 4 chars in every file, got %#v", matches) 990 } 991 }) 992} 993 994func TestFileRestriction(t *testing.T) { 995 b := testIndexBuilder(t, nil, 996 Document{Name: "banana1", Content: []byte("x orange y")}, 997 Document{Name: "banana2", Content: []byte("x apple y")}, 998 Document{Name: "orange", Content: []byte("x apple z")}) 999 1000 t.Run("LineMatches", func(t *testing.T) { 1001 sres := searchForTest(t, b, query.NewAnd( 1002 &query.Substring{ 1003 Pattern: "banana", 1004 FileName: true, 1005 }, 1006 &query.Substring{ 1007 Pattern: "apple", 1008 })) 1009 1010 matches := sres.Files 1011 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1012 t.Fatalf("got %v, want 1 match", matches) 1013 } 1014 1015 match := matches[0].LineMatches[0] 1016 got := string(match.Line) 1017 want := "x apple y" 1018 if got != want { 1019 t.Errorf("got 
match %#v, want line %q", match, want) 1020 } 1021 }) 1022 1023 t.Run("ChunkMatches", func(t *testing.T) { 1024 sres := searchForTest(t, b, query.NewAnd( 1025 &query.Substring{ 1026 Pattern: "banana", 1027 FileName: true, 1028 }, 1029 &query.Substring{ 1030 Pattern: "apple", 1031 }), chunkOpts) 1032 1033 matches := sres.Files 1034 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1035 t.Fatalf("got %v, want 1 match", matches) 1036 } 1037 1038 match := matches[0].ChunkMatches[0] 1039 got := string(match.Content) 1040 want := "x apple y" 1041 if got != want { 1042 t.Errorf("got match %#v, want line %q", match, want) 1043 } 1044 }) 1045} 1046 1047func TestFileNameBoundary(t *testing.T) { 1048 b := testIndexBuilder(t, nil, 1049 Document{Name: "banana2", Content: []byte("x apple y")}, 1050 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1051 Document{Name: "foo", Content: []byte("x apple y")}) 1052 1053 t.Run("LineMatches", func(t *testing.T) { 1054 sres := searchForTest(t, b, &query.Substring{ 1055 Pattern: "helpers.go", 1056 FileName: true, 1057 }) 1058 1059 matches := sres.Files 1060 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1061 t.Fatalf("got %v, want 1 match", matches) 1062 } 1063 }) 1064 1065 t.Run("ChunkMatches", func(t *testing.T) { 1066 sres := searchForTest(t, b, &query.Substring{ 1067 Pattern: "helpers.go", 1068 FileName: true, 1069 }, chunkOpts) 1070 1071 matches := sres.Files 1072 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1073 t.Fatalf("got %v, want 1 match", matches) 1074 } 1075 }) 1076} 1077 1078func TestDocumentOrder(t *testing.T) { 1079 var docs []Document 1080 for i := 0; i < 3; i++ { 1081 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1082 } 1083 1084 b := testIndexBuilder(t, nil, docs...) 
1085 1086 t.Run("LineMatches", func(t *testing.T) { 1087 sres := searchForTest(t, b, query.NewAnd( 1088 &query.Substring{ 1089 Pattern: "needle", 1090 })) 1091 1092 want := []string{"f0", "f1", "f2"} 1093 var got []string 1094 for _, f := range sres.Files { 1095 got = append(got, f.FileName) 1096 } 1097 if !reflect.DeepEqual(got, want) { 1098 t.Fatalf("got %v, want %v", got, want) 1099 } 1100 }) 1101 1102 t.Run("ChunkMatches", func(t *testing.T) { 1103 sres := searchForTest(t, b, 1104 query.NewAnd(&query.Substring{ 1105 Pattern: "needle", 1106 }), 1107 chunkOpts, 1108 ) 1109 1110 want := []string{"f0", "f1", "f2"} 1111 var got []string 1112 for _, f := range sres.Files { 1113 got = append(got, f.FileName) 1114 } 1115 if !reflect.DeepEqual(got, want) { 1116 t.Fatalf("got %v, want %v", got, want) 1117 } 1118 }) 1119} 1120 1121func TestBranchMask(t *testing.T) { 1122 b := testIndexBuilder(t, &Repository{ 1123 Branches: []RepositoryBranch{ 1124 {"master", "v-master"}, 1125 {"stable", "v-stable"}, 1126 {"bonzai", "v-bonzai"}, 1127 }, 1128 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1129 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1130 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1131 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1132 ) 1133 1134 t.Run("LineMatches", func(t *testing.T) { 1135 sres := searchForTest(t, b, query.NewAnd( 1136 &query.Substring{ 1137 Pattern: "needle", 1138 }, 1139 &query.Branch{ 1140 Pattern: "table", 1141 })) 1142 1143 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1144 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1145 } 1146 1147 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1148 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1149 } 1150 }) 1151 1152 t.Run("ChunkMatches", 
func(t *testing.T) { 1153 sres := searchForTest(t, b, query.NewAnd( 1154 &query.Substring{ 1155 Pattern: "needle", 1156 }, 1157 &query.Branch{ 1158 Pattern: "table", 1159 }), 1160 chunkOpts, 1161 ) 1162 1163 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1164 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1165 } 1166 1167 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1168 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1169 } 1170 }) 1171} 1172 1173func TestBranchLimit(t *testing.T) { 1174 for limit := 64; limit <= 65; limit++ { 1175 r := &Repository{} 1176 for i := 0; i < limit; i++ { 1177 s := fmt.Sprintf("b%d", i) 1178 r.Branches = append(r.Branches, RepositoryBranch{ 1179 s, "v-" + s, 1180 }) 1181 } 1182 _, err := NewIndexBuilder(r) 1183 if limit == 64 && err != nil { 1184 t.Fatalf("NewIndexBuilder: %v", err) 1185 } else if limit == 65 && err == nil { 1186 t.Fatalf("NewIndexBuilder succeeded") 1187 } 1188 } 1189} 1190 1191func TestBranchReport(t *testing.T) { 1192 branches := []string{"stable", "master"} 1193 b := testIndexBuilder(t, &Repository{ 1194 Branches: []RepositoryBranch{ 1195 {"stable", "vs"}, 1196 {"master", "vm"}, 1197 }, 1198 }, 1199 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1200 1201 t.Run("LineMatches", func(t *testing.T) { 1202 sres := searchForTest(t, b, &query.Substring{ 1203 Pattern: "needle", 1204 }) 1205 if len(sres.Files) != 1 { 1206 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1207 } 1208 1209 f := sres.Files[0] 1210 if !reflect.DeepEqual(f.Branches, branches) { 1211 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1212 } 1213 }) 1214 1215 t.Run("ChunkMatches", func(t *testing.T) { 1216 sres := searchForTest(t, b, &query.Substring{ 1217 Pattern: "needle", 1218 }, chunkOpts) 1219 if len(sres.Files) != 1 { 1220 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1221 } 1222 1223 f := 
sres.Files[0]
		if !reflect.DeepEqual(f.Branches, branches) {
			t.Fatalf("got branches %q, want %q", f.Branches, branches)
		}
	})
}

// TestBranchVersions indexes a single document that is only on "master" and
// checks that the matched file reports the version registered for that branch.
func TestBranchVersions(t *testing.T) {
	repo := &Repository{
		Branches: []RepositoryBranch{
			{"stable", "v-stable"},
			{"master", "v-master"},
		},
	}
	doc := Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}
	b := testIndexBuilder(t, repo, doc)

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if file := res.Files[0]; file.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", file)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if file := res.Files[0]; file.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", file)
		}
	})
}

// mustParseRE parses s using the Perl syntax flags and panics when s is not a
// valid regular expression.
func mustParseRE(s string) *syntax.Regexp {
	re, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return re
	}

	panic(err)
}

func TestRegexp(t *testing.T) {
	content := []byte("needle the bla")
	// ----------------01234567890123

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		got := sres.Files[0].LineMatches[0]
		want := LineMatch{
			LineFragments: []LineFragmentMatch{{
				LineOffset: 3,
1300 Offset: 3, 1301 MatchLength: 11, 1302 }}, 1303 Line: content, 1304 FileName: false, 1305 LineNumber: 1, 1306 LineStart: 0, 1307 LineEnd: 14, 1308 } 1309 1310 if !reflect.DeepEqual(got, want) { 1311 t.Errorf("got %#v, want %#v", got, want) 1312 } 1313 }) 1314 1315 t.Run("ChunkMatches", func(t *testing.T) { 1316 sres := searchForTest(t, b, 1317 &query.Regexp{ 1318 Regexp: mustParseRE("dle.*bla"), 1319 }, chunkOpts) 1320 1321 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1322 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1323 } 1324 1325 got := sres.Files[0].ChunkMatches[0] 1326 want := ChunkMatch{ 1327 Content: content, 1328 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1329 Ranges: []Range{{ 1330 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1331 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1332 }}, 1333 } 1334 1335 if diff := cmp.Diff(want, got); diff != "" { 1336 t.Fatal(diff) 1337 } 1338 }) 1339} 1340 1341func TestRegexpFile(t *testing.T) { 1342 content := []byte("needle the bla") 1343 1344 name := "let's play: find the mussel" 1345 b := testIndexBuilder(t, nil, 1346 Document{Name: name, Content: content}, 1347 Document{Name: "play.txt", Content: content}) 1348 1349 t.Run("LineMatches", func(t *testing.T) { 1350 sres := searchForTest(t, b, 1351 &query.Regexp{ 1352 Regexp: mustParseRE("play.*mussel"), 1353 FileName: true, 1354 }) 1355 1356 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1357 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1358 } 1359 1360 if sres.Files[0].FileName != name { 1361 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1362 } 1363 }) 1364 1365 t.Run("ChunkMatches", func(t *testing.T) { 1366 sres := searchForTest(t, b, 1367 &query.Regexp{ 1368 Regexp: mustParseRE("play.*mussel"), 1369 FileName: true, 1370 }, chunkOpts) 1371 1372 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1373 t.Fatalf("got %v, want 1 match 
in 1 file", sres.Files)
		}

		if sres.Files[0].FileName != name {
			t.Errorf("got match %#v, want name %q", sres.Files[0], name)
		}
	})
}

// TestRegexpOrder checks that a regexp whose literal parts occur in the
// document in the opposite order ("bla" before "dle") yields no match, in both
// line-match and chunk-match mode.
func TestRegexpOrder(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// chunkOpts was missing here, which made this subtest an exact
		// duplicate of "LineMatches".
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			},
			chunkOpts,
		)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})
}

func TestRepoName(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testIndexBuilder(t, &Repository{Name: "bla"},
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("foo")},
			))

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			))
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: 
regexp.MustCompile("foo")},
			),
			chunkOpts,
		)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		// The positive case must also run with chunkOpts; without it this
		// half of the subtest silently exercised line-match mode.
		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			),
			chunkOpts,
		)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})
}

// TestMergeMatches searches "bla" in "blablabla" and expects the three
// occurrences to be reported as a single line match / chunk match.
func TestMergeMatches(t *testing.T) {
	content := []byte("blablabla")
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"})
		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"},
			chunkOpts,
		)
		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})
}

// TestRepoURL checks that a search result exposes the repository's
// FileURLTemplate under RepoURLs and its LineFragmentTemplate under
// LineFragments, both keyed by repository name.
func TestRepoURL(t *testing.T) {
	content := []byte("blablabla")
	b := testIndexBuilder(t, &Repository{
		Name:                 "name",
		URL:                  "URL",
		CommitURLTemplate:    "commit",
		FileURLTemplate:      "file-url",
		LineFragmentTemplate: "fragment",
	}, Document{Name: "f1", Content: content})

	sres := searchForTest(t, b, &query.Substring{Pattern: "bla"})

	// The failure messages state the values that are actually asserted.
	if sres.RepoURLs["name"] != "file-url" {
		t.Errorf("got RepoURLs %v, want {name: file-url}", sres.RepoURLs)
	}
	if sres.LineFragments["name"] != "fragment" {
		t.Errorf("got LineFragments %v, want {name: fragment}", sres.LineFragments)
	}
}

func TestRegexpCaseSensitive(t *testing.T) {
	content := 
[]byte("bla\nfunc unmarshalGitiles\n")
	b := testIndexBuilder(t, nil, Document{
		Name:    "f1",
		Content: content,
	})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b,
			&query.Regexp{
				Regexp:        mustParseRE("func.*Gitiles"),
				CaseSensitive: true,
			})

		if len(res.Files) != 1 {
			t.Fatalf("got %v, want one match", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b,
			&query.Regexp{
				Regexp:        mustParseRE("func.*Gitiles"),
				CaseSensitive: true,
			},
			chunkOpts,
		)

		if len(res.Files) != 1 {
			t.Fatalf("got %v, want one match", res.Files)
		}
	})
}

// TestRegexpCaseFolding checks that a case-insensitive regexp written in
// upper case ("GITILES") still matches the mixed-case text in the document.
func TestRegexpCaseFolding(t *testing.T) {
	doc := Document{Name: "f1", Content: []byte("bla\nfunc unmarshalGitiles\n")}
	b := testIndexBuilder(t, nil, doc)

	q := &query.Regexp{
		Regexp:        mustParseRE("func.*GITILES"),
		CaseSensitive: false,
	}
	if res := searchForTest(t, b, q); len(res.Files) != 1 {
		t.Fatalf("got %v, want one match", res.Files)
	}
}

// TestCaseRegexp checks that a case-sensitive regexp built from lower-case
// character classes does not match an all-upper-case document.
func TestCaseRegexp(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("BLABLABLA")})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Regexp{
			Regexp:        mustParseRE("[xb][xl][xa]"),
			CaseSensitive: true,
		}

		if res := searchForTest(t, b, q); len(res.Files) != 0 {
			t.Fatalf("got %v, want no matches", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Regexp{
			Regexp:        mustParseRE("[xb][xl][xa]"),
			CaseSensitive: true,
		}

		if res := searchForTest(t, b, q, chunkOpts); len(res.Files) != 0 {
			t.Fatalf("got %v, want no matches", res.Files)
		}
	})
}

func TestNegativeRegexp(t *testing.T) {
	content := []byte("BLABLABLA needle bla")
1600 b := testIndexBuilder(t, nil, 1601 Document{Name: "f1", Content: content}) 1602 1603 t.Run("LineMatches", func(t *testing.T) { 1604 res := searchForTest(t, b, 1605 query.NewAnd( 1606 &query.Substring{ 1607 Pattern: "needle", 1608 }, 1609 &query.Not{ 1610 Child: &query.Regexp{ 1611 Regexp: mustParseRE(".cs"), 1612 }, 1613 })) 1614 1615 if len(res.Files) != 1 { 1616 t.Fatalf("got %v, want 1 match", res.Files) 1617 } 1618 }) 1619 1620 t.Run("ChunkMatches", func(t *testing.T) { 1621 res := searchForTest(t, b, 1622 query.NewAnd( 1623 &query.Substring{ 1624 Pattern: "needle", 1625 }, 1626 &query.Not{ 1627 Child: &query.Regexp{ 1628 Regexp: mustParseRE(".cs"), 1629 }, 1630 }, 1631 ), 1632 chunkOpts) 1633 1634 if len(res.Files) != 1 { 1635 t.Fatalf("got %v, want 1 match", res.Files) 1636 } 1637 }) 1638} 1639 1640func TestSymbolRank(t *testing.T) { 1641 t.Skip() 1642 1643 content := []byte("func bla() blubxxxxx") 1644 // ----------------01234567890123456789 1645 b := testIndexBuilder(t, nil, 1646 Document{ 1647 Name: "f1", 1648 Content: content, 1649 }, Document{ 1650 Name: "f2", 1651 Content: content, 1652 Symbols: []DocumentSection{{5, 8}}, 1653 }, Document{ 1654 Name: "f3", 1655 Content: content, 1656 }) 1657 1658 t.Run("LineMatches", func(t *testing.T) { 1659 res := searchForTest(t, b, 1660 &query.Substring{ 1661 CaseSensitive: false, 1662 Pattern: "bla", 1663 }) 1664 1665 if len(res.Files) != 3 { 1666 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1667 } 1668 if res.Files[0].FileName != "f2" { 1669 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1670 } 1671 }) 1672 1673 t.Run("ChunkMatches", func(t *testing.T) { 1674 res := searchForTest(t, b, 1675 &query.Substring{ 1676 CaseSensitive: false, 1677 Pattern: "bla", 1678 }, chunkOpts) 1679 1680 if len(res.Files) != 3 { 1681 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files)
		}
		if res.Files[0].FileName != "f2" {
			t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
		}
	})
}

// TestSymbolRankRegexpUTF8 (currently skipped) indexes three documents with
// identical content that starts with 100 multi-byte runes; only "f2" declares
// a symbol section over "bla". A regexp search for "b.a" is expected to return
// all three files with "f2" ranked first.
func TestSymbolRankRegexpUTF8(t *testing.T) {
	t.Skip()

	prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n"
	content := []byte(prefix +
		"func bla() blub")
	// ------012345678901234
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		}, Document{
			Name:    "f2",
			Content: content,
			// Symbol offsets are byte offsets, so they are shifted by the
			// byte length of the multi-byte prefix.
			Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}},
		}, Document{
			Name:    "f3",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("b.a"),
			})

		if len(res.Files) != 3 {
			t.Fatalf("got %#v, want 3 files", res.Files)
		}
		if res.Files[0].FileName != "f2" {
			t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
		}
	})

	// Subtest name fixed from the typo "ChunjkMatches" so that
	// `go test -run '/ChunkMatches'` selects it like its siblings.
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("b.a"),
			}, chunkOpts)

		if len(res.Files) != 3 {
			t.Fatalf("got %#v, want 3 files", res.Files)
		}
		if res.Files[0].FileName != "f2" {
			t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
		}
	})
}

func TestPartialSymbolRank(t *testing.T) {
	t.Skip()

	content := []byte("func bla() blub")
	// ----------------012345678901234

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 9}},
		}, Document{
			Name:    "f2",
			Content: content,
			Symbols: []DocumentSection{{4, 8}},
		}, Document{
			Name:    "f3",
			Content: content,
			Symbols: []DocumentSection{{4, 9}},
		})

	t.Run("LineMatches", func(t *testing.T) {
		res := 
searchForTest(t, b,
			&query.Substring{
				Pattern: "bla",
			})

		if len(res.Files) != 3 {
			t.Fatalf("got %#v, want 3 files", res.Files)
		}
		if res.Files[0].FileName != "f2" {
			t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b,
			&query.Substring{
				Pattern: "bla",
			}, chunkOpts)

		if len(res.Files) != 3 {
			t.Fatalf("got %#v, want 3 files", res.Files)
		}
		if res.Files[0].FileName != "f2" {
			t.Errorf("got %#v, want 'f2' as top match", res.Files[0])
		}
	})
}

// TestNegativeRepo checks that negating a repo filter which matches the only
// indexed repository removes every result, even though the content matches.
func TestNegativeRepo(t *testing.T) {
	// ---------------------------01234567890123
	doc := Document{Name: "f1", Content: []byte("bla the needle")}
	b := testIndexBuilder(t, &Repository{Name: "bla"}, doc)

	t.Run("LineMatches", func(t *testing.T) {
		q := query.NewAnd(
			&query.Substring{Pattern: "needle"},
			&query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
		)

		if res := searchForTest(t, b, q); len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := query.NewAnd(
			&query.Substring{Pattern: "needle"},
			&query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}},
		)

		if res := searchForTest(t, b, q, chunkOpts); len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", res.Files)
		}
	})
}

func TestListRepos(t *testing.T) {
	content := []byte("bla the needle\n")
	// ----------------012345678901234-

	t.Run("default and minimal fallback", func(t *testing.T) {
		repo := &Repository{
			Name:     "reponame",
			Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}},
		}
		b := testIndexBuilder(t, repo,
			Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}},
Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1832 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1833 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1834 1835 searcher := searcherForTest(t, b) 1836 1837 for _, opts := range []*ListOptions{ 1838 nil, 1839 {}, 1840 {Field: RepoListFieldRepos}, 1841 {Field: RepoListFieldReposMap}, 1842 } { 1843 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1844 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1845 1846 res, err := searcher.List(context.Background(), q, opts) 1847 if err != nil { 1848 t.Fatalf("List(%v): %v", q, err) 1849 } 1850 1851 want := &RepoList{ 1852 Repos: []*RepoListEntry{{ 1853 Repository: *repo, 1854 Stats: RepoStats{ 1855 Documents: 4, 1856 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1857 Shards: 1, 1858 1859 NewLinesCount: 4, 1860 DefaultBranchNewLinesCount: 2, 1861 OtherBranchesNewLinesCount: 3, 1862 }, 1863 }}, 1864 Stats: RepoStats{ 1865 Repos: 1, 1866 Documents: 4, 1867 ContentBytes: 68, 1868 Shards: 1, 1869 1870 NewLinesCount: 4, 1871 DefaultBranchNewLinesCount: 2, 1872 OtherBranchesNewLinesCount: 3, 1873 }, 1874 } 1875 ignored := []cmp.Option{ 1876 cmpopts.EquateEmpty(), 1877 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1878 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1879 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1880 cmpopts.IgnoreFields(Repository{}, "priority"), 1881 } 1882 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1883 t.Fatalf("mismatch (-want +got):\n%s", diff) 1884 } 1885 1886 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1887 res, err = searcher.List(context.Background(), q, nil) 1888 if err != nil { 1889 t.Fatalf("List(%v): %v", q, err) 1890 } 1891 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1892 t.Fatalf("got %v, want 0 matches", res) 1893 } 1894 }) 1895 } 1896 }) 1897 1898 t.Run("minimal", func(t *testing.T) { 1899 repo := &Repository{ 1900 ID: 
1234, 1901 Name: "reponame", 1902 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1903 RawConfig: map[string]string{"repoid": "1234"}, 1904 } 1905 b := testIndexBuilder(t, repo, 1906 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1907 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1908 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1909 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1910 1911 searcher := searcherForTest(t, b) 1912 1913 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1914 res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1915 if err != nil { 1916 t.Fatalf("List(%v): %v", q, err) 1917 } 1918 1919 want := &RepoList{ 1920 ReposMap: ReposMap{ 1921 repo.ID: { 1922 HasSymbols: repo.HasSymbols, 1923 Branches: repo.Branches, 1924 }, 1925 }, 1926 Stats: RepoStats{ 1927 Repos: 1, 1928 Shards: 1, 1929 Documents: 4, 1930 IndexBytes: 412, 1931 ContentBytes: 68, 1932 NewLinesCount: 4, 1933 DefaultBranchNewLinesCount: 2, 1934 OtherBranchesNewLinesCount: 3, 1935 }, 1936 } 1937 1938 ignored := []cmp.Option{ 1939 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1940 } 1941 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1942 t.Fatalf("mismatch (-want +got):\n%s", diff) 1943 } 1944 1945 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1946 res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 1947 if err != nil { 1948 t.Fatalf("List(%v): %v", q, err) 1949 } 1950 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 1951 t.Fatalf("got %v, want 0 matches", res) 1952 } 1953 }) 1954} 1955 1956func TestListReposByContent(t *testing.T) { 1957 content := []byte("bla the needle") 1958 1959 b := testIndexBuilder(t, &Repository{ 1960 Name: "reponame", 1961 }, 1962 Document{Name: "f1", Content: content}, 1963 Document{Name: "f2", Content: content}) 1964 1965 searcher := 
searcherForTest(t, b)
	q := &query.Substring{Pattern: "needle"}
	res, err := searcher.List(context.Background(), q, nil)
	if err != nil {
		t.Fatalf("List(%v): %v", q, err)
	}
	if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" {
		t.Fatalf("got %v, want 1 matches", res)
	}
	if got := res.Repos[0].Stats.Shards; got != 1 {
		t.Fatalf("got %d, want 1 shard", got)
	}
	q = &query.Substring{Pattern: "foo"}
	res, err = searcher.List(context.Background(), q, nil)
	if err != nil {
		t.Fatalf("List(%v): %v", q, err)
	}
	if len(res.Repos) != 0 {
		t.Fatalf("got %v, want 0 matches", res)
	}
}

// TestMetadata serializes an index for repository "reponame" into memory and
// checks that ReadMetadata returns that repository name as its first entry.
func TestMetadata(t *testing.T) {
	content := []byte("bla the needle")

	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	}, Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content})

	var buf bytes.Buffer
	if err := b.Write(&buf); err != nil {
		t.Fatal(err)
	}
	f := &memSeeker{buf.Bytes()}

	rd, _, err := ReadMetadata(f)
	if err != nil {
		t.Fatalf("ReadMetadata: %v", err)
	}

	// Fail with a readable message rather than an index-out-of-range panic
	// when no repository metadata comes back.
	if len(rd) == 0 {
		t.Fatalf("ReadMetadata returned no repositories, want %q", "reponame")
	}

	if got, want := rd[0].Name, "reponame"; got != want {
		t.Fatalf("got %q want %q", got, want)
	}
}

func TestOr(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("needle")},
		Document{Name: "f2", Content: []byte("banana")})
	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, query.NewOr(
			&query.Substring{Pattern: "needle"},
			&query.Substring{Pattern: "banana"}))

		if len(sres.Files) != 2 {
			t.Fatalf("got %v, want 2 files", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// chunkOpts was missing here, which made this subtest an exact
		// duplicate of "LineMatches".
		sres := searchForTest(t, b, query.NewOr(
			&query.Substring{Pattern: "needle"},
			&query.Substring{Pattern: "banana"}),
			chunkOpts,
		)

		if len(sres.Files) != 2 {
			t.Fatalf("got %v, want 2 
files", sres.Files)
		}
	})
}

// TestFrequency checks that "slashdot" is not found in a document that holds
// pieces of that word ("sla", "las", "shd", "dot") but never the word itself.
func TestFrequency(t *testing.T) {
	content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot")

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"})
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})
}

// TestMatchNewline searches with a regexp parsed under syntax.ClassNL, so the
// negated class [^a] may match the newline between "pqr" and "alex". In
// line-match mode the reported line is expected to be "alex"; in chunk-match
// mode the chunk is expected to be the whole document.
func TestMatchNewline(t *testing.T) {
	re, err := syntax.Parse("[^a]a", syntax.ClassNL)
	if err != nil {
		t.Fatalf("syntax.Parse: %v", err)
	}

	content := []byte("pqr\nalex")

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		// Only the second line is expected back; report that same slice
		// as "want" on failure instead of the whole document.
		want := content[len("pqr\n"):]

		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, want) {
			t.Errorf("got match line %q, want %q", l, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
			t.Errorf("got chunk content %q, want %q", c, content)
		}
	})
}

func TestSubRepo(t *testing.T) {
	subRepos := map[string]*Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}
2100 2101 content := []byte("pqr\nalex") 2102 2103 b := testIndexBuilder(t, &Repository{ 2104 SubRepoMap: subRepos, 2105 }, Document{ 2106 Name: "sub/f1", 2107 Content: content, 2108 SubRepositoryPath: "sub", 2109 }) 2110 2111 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 2112 if len(sres.Files) != 1 { 2113 t.Fatalf("got %v, wanted 1 matches", sres.Files) 2114 } 2115 2116 f := sres.Files[0] 2117 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 2118 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 2119 } 2120 2121 if sres.LineFragments["sub-name"] != "sub-line" { 2122 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 2123 } 2124} 2125 2126func TestSearchEither(t *testing.T) { 2127 b := testIndexBuilder(t, nil, 2128 Document{Name: "f1", Content: []byte("bla needle bla")}, 2129 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 2130 2131 t.Run("LineMatches", func(t *testing.T) { 2132 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 2133 if len(sres.Files) != 2 { 2134 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2135 } 2136 2137 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2138 if len(sres.Files) != 1 { 2139 t.Fatalf("got %v, wanted 1 match", sres.Files) 2140 } 2141 2142 if got, want := sres.Files[0].FileName, "f1"; got != want { 2143 t.Errorf("got %q, want %q", got, want) 2144 } 2145 }) 2146 2147 t.Run("ChunkMatches", func(t *testing.T) { 2148 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 2149 if len(sres.Files) != 2 { 2150 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2151 } 2152 2153 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2154 if len(sres.Files) != 1 { 2155 t.Fatalf("got %v, wanted 1 match", sres.Files) 2156 } 2157 2158 if got, want := sres.Files[0].FileName, "f1"; got != want { 2159 t.Errorf("got %q, want %q", 
got, want)
		}
	})
}

// TestUnicodeExactMatch checks that a case-sensitive substring query for a
// needle containing accented characters finds the document that holds it.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	doc := Document{Name: "f1", Content: []byte("blá blá " + needle + " blâ")}
	b := testIndexBuilder(t, nil, doc)

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true})
		if len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts); len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
		}
	})
}

func TestUnicodeCoverContent(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
		}

		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
		}

		if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files)
		}

		res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case 
insensitive: got %v, wanted 1 match", res.Files)
		}

		got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
		want := uint32(strings.Index(string(content), needle))
		if got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})
}

// TestUnicodeNonCoverContent runs a case-insensitive, content-only search with
// an upper-case accented pattern and checks that the match is reported at the
// byte offset where the mixed-case needle starts in the document.
func TestUnicodeNonCoverContent(t *testing.T) {
	needle := "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")
	// Byte offset of the needle inside the document; both modes must report it.
	wantOffset := uint32(strings.Index(string(content), needle))

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", res.Files)
		}

		gotOffset := res.Files[0].LineMatches[0].LineFragments[0].Offset
		if gotOffset != wantOffset {
			t.Errorf("got %d want %d", gotOffset, wantOffset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", res.Files)
		}

		if gotOffset := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset; gotOffset != wantOffset {
			t.Errorf("got %d want %d", gotOffset, wantOffset)
		}
	})
}

// kelvinCodePoint is the code point U+212A (8490 decimal), KELVIN SIGN, which
// the Unicode tests pair with a plain lower-case 'k'.
const kelvinCodePoint = 8490

func TestUnicodeVariableLength(t *testing.T) {
	lower := 'k'
	upper := rune(kelvinCodePoint)

	needle := "nee" + string([]rune{lower}) + "eed"
	corpus := []byte("nee" + string([]rune{upper}) + "eed" +
		" ee" + string([]rune{lower}) + "ee" +
		" ee" + string([]rune{upper}) + "ee")

	t.Run("LineMatches", func(t *testing.T) {
		b := testIndexBuilder(t, nil,
			Document{Name: "f1", Content: []byte(corpus)})

		res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
		if len(res.Files) != 1 {
t.Fatalf("got %v, wanted 1 match", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		b := testIndexBuilder(t, nil,
			Document{Name: "f1", Content: []byte(corpus)})

		res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", res.Files)
		}
	})
}

// TestUnicodeFileStartOffsets indexes a multi-byte document followed by an
// ASCII document and checks that a content search for the ASCII text yields
// exactly one file.
func TestUnicodeFileStartOffsets(t *testing.T) {
	unicode := "世界"
	wat := "waaaaaat"
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(unicode),
		},
		Document{
			Name:    "f2",
			Content: []byte(wat),
		},
	)
	q := &query.Substring{Pattern: wat, Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", res.Files)
	}
}

// TestLongFileUTF8 searches for an ASCII needle placed after 100 repetitions
// of a multi-byte string and expects exactly one matching file.
func TestLongFileUTF8(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	content := []byte(strings.Repeat(unicode, 100) + needle)
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("a", 50)),
		},
		Document{
			Name:    "f2",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestEstimateDocCount runs searches with EstimateDocCount set and checks
// Stats.ShardFilesConsidered for a matching and a non-matching repo filter.
func TestEstimateDocCount(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	t.Run("LineMatches", func(t *testing.T) {
		// The failure messages report the same field the condition checks.
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})
}

// TestUTF8CorrectCorpus does a file-name search for an ASCII needle in an
// index whose first document has multi-byte content, expecting one file.
func TestUTF8CorrectCorpus(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat(unicode, 100)),
		},
		Document{
			Name:    "xxxxxneeedle",
			Content: []byte("hello"),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestBuilderStats writes a one-document index and checks the builder's
// reported ContentSize.
func TestBuilderStats(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})
	var buf bytes.Buffer
	if err := b.Write(&buf); err != nil {
		t.Fatal(err)
	}

	if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}

// TestIOStats checks the ContentBytesLoaded and IndexBytesLoaded statistics
// of a case-sensitive substring search over a 4096-byte document.
func TestIOStats(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, chunkOpts)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})
}

// TestStartLineAnchor checks that "^start" matches a line beginning with
// "start" and that "^middle" does not match "middle" in the middle of a line.
func TestStartLineAnchor(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{
			Name: "f1",
			Content: []byte(
				`hello
start of middle of line
`),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			// Searching with a query that failed to parse is meaningless; stop here.
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})
}

// TestAndOrUnicode combines a regexp query with nested And/Or repo and branch
// atoms and runs it against content containing a multi-byte rune.
func TestAndOrUnicode(t *testing.T) {
	q, err := query.Parse("orange.*apple")
	if err != nil {
		// The parsed query is embedded in finalQ below, so a parse failure is fatal.
		t.Fatalf("parse: %v", err)
	}
	finalQ := query.NewAnd(q,
		query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
			query.NewOr(&query.Branch{Pattern: "master"}))))

	b := testIndexBuilder(t, &Repository{
		Name:     "name",
		Branches: []RepositoryBranch{{"master", "master-version"}},
	}, Document{
		Name:    "f2",
		Content: []byte("orange\u2318apple"),
		// --------------0123456 78901
		Branches: []string{"master"},
	})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestAndShort ANDs a two-character substring with a longer one and expects
// only f1, the single document containing both.
func TestAndShort(t *testing.T) {
	content := []byte("bla needle at orange bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: []byte("xx at xx")},
		Document{Name: "f3", Content: []byte("yy orange xx")},
	)

	q := query.NewAnd(&query.Substring{Pattern: "at"},
		&query.Substring{Pattern: "orange"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestNoCollectRegexpSubstring searches for the regexp "final[,.]" in content
// where "final" occurs on two lines but only one occurrence is followed by
// punctuation, and expects a single line/chunk match.
func TestNoCollectRegexpSubstring(t *testing.T) {
	content := []byte("bla final bla\nfoo final, foo")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
	)

	q := &query.Regexp{
		Regexp: mustParseRE("final[,.]"),
	}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.LineMatches) != 1 {
			t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.ChunkMatches) != 1 {
			// Report the chunk matches that were actually checked.
			t.Fatalf("got chunk matches %v, want 1 chunk match", f.ChunkMatches)
		}
	})
}

// printLineMatches renders line matches as a comma-separated list of
// "lineNumber:quotedLine fragments" entries for use in failure messages.
func printLineMatches(ms []LineMatch) string {
	var ss []string
	for _, m := range ms {
		ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
	}

	return strings.Join(ss, ", ")
}

// TestLang filters a substring search by language "cpp" and expects only f3.
func TestLang(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "cpp"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut queries for a language absent from the index and expects
// no results and zero IndexBytesLoaded.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms runs a query consisting only of a language atom and
// expects the single java document (f2).
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms combines a negated substring with a repo filter and
// expects both documents to be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
}

// TestSymbolBoundaryStart searches for a symbol located at offset 0 of the
// document and checks the reported match offset.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd searches for a symbol that ends at the last byte of
// the document and checks that the match starts at offset 14.
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring searches for "bla" as a symbol substring and expects
// only the occurrence inside the symbol section (offset 7, length 3).
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact searches for "sym" where only the occurrence at
// offset 4 is a declared symbol and checks that this is the one reported.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact searches symbols with the anchored regexp "^bla$" and
// expects only the symbol at offset 5 to match.
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 5 {
			t.Fatalf("got offset %d, want 5", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 5 {
			t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpPartial searches a single symbol with an unanchored regexp
// and checks the offset and length of the partial match inside it.
func TestSymbolRegexpPartial(t *testing.T) {
	content := []byte("abcdef")
	// ----------------012345

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 6}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 1 {
			t.Fatalf("got offset %d, want 1", m.Offset)
		}
		if m.MatchLength != 3 {
			t.Fatalf("got match length %d, want 3", m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 1 {
			t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
		}
		if m.End.ByteOffset != 4 {
			t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
		}
	})
}

// TestSymbolRegexpAll searches symbols with ".*" and checks that every
// declared symbol of every document is reported at its start offset.
func TestSymbolRegexpAll(t *testing.T) {
	docs := []Document{
		{
			Name:    "f1",
			Content: []byte("Hello Zoekt"),
			// --------------01234567890
			Symbols: []DocumentSection{{0, 5}, {6, 11}},
		},
		{
			Name:    "f2",
			Content: []byte("Second Zoekt Third"),
			// --------------012345678901234567
			Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
		},
	}

	b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE(".*")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			// Guard the index below so a regression fails cleanly instead of panicking.
			if len(res.Files[i].LineMatches) == 0 {
				t.Fatalf("got no line matches in doc %s", want.Name)
			}
			got := res.Files[i].LineMatches[0].LineFragments
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != got[j].Offset {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
				}
			}
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			// Guard the index below so a regression fails cleanly instead of panicking.
			if len(res.Files[i].ChunkMatches) == 0 {
				t.Fatalf("got no chunk matches in doc %s", want.Name)
			}
			got := res.Files[i].ChunkMatches[0].Ranges
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != uint32(got[j].Start.ByteOffset) {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
				}
			}
		}
	})
}

// TestHitIterTerminate runs a search with no match; the test makes no
// assertions on the result and only requires the search to complete.
func TestHitIterTerminate(t *testing.T) {
	// contrived input: trigram frequencies forces selecting abc +
	// def for the distance iteration. There is no match, so this
	// will advance the compressedPostingIterator to beyond the
	// end.
	content := []byte("abc bcdbcd cdecde abcabc def efg")
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
	})
}

// TestDistanceHitIterBailLast searches for "UAST" in content that contains
// "AST" and "UASH" but never "UAST", and expects no results.
func TestDistanceHitIterBailLast(t *testing.T) {
	content := []byte("AST AST AST UASH")
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})

	// This subtest runs with chunkOpts, so it is named accordingly.
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})
}

// TestDocumentSectionRuneBoundary checks that Add returns an error for each
// of the listed symbol sections over multi-byte content.
func TestDocumentSectionRuneBoundary(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b, err := NewIndexBuilder(nil)
	if err != nil {
		t.Fatalf("NewIndexBuilder: %v", err)
	}

	for i, sec := range []DocumentSection{
		{2, 6},
		{3, 7},
	} {
		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
			Symbols: []DocumentSection{sec},
		}); err == nil {
			t.Errorf("%d: Add succeeded", i)
		}
	}
}

// TestUnicodeQuery searches for a pattern made of three kelvinCodePoint runes
// and checks that the single match spans the full byte length of the content.
func TestUnicodeQuery(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(content),
		},
	)

	q := &query.Substring{Pattern: content}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Report the chunk matches that were actually checked.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 range, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks
// that its original text is not searchable while "NOT-INDEXED" matches it.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {

		b, err := NewIndexBuilder(nil)
		if err != nil {
			t.Fatalf("NewIndexBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with accepted and rejected
// inputs, both with and without the allow-large-file flag.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if err := docChecker.Check([]byte(text), 20000, false); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if err := docChecker.Check([]byte(text), 15, false); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if err := docChecker.Check([]byte(text), 15, true); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx"} {
		if err := docChecker.Check([]byte(text), 15, true); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd runs a content regexp requiring "apple" and "banana" on the
// same line and expects only f1 with a single regexp considered.
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName runs the same-line regexp as a file-name query and
// expects only the document named "apple banana".
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestMultiLineRegex runs a regexp that spans a newline and expects f1 to be
// reported as two line matches, or as one chunk with one range.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp: r,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: res.Files[0] is indexed below.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].LineMatches); l != 2 {
			t.Errorf("got %v, want 2 line matches", l)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: res.Files[0] and ChunkMatches[0] are indexed below.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].ChunkMatches); l != 1 {
			t.Fatalf("got %v, want 1 chunk matches", l)
		}
		if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
			t.Errorf("got %v, want 1 chunk ranges", l)
		}
	})
}

// TestSearchTypeFileName checks query.TypeFileName: combined with a content
// atom only the content match is reported, and on its own it yields a
// file-name match.
func TestSearchTypeFileName(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	},
		Document{Name: "f1", Content: []byte("bla the needle")},
		Document{Name: "f2", Content: []byte("another file another\nneedle")},
		// -----------------------------------012345678901234567890-123456
	)

	t.Run("LineMatches", func(t *testing.T) {
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}))
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			})
		wantSingleMatch(res, "f2")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches, want 1", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}),
			chunkOpts,
		)
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			},
			chunkOpts,
		)
		wantSingleMatch(res, "f2")
	})
}

func TestSearchTypeLanguage(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	},
		Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
		Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
		Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
	)

	t.Log(b.languageMap)

	t.Run("LineMatches", func(t *testing.T) {
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		res := searchForTest(t, b, &query.Language{Language: "Apex"})
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"})
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"})
		wantSingleMatch(res, "hello.h")

		// test fallback language search by pretending it's an older index version
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		wantSingleMatch(res, "hello.h")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		b.featureVersion = FeatureVersion // reset feature version
		res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
		wantSingleMatch(res, "hello.h")

		// test fallback language search by pretending it's an older index version
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		b.featureVersion = 11 // force
fallback 3541 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3542 wantSingleMatch(res, "hello.h") 3543 }) 3544} 3545 3546func TestStats(t *testing.T) { 3547 ignored := []cmp.Option{ 3548 cmpopts.EquateEmpty(), 3549 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3550 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3551 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3552 } 3553 3554 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3555 searcher := searcherForTest(t, b) 3556 indexdata := searcher.(*indexData) 3557 return indexdata.repoListEntry 3558 } 3559 3560 t.Run("one empty repo", func(t *testing.T) { 3561 b := testIndexBuilder(t, nil) 3562 got := repoListEntries(b) 3563 want := []RepoListEntry{ 3564 { 3565 Stats: RepoStats{ 3566 Repos: 0, 3567 Shards: 1, 3568 Documents: 0, 3569 IndexBytes: 20, 3570 ContentBytes: 0, 3571 NewLinesCount: 0, 3572 DefaultBranchNewLinesCount: 0, 3573 OtherBranchesNewLinesCount: 0, 3574 }, 3575 }, 3576 } 3577 3578 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3579 t.Fatalf("mismatch (-want +got):\n%s", diff) 3580 } 3581 }) 3582 3583 t.Run("one simple shard", func(t *testing.T) { 3584 b := testIndexBuilder(t, nil, 3585 Document{Name: "doc 0", Content: []byte("content 0")}, 3586 Document{Name: "doc 1", Content: []byte("content 1")}, 3587 ) 3588 got := repoListEntries(b) 3589 want := []RepoListEntry{ 3590 { 3591 Stats: RepoStats{ 3592 Repos: 0, 3593 Shards: 1, 3594 Documents: 2, 3595 IndexBytes: 224, 3596 ContentBytes: 28, 3597 NewLinesCount: 0, 3598 DefaultBranchNewLinesCount: 0, 3599 OtherBranchesNewLinesCount: 0, 3600 }, 3601 }, 3602 } 3603 3604 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3605 t.Fatalf("mismatch (-want +got):\n%s", diff) 3606 } 3607 }) 3608 3609 t.Run("one compound shard", func(t *testing.T) { 3610 b := testIndexBuilderCompound(t, 3611 []*Repository{ 3612 {Name: "repo 0"}, 3613 {Name: "repo 1"}, 3614 }, 3615 [][]Document{ 3616 { 3617 {Name: "doc 
0", Content: []byte("content 0")}, 3618 {Name: "doc 1", Content: []byte("content 1")}, 3619 }, 3620 { 3621 {Name: "doc 2", Content: []byte("content 2")}, 3622 {Name: "doc 3", Content: []byte("content 3")}, 3623 }, 3624 }, 3625 ) 3626 got := repoListEntries(b) 3627 want := []RepoListEntry{ 3628 { 3629 Stats: RepoStats{ 3630 Repos: 0, 3631 Shards: 1, 3632 Documents: 2, 3633 IndexBytes: 180, 3634 ContentBytes: 28, 3635 NewLinesCount: 0, 3636 DefaultBranchNewLinesCount: 0, 3637 OtherBranchesNewLinesCount: 0, 3638 }, 3639 }, 3640 { 3641 Stats: RepoStats{ 3642 Repos: 0, 3643 Shards: 1, 3644 Documents: 2, 3645 IndexBytes: 180, 3646 ContentBytes: 28, 3647 NewLinesCount: 0, 3648 DefaultBranchNewLinesCount: 0, 3649 OtherBranchesNewLinesCount: 0, 3650 }, 3651 }, 3652 } 3653 3654 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3655 t.Fatalf("mismatch (-want +got):\n%s", diff) 3656 } 3657 }) 3658 3659 t.Run("compound shard with empty repos", func(t *testing.T) { 3660 b := testIndexBuilderCompound(t, 3661 []*Repository{ 3662 {Name: "repo 0"}, 3663 {Name: "repo 1"}, 3664 {Name: "repo 2"}, 3665 {Name: "repo 3"}, 3666 {Name: "repo 4"}, 3667 }, 3668 [][]Document{ 3669 {{Name: "doc 0", Content: []byte("content 0")}}, 3670 nil, 3671 {{Name: "doc 1", Content: []byte("content 1")}}, 3672 nil, 3673 nil, 3674 }, 3675 ) 3676 got := repoListEntries(b) 3677 3678 entryEmpty := RepoListEntry{Stats: RepoStats{ 3679 Shards: 1, 3680 Documents: 0, 3681 ContentBytes: 0, 3682 }} 3683 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3684 Shards: 1, 3685 Documents: 1, 3686 ContentBytes: 14, 3687 }} 3688 3689 want := []RepoListEntry{ 3690 entryNonEmpty, 3691 entryEmpty, 3692 entryNonEmpty, 3693 entryEmpty, 3694 entryEmpty, 3695 } 3696 3697 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3698 t.Fatalf("mismatch (-want +got):\n%s", diff) 3699 } 3700 }) 3701} 3702 3703// This tests the frequent pattern "\bLITERAL\b". 
3704func TestWordSearch(t *testing.T) { 3705 content := []byte("needle the bla") 3706 // ----------------01234567890123 3707 3708 b := testIndexBuilder(t, nil, 3709 Document{ 3710 Name: "f1", 3711 Content: content, 3712 }) 3713 3714 t.Run("LineMatches", func(t *testing.T) { 3715 sres := searchForTest(t, b, 3716 &query.Regexp{ 3717 Regexp: mustParseRE("\\bthe\\b"), 3718 CaseSensitive: true, 3719 Content: true, 3720 }) 3721 3722 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3723 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3724 } 3725 3726 if sres.Stats.RegexpsConsidered != 0 { 3727 t.Fatal("expected regexp to be skipped") 3728 } 3729 3730 got := sres.Files[0].LineMatches[0] 3731 want := LineMatch{ 3732 LineFragments: []LineFragmentMatch{{ 3733 LineOffset: 7, 3734 Offset: 7, 3735 MatchLength: 3, 3736 }}, 3737 Line: content, 3738 FileName: false, 3739 LineNumber: 1, 3740 LineStart: 0, 3741 LineEnd: 14, 3742 } 3743 3744 if !reflect.DeepEqual(got, want) { 3745 t.Errorf("got %#v, want %#v", got, want) 3746 } 3747 }) 3748 3749 t.Run("ChunkMatches", func(t *testing.T) { 3750 sres := searchForTest(t, b, 3751 &query.Regexp{ 3752 Regexp: mustParseRE("\\bthe\\b"), 3753 CaseSensitive: true, 3754 }, chunkOpts) 3755 3756 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3757 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3758 } 3759 3760 if sres.Stats.RegexpsConsidered != 0 { 3761 t.Fatal("expected regexp to be skipped") 3762 } 3763 3764 got := sres.Files[0].ChunkMatches[0] 3765 want := ChunkMatch{ 3766 Content: content, 3767 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3768 Ranges: []Range{{ 3769 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3770 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3771 }}, 3772 } 3773 3774 if diff := cmp.Diff(want, got); diff != "" { 3775 t.Fatal(diff) 3776 } 3777 }) 3778}