fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 "github.com/kylelemons/godebug/pretty" 30 31 "github.com/sourcegraph/zoekt/query" 32) 33 34func clearScores(r *SearchResult) { 35 for i := range r.Files { 36 r.Files[i].Score = 0.0 37 for j := range r.Files[i].LineMatches { 38 r.Files[i].LineMatches[j].Score = 0.0 39 } 40 for j := range r.Files[i].ChunkMatches { 41 r.Files[i].ChunkMatches[j].Score = 0.0 42 } 43 r.Files[i].Checksum = nil 44 r.Files[i].Debug = "" 45 } 46} 47 48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 49 t.Helper() 50 51 b, err := NewIndexBuilder(repo) 52 if err != nil { 53 t.Fatalf("NewIndexBuilder: %v", err) 54 } 55 56 for i, d := range docs { 57 if err := b.Add(d); err != nil { 58 t.Fatalf("Add %d: %v", i, err) 59 } 60 } 61 62 return b 63} 64 65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 66 t.Helper() 67 68 b := newIndexBuilder() 69 b.indexFormatVersion = NextIndexFormatVersion 70 71 if len(repos) != len(docs) { 72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 73 } 74 75 for i, repo := range repos { 76 if err := b.setRepository(repo); err != nil { 77 t.Fatal(err) 78 } 79 for j, d := range docs[i] { 80 if err := b.Add(d); err != nil { 81 t.Fatalf("Add %d %d: %v", i, j, err) 82 } 83 } 84 } 85 86 return b 87} 88 89func TestBoundary(t *testing.T) { 90 b := testIndexBuilder(t, nil, 91 Document{Name: "f1", Content: []byte("x the")}, 92 Document{Name: "f1", Content: []byte("reader")}) 93 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 94 if len(res.Files) > 0 { 95 t.Fatalf("got %v, want no matches", res.Files) 96 } 97} 98 99func TestDocSectionInvalid(t *testing.T) { 100 b, err := NewIndexBuilder(nil) 101 if err != nil { 102 t.Fatalf("NewIndexBuilder: %v", err) 103 } 104 doc := Document{ 105 Name: "f1", 106 Content: []byte("01234567890123"), 107 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 108 } 109 110 if err := b.Add(doc); err == nil { 111 t.Errorf("overlapping doc sections should fail") 112 } 113 114 doc = Document{ 115 Name: "f1", 116 Content: []byte("01234567890123"), 117 Symbols: []DocumentSection{{0, 20}}, 118 } 119 120 if err := b.Add(doc); err == nil { 121 t.Errorf("doc sections beyond EOF should fail") 122 } 123} 124 125func TestBasic(t *testing.T) { 126 b := testIndexBuilder(t, nil, 127 Document{ 128 Name: "f2", 129 Content: []byte("to carry water in the no later bla"), 130 // --------------0123456789012345678901234567890123 131 }) 132 133 t.Run("LineMatch", func(t *testing.T) { 134 res := searchForTest(t, b, &query.Substring{ 135 Pattern: "water", 136 CaseSensitive: true, 137 }) 138 fmatches := res.Files 139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 140 t.Fatalf("got %v, want 1 matches", fmatches) 141 } 142 143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 144 want := "f2:9" 145 if got != want { 146 t.Errorf("1: got %s, want %s", got, want) 147 } 148 }) 149 150 t.Run("ChunkMatch", func(t *testing.T) { 151 res := searchForTest(t, b, &query.Substring{ 152 Pattern: "water", 153 CaseSensitive: true, 154 }, chunkOpts) 155 fmatches := res.Files 156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 157 t.Fatalf("got %v, want 1 matches", fmatches) 158 } 159 160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 161 want := "f2:9" 162 if got != want { 163 t.Errorf("1: got %s, want %s", got, want) 164 } 165 }) 166} 167 168func TestEmptyIndex(t *testing.T) { 169 b := testIndexBuilder(t, nil) 170 searcher := searcherForTest(t, b) 171 172 var opts SearchOptions 173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 174 t.Fatalf("Search: %v", err) 175 } 176 177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 178 t.Fatalf("List: %v", err) 179 } 180 181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 182 t.Fatalf("Search: %v", err) 183 } 184} 185 186type memSeeker struct { 187 data []byte 188} 189 190func (s *memSeeker) Name() string { 191 return "memseeker" 192} 193 194func (s *memSeeker) Close() {} 195func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 196 return s.data[off : off+sz], nil 197} 198 199func (s *memSeeker) Size() (uint32, error) { 200 return uint32(len(s.data)), nil 201} 202 203func TestNewlines(t *testing.T) { 204 b := testIndexBuilder(t, nil, 205 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 206 // ---------------------------------------------012345-678901-234 207 208 t.Run("LineMatches", func(t *testing.T) { 209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 210 211 matches := sres.Files 212 want := []FileMatch{{ 213 FileName: "filename", 214 LineMatches: []LineMatch{{ 215 LineFragments: []LineFragmentMatch{{ 216 Offset: 8, 217 LineOffset: 2, 218 MatchLength: 3, 219 }}, 220 Line: []byte("line2"), 221 LineStart: 6, 222 LineEnd: 11, 223 LineNumber: 2, 224 }}, 225 }} 226 227 if !reflect.DeepEqual(matches, want) { 228 t.Errorf("got %v, want %v", matches, want) 229 } 230 }) 231 232 t.Run("ChunkMatches", func(t *testing.T) { 233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 234 235 matches := sres.Files 236 want := []FileMatch{{ 237 FileName: "filename", 238 ChunkMatches: []ChunkMatch{{ 239 Content: []byte("line2"), 240 ContentStart: Location{ 241 ByteOffset: 6, 242 LineNumber: 2, 243 Column: 1, 244 }, 245 Ranges: []Range{{ 246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 248 }}, 249 }}, 250 }} 251 252 if diff := cmp.Diff(want, matches); diff != "" { 253 t.Fatal(diff) 254 } 255 }) 256} 257 258// A result spanning multiple lines should have LineMatches that only cover 259// single lines. 260func TestQueryNewlines(t *testing.T) { 261 text := "line1\nline2\nbla" 262 b := testIndexBuilder(t, nil, 263 Document{Name: "filename", Content: []byte(text)}) 264 265 t.Run("LineMatches", func(t *testing.T) { 266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 267 matches := sres.Files 268 if len(matches) != 1 { 269 t.Fatalf("got %d file matches, want exactly one", len(matches)) 270 } 271 m := matches[0] 272 if len(m.LineMatches) != 2 { 273 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 274 } 275 }) 276 277 t.Run("ChunkMatches", func(t *testing.T) { 278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 279 matches := sres.Files 280 if len(matches) != 1 { 281 t.Fatalf("got %d file matches, want exactly one", len(matches)) 282 } 283 m := matches[0] 284 if len(m.ChunkMatches) != 1 { 285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 286 } 287 }) 288} 289 290var chunkOpts = SearchOptions{ChunkMatches: true} 291 292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 293 searcher := searcherForTest(t, b) 294 var opts SearchOptions 295 if len(o) > 0 { 296 opts = o[0] 297 } 298 res, err := searcher.Search(context.Background(), q, &opts) 299 if err != nil { 300 t.Fatalf("Search(%s): %v", q, err) 301 } 302 clearScores(res) 303 return res 304} 305 306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 307 var buf bytes.Buffer 308 if err := b.Write(&buf); err != nil { 309 t.Fatal(err) 310 } 311 f := &memSeeker{buf.Bytes()} 312 313 searcher, err := NewSearcher(f) 314 if err != nil { 315 t.Fatalf("NewSearcher: %v", err) 316 } 317 318 return searcher 319} 320 321func TestCaseFold(t *testing.T) { 322 b := testIndexBuilder(t, nil, 323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 324 // -----------------------------------012345678901234 325 ) 326 t.Run("LineMatches", func(t *testing.T) { 327 sres := searchForTest(t, b, &query.Substring{ 328 Pattern: "bananas", 329 CaseSensitive: true, 330 }) 331 matches := sres.Files 332 if len(matches) != 0 { 333 t.Errorf("foldcase: got %#v, want 0 matches", matches) 334 } 335 336 sres = searchForTest(t, b, 337 &query.Substring{ 338 Pattern: "BaNaNAS", 339 CaseSensitive: true, 340 }) 341 matches = sres.Files 342 if len(matches) != 1 { 343 t.Errorf("no foldcase: got %v, want 1 matches", matches) 344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 345 t.Errorf("foldcase: got %v, want offsets 7", matches) 346 } 347 }) 348 349 t.Run("ChunkMatches", func(t *testing.T) { 350 sres := searchForTest(t, b, &query.Substring{ 351 Pattern: "bananas", 352 CaseSensitive: true, 353 }, chunkOpts) 354 matches := sres.Files 355 if len(matches) != 0 { 356 t.Errorf("foldcase: got %#v, want 0 matches", matches) 357 } 358 359 sres = searchForTest(t, b, 360 &query.Substring{ 361 Pattern: "BaNaNAS", 362 CaseSensitive: true, 363 }) 364 matches = sres.Files 365 if len(matches) != 1 { 366 t.Errorf("no foldcase: got %v, want 1 matches", matches) 367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 368 t.Errorf("foldcase: got %v, want offsets 7", matches) 369 } 370 }) 371} 372 373func TestAndSearch(t *testing.T) { 374 b := testIndexBuilder(t, nil, 375 Document{Name: "f1", Content: []byte("x banana y")}, 376 Document{Name: "f2", Content: []byte("x apple y")}, 377 Document{Name: "f3", Content: []byte("x banana apple y")}, 378 // ---------------------------------------0123456789012345 379 ) 380 381 t.Run("LineMatches", func(t *testing.T) { 382 sres := searchForTest(t, b, query.NewAnd( 383 &query.Substring{ 384 Pattern: "banana", 385 }, 386 &query.Substring{ 387 Pattern: "apple", 388 }, 389 )) 390 matches := sres.Files 391 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 392 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 393 } 394 395 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 396 t.Fatalf("got %#v, want offsets 2,9", matches) 397 } 398 399 wantStats := Stats{ 400 FilesLoaded: 1, 401 ContentBytesLoaded: 18, 402 IndexBytesLoaded: 8, 403 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 404 MatchCount: 1, 405 FileCount: 1, 406 FilesConsidered: 2, 407 ShardsScanned: 1, 408 } 409 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 410 t.Errorf("got stats diff %s", diff) 411 } 412 }) 413 414 t.Run("ChunkMatches", func(t *testing.T) { 415 sres := searchForTest(t, b, query.NewAnd( 416 &query.Substring{ 417 Pattern: "banana", 418 }, 419 &query.Substring{ 420 Pattern: "apple", 421 }, 422 ), chunkOpts) 423 matches := sres.Files 424 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 425 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 426 } 427 428 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 429 t.Fatalf("got %#v, want offsets 2,9", matches) 430 } 431 432 wantStats := Stats{ 433 FilesLoaded: 1, 434 ContentBytesLoaded: 18, 435 IndexBytesLoaded: 8, 436 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 437 MatchCount: 2, 438 FileCount: 1, 439 FilesConsidered: 2, 440 ShardsScanned: 1, 441 } 442 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 443 t.Errorf("got stats diff %s", diff) 444 } 445 }) 446} 447 448func TestAndNegateSearch(t *testing.T) { 449 b := testIndexBuilder(t, nil, 450 Document{Name: "f1", Content: []byte("x banana y")}, 451 // -----------------------------------0123456789 452 Document{Name: "f4", Content: []byte("x banana apple y")}) 453 454 t.Run("LineMatches", func(t *testing.T) { 455 sres := searchForTest(t, b, query.NewAnd( 456 &query.Substring{ 457 Pattern: "banana", 458 }, 459 &query.Not{Child: &query.Substring{ 460 Pattern: "apple", 461 }})) 462 463 matches := sres.Files 464 465 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 466 t.Fatalf("got %v, want 1 match", matches) 467 } 468 if matches[0].FileName != "f1" { 469 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 470 } 471 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 472 t.Fatalf("got %v, want offset 2", matches) 473 } 474 }) 475 476 t.Run("ChunkMatches", func(t *testing.T) { 477 sres := searchForTest(t, b, 478 query.NewAnd( 479 &query.Substring{ 480 Pattern: "banana", 481 }, 482 &query.Not{Child: &query.Substring{ 483 Pattern: "apple", 484 }}, 485 ), 486 chunkOpts, 487 ) 488 489 matches := sres.Files 490 491 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 492 t.Fatalf("got %v, want 1 match", matches) 493 } 494 if matches[0].FileName != "f1" { 495 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 496 } 497 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 498 t.Fatalf("got %v, want offset 2", matches) 499 } 500 }) 501} 502 503func TestNegativeMatchesOnlyShortcut(t *testing.T) { 504 b := testIndexBuilder(t, nil, 505 Document{Name: "f1", Content: []byte("x banana y")}, 506 Document{Name: "f2", Content: []byte("x appelmoes y")}, 507 Document{Name: "f3", Content: []byte("x appelmoes y")}, 508 Document{Name: "f3", Content: []byte("x appelmoes y")}) 509 510 t.Run("LineMatches", func(t *testing.T) { 511 sres := searchForTest(t, b, query.NewAnd( 512 &query.Substring{ 513 Pattern: "banana", 514 }, 515 &query.Not{Child: &query.Substring{ 516 Pattern: "appel", 517 }})) 518 519 if sres.Stats.FilesConsidered != 1 { 520 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 521 } 522 }) 523 524 t.Run("ChunkMatches", func(t *testing.T) { 525 sres := searchForTest(t, b, query.NewAnd( 526 &query.Substring{ 527 Pattern: "banana", 528 }, 529 &query.Not{Child: &query.Substring{ 530 Pattern: "appel", 531 }}), chunkOpts) 532 533 if sres.Stats.FilesConsidered != 1 { 534 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 535 } 536 }) 537} 538 539func TestFileSearch(t *testing.T) { 540 b := testIndexBuilder(t, nil, 541 Document{Name: "banzana", Content: []byte("x orange y")}, 542 // -------------0123456 543 Document{Name: "banana", Content: []byte("x apple y")}, 544 // -------------012345 545 ) 546 547 t.Run("LineMatches", func(t *testing.T) { 548 sres := searchForTest(t, b, &query.Substring{ 549 Pattern: "anan", 550 FileName: true, 551 }) 552 553 matches := sres.Files 554 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 555 t.Fatalf("got %v, want 1 match", matches) 556 } 557 558 got := matches[0].LineMatches[0] 559 want := LineMatch{ 560 Line: []byte("banana"), 561 LineFragments: []LineFragmentMatch{{ 562 Offset: 1, 563 LineOffset: 1, 564 MatchLength: 4, 565 }}, 566 FileName: true, 567 } 568 569 if !reflect.DeepEqual(got, want) { 570 t.Errorf("got %#v, want %#v", got, want) 571 } 572 }) 573 574 t.Run("ChunkMatches", func(t *testing.T) { 575 sres := searchForTest(t, b, &query.Substring{ 576 Pattern: "anan", 577 FileName: true, 578 }, chunkOpts) 579 580 matches := sres.Files 581 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 582 t.Fatalf("got %v, want 1 match", matches) 583 } 584 585 got := matches[0].ChunkMatches[0] 586 want := ChunkMatch{ 587 Content: []byte("banana"), 588 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 589 Ranges: []Range{{ 590 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 591 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 592 }}, 593 FileName: true, 594 } 595 596 if diff := cmp.Diff(want, got); diff != "" { 597 t.Fatal(diff) 598 } 599 }) 600 601 t.Run("FileNameSet", func(t *testing.T) { 602 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 603 604 matches := sres.Files 605 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 606 t.Fatalf("got %v, want 1 match", matches) 607 } 608 609 got := matches[0].ChunkMatches[0] 610 want := ChunkMatch{ 611 Content: []byte("banana"), 612 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 613 Ranges: []Range{{ 614 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 615 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 616 }}, 617 FileName: true, 618 } 619 620 if diff := cmp.Diff(want, got); diff != "" { 621 t.Fatal(diff) 622 } 623 }) 624} 625 626func TestFileCase(t *testing.T) { 627 b := testIndexBuilder(t, nil, 628 Document{Name: "BANANA", Content: []byte("x orange y")}) 629 630 t.Run("LineMatches", func(t *testing.T) { 631 sres := searchForTest(t, b, &query.Substring{ 632 Pattern: "banana", 633 FileName: true, 634 }) 635 636 matches := sres.Files 637 if len(matches) != 1 || matches[0].FileName != "BANANA" { 638 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 639 } 640 }) 641 642 t.Run("ChunkMatches", func(t *testing.T) { 643 sres := searchForTest(t, b, &query.Substring{ 644 Pattern: "banana", 645 FileName: true, 646 }, chunkOpts) 647 648 matches := sres.Files 649 if len(matches) != 1 || matches[0].FileName != "BANANA" { 650 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 651 } 652 }) 653} 654 655func TestFileRegexpSearchBruteForce(t *testing.T) { 656 b := testIndexBuilder(t, nil, 657 Document{Name: "banzana", Content: []byte("x orange y")}, 658 Document{Name: "banana", Content: []byte("x apple y")}, 659 ) 660 t.Run("LineMatches", func(t *testing.T) { 661 sres := searchForTest(t, b, &query.Regexp{ 662 Regexp: mustParseRE("[qn][zx]"), 663 FileName: true, 664 }) 665 666 matches := sres.Files 667 if len(matches) != 1 || matches[0].FileName != "banzana" { 668 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 669 } 670 }) 671 t.Run("LineMatches", func(t *testing.T) { 672 sres := searchForTest(t, b, &query.Regexp{ 673 Regexp: mustParseRE("[qn][zx]"), 674 FileName: true, 675 }, chunkOpts) 676 677 matches := sres.Files 678 if len(matches) != 1 || matches[0].FileName != "banzana" { 679 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 680 } 681 }) 682} 683 684func TestFileRegexpSearchShortString(t *testing.T) { 685 b := testIndexBuilder(t, nil, 686 Document{Name: "banana.py", Content: []byte("x orange y")}) 687 688 t.Run("LineMatches", func(t *testing.T) { 689 sres := searchForTest(t, b, &query.Regexp{ 690 Regexp: mustParseRE("ana.py"), 691 FileName: true, 692 }) 693 694 matches := sres.Files 695 if len(matches) != 1 || matches[0].FileName != "banana.py" { 696 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 697 } 698 }) 699 700 t.Run("ChunkMatches", func(t *testing.T) { 701 sres := searchForTest(t, b, &query.Regexp{ 702 Regexp: mustParseRE("ana.py"), 703 FileName: true, 704 }, chunkOpts) 705 706 matches := sres.Files 707 if len(matches) != 1 || matches[0].FileName != "banana.py" { 708 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 709 } 710 }) 711} 712 713func TestFileSubstringSearchBruteForce(t *testing.T) { 714 b := testIndexBuilder(t, nil, 715 Document{Name: "BANZANA", Content: []byte("x orange y")}, 716 Document{Name: "banana", Content: []byte("x apple y")}) 717 718 q := &query.Substring{ 719 Pattern: "z", 720 FileName: true, 721 } 722 723 t.Run("LineMatches", func(t *testing.T) { 724 res := searchForTest(t, b, q) 725 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 726 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 727 } 728 }) 729 730 t.Run("ChunkMatches", func(t *testing.T) { 731 res := searchForTest(t, b, q, chunkOpts) 732 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 733 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 734 } 735 }) 736} 737 738func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 739 b := testIndexBuilder(t, nil, 740 Document{Name: "BANZANA", Content: []byte("x orange y")}, 741 Document{Name: "bananaq", Content: []byte("x apple y")}) 742 743 q := &query.Substring{ 744 Pattern: "q", 745 FileName: true, 746 } 747 t.Run("LineMatches", func(t *testing.T) { 748 res := searchForTest(t, b, q) 749 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 750 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 751 } 752 }) 753 754 t.Run("LineMatches", func(t *testing.T) { 755 res := searchForTest(t, b, q, chunkOpts) 756 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 757 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 758 } 759 }) 760} 761 762func TestSearchMatchAll(t *testing.T) { 763 b := testIndexBuilder(t, nil, 764 Document{Name: "banzana", Content: []byte("x orange y")}, 765 Document{Name: "banana", Content: []byte("x apple y")}) 766 767 t.Run("LineMatches", func(t *testing.T) { 768 sres := searchForTest(t, b, &query.Const{Value: true}) 769 matches := sres.Files 770 if len(matches) != 2 { 771 t.Fatalf("got %v, want 2 matches", matches) 772 } 773 }) 774 775 t.Run("ChunkMatches", func(t *testing.T) { 776 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 777 matches := sres.Files 778 if len(matches) != 2 { 779 t.Fatalf("got %v, want 2 matches", matches) 780 } 781 }) 782} 783 784func TestSearchNewline(t *testing.T) { 785 b := testIndexBuilder(t, nil, 786 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 787 788 t.Run("LineMatches", func(t *testing.T) { 789 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 790 791 // Just check that we don't crash. 792 793 matches := sres.Files 794 if len(matches) != 1 { 795 t.Fatalf("got %v, want 1 matches", matches) 796 } 797 }) 798 799 t.Run("ChunkMatches", func(t *testing.T) { 800 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 801 802 // Just check that we don't crash. 803 804 matches := sres.Files 805 if len(matches) != 1 { 806 t.Fatalf("got %v, want 1 matches", matches) 807 } 808 }) 809} 810 811func TestSearchMatchAllRegexp(t *testing.T) { 812 b := testIndexBuilder(t, nil, 813 Document{Name: "banzana", Content: []byte("abcd")}, 814 Document{Name: "banana", Content: []byte("pqrs")}) 815 816 t.Run("LineMatches", func(t *testing.T) { 817 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 818 819 matches := sres.Files 820 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 821 t.Fatalf("got %v, want 2 matches", matches) 822 } 823 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 824 t.Fatalf("want 4 chars in every file, got %#v", matches) 825 } 826 827 }) 828 829 t.Run("ChunkMatches", func(t *testing.T) { 830 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 831 832 matches := sres.Files 833 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 834 t.Fatalf("got %v, want 2 matches", matches) 835 } 836 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 837 t.Fatalf("want 4 chars in every file, got %#v", matches) 838 } 839 840 }) 841} 842 843func TestFileRestriction(t *testing.T) { 844 b := testIndexBuilder(t, nil, 845 Document{Name: "banana1", Content: []byte("x orange y")}, 846 Document{Name: "banana2", Content: []byte("x apple y")}, 847 Document{Name: "orange", Content: []byte("x apple z")}) 848 849 t.Run("LineMatches", func(t *testing.T) { 850 sres := searchForTest(t, b, query.NewAnd( 851 &query.Substring{ 852 Pattern: "banana", 853 FileName: true, 854 }, 855 &query.Substring{ 856 Pattern: "apple", 857 })) 858 859 matches := sres.Files 860 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 861 t.Fatalf("got %v, want 1 match", matches) 862 } 863 864 match := matches[0].LineMatches[0] 865 got := string(match.Line) 866 want := "x apple y" 867 if got != want { 868 t.Errorf("got match %#v, want line %q", match, want) 869 } 870 }) 871 872 t.Run("ChunkMatches", func(t *testing.T) { 873 sres := searchForTest(t, b, query.NewAnd( 874 &query.Substring{ 875 Pattern: "banana", 876 FileName: true, 877 }, 878 &query.Substring{ 879 Pattern: "apple", 880 }), chunkOpts) 881 882 matches := sres.Files 883 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 884 t.Fatalf("got %v, want 1 match", matches) 885 } 886 887 match := matches[0].ChunkMatches[0] 888 got := string(match.Content) 889 want := "x apple y" 890 if got != want { 891 t.Errorf("got match %#v, want line %q", match, want) 892 } 893 }) 894} 895 896func TestFileNameBoundary(t *testing.T) { 897 b := testIndexBuilder(t, nil, 898 Document{Name: "banana2", Content: []byte("x apple y")}, 899 Document{Name: "helpers.go", Content: []byte("x apple y")}, 900 Document{Name: "foo", Content: []byte("x apple y")}) 901 902 t.Run("LineMatches", func(t *testing.T) { 903 sres := searchForTest(t, b, &query.Substring{ 904 Pattern: "helpers.go", 905 FileName: true, 906 }) 907 908 matches := sres.Files 909 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 910 t.Fatalf("got %v, want 1 match", matches) 911 } 912 }) 913 914 t.Run("ChunkMatches", func(t *testing.T) { 915 sres := searchForTest(t, b, &query.Substring{ 916 Pattern: "helpers.go", 917 FileName: true, 918 }, chunkOpts) 919 920 matches := sres.Files 921 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 922 t.Fatalf("got %v, want 1 match", matches) 923 } 924 }) 925} 926 927func TestDocumentOrder(t *testing.T) { 928 var docs []Document 929 for i := 0; i < 3; i++ { 930 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 931 } 932 933 b := testIndexBuilder(t, nil, docs...) 934 935 t.Run("LineMatches", func(t *testing.T) { 936 sres := searchForTest(t, b, query.NewAnd( 937 &query.Substring{ 938 Pattern: "needle", 939 })) 940 941 want := []string{"f0", "f1", "f2"} 942 var got []string 943 for _, f := range sres.Files { 944 got = append(got, f.FileName) 945 } 946 if !reflect.DeepEqual(got, want) { 947 t.Fatalf("got %v, want %v", got, want) 948 } 949 }) 950 951 t.Run("ChunkMatches", func(t *testing.T) { 952 sres := searchForTest(t, b, 953 query.NewAnd(&query.Substring{ 954 Pattern: "needle", 955 }), 956 chunkOpts, 957 ) 958 959 want := []string{"f0", "f1", "f2"} 960 var got []string 961 for _, f := range sres.Files { 962 got = append(got, f.FileName) 963 } 964 if !reflect.DeepEqual(got, want) { 965 t.Fatalf("got %v, want %v", got, want) 966 } 967 }) 968} 969 970func TestBranchMask(t *testing.T) { 971 b := testIndexBuilder(t, &Repository{ 972 Branches: []RepositoryBranch{ 973 {"master", "v-master"}, 974 {"stable", "v-stable"}, 975 {"bonzai", "v-bonzai"}, 976 }, 977 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 978 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 979 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 980 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 981 ) 982 983 t.Run("LineMatches", func(t *testing.T) { 984 sres := searchForTest(t, b, query.NewAnd( 985 &query.Substring{ 986 Pattern: "needle", 987 }, 988 &query.Branch{ 989 Pattern: "table", 990 })) 991 992 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 993 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 994 } 995 996 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 997 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 998 } 999 }) 1000 1001 t.Run("ChunkMatches", func(t *testing.T) { 1002 sres := searchForTest(t, b, query.NewAnd( 1003 &query.Substring{ 1004 Pattern: "needle", 1005 }, 1006 &query.Branch{ 1007 Pattern: "table", 1008 }), 1009 chunkOpts, 1010 ) 1011 1012 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1013 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1014 } 1015 1016 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1017 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1018 } 1019 }) 1020} 1021 1022func TestBranchLimit(t *testing.T) { 1023 for limit := 64; limit <= 65; limit++ { 1024 r := &Repository{} 1025 for i := 0; i < limit; i++ { 1026 s := fmt.Sprintf("b%d", i) 1027 r.Branches = append(r.Branches, RepositoryBranch{ 1028 s, "v-" + s, 1029 }) 1030 } 1031 _, err := NewIndexBuilder(r) 1032 if limit == 64 && err != nil { 1033 t.Fatalf("NewIndexBuilder: %v", err) 1034 } else if limit == 65 && err == nil { 1035 t.Fatalf("NewIndexBuilder succeeded") 1036 } 1037 } 1038} 1039 1040func TestBranchReport(t *testing.T) { 1041 branches := []string{"stable", "master"} 1042 b := testIndexBuilder(t, &Repository{ 1043 Branches: []RepositoryBranch{ 1044 {"stable", "vs"}, 1045 {"master", "vm"}, 1046 }, 1047 }, 1048 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1049 1050 t.Run("LineMatches", func(t *testing.T) { 1051 sres := searchForTest(t, b, &query.Substring{ 1052 Pattern: "needle", 1053 }) 1054 if len(sres.Files) != 1 { 1055 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1056 } 1057 1058 f := sres.Files[0] 1059 if !reflect.DeepEqual(f.Branches, branches) { 1060 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1061 } 1062 }) 1063 1064 t.Run("ChunkMatches", func(t *testing.T) { 1065 sres := searchForTest(t, b, &query.Substring{ 1066 Pattern: "needle", 1067 }, chunkOpts) 1068 if len(sres.Files) != 1 { 1069 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1070 } 1071 1072 f := sres.Files[0] 1073 if !reflect.DeepEqual(f.Branches, branches) { 1074 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1075 } 1076 }) 1077 1078} 1079 1080func TestBranchVersions(t *testing.T) { 1081 b := testIndexBuilder(t, &Repository{ 1082 Branches: []RepositoryBranch{ 1083 {"stable", "v-stable"}, 1084 {"master", "v-master"}, 1085 }, 1086 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1087 1088 t.Run("LineMatches", func(t *testing.T) { 1089 sres := searchForTest(t, b, &query.Substring{ 1090 Pattern: "needle", 1091 }) 1092 if len(sres.Files) != 1 { 1093 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1094 } 1095 1096 f := sres.Files[0] 1097 if f.Version != "v-master" { 1098 t.Fatalf("got file %#v, want version 'v-master'", f) 1099 } 1100 }) 1101 1102 t.Run("ChunkMatches", func(t *testing.T) { 1103 sres := searchForTest(t, b, &query.Substring{ 1104 Pattern: "needle", 1105 }, chunkOpts) 1106 if len(sres.Files) != 1 { 1107 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1108 } 1109 1110 f := sres.Files[0] 1111 if f.Version != "v-master" { 1112 t.Fatalf("got file %#v, want version 'v-master'", f) 1113 } 1114 }) 1115} 1116 1117func mustParseRE(s string) *syntax.Regexp { 1118 r, err := syntax.Parse(s, syntax.Perl) 1119 if err != nil { 1120 panic(err) 1121 } 1122 1123 return r 1124} 1125 1126func TestRegexp(t *testing.T) { 1127 content := []byte("needle the bla") 1128 // ----------------01234567890123 1129 1130 b := testIndexBuilder(t, nil, 1131 Document{ 1132 Name: "f1", 1133 Content: content, 1134 }) 1135 1136 t.Run("LineMatches", func(t *testing.T) { 1137 sres := searchForTest(t, b, 1138 &query.Regexp{ 1139 Regexp: mustParseRE("dle.*bla"), 1140 }) 1141 1142 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1143 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1144 } 1145 1146 got := sres.Files[0].LineMatches[0] 1147 want := LineMatch{ 1148 LineFragments: []LineFragmentMatch{{ 1149 LineOffset: 3, 1150 Offset: 3, 1151 MatchLength: 11, 1152 }}, 1153 Line: content, 1154 FileName: false, 1155 LineNumber: 1, 1156 LineStart: 0, 1157 LineEnd: 14, 1158 } 1159 1160 if !reflect.DeepEqual(got, want) { 1161 t.Errorf("got %#v, want %#v", got, want) 1162 } 1163 }) 1164 1165 t.Run("ChunkMatches", func(t *testing.T) { 1166 sres := searchForTest(t, b, 1167 &query.Regexp{ 1168 Regexp: mustParseRE("dle.*bla"), 1169 }, chunkOpts) 1170 1171 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1172 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1173 } 1174 1175 got := sres.Files[0].ChunkMatches[0] 1176 want := ChunkMatch{ 1177 Content: content, 1178 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1179 Ranges: []Range{{ 1180 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1181 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1182 }}, 1183 } 1184 1185 if diff := cmp.Diff(want, got); diff != "" { 1186 t.Fatal(diff) 1187 } 1188 }) 1189} 1190 1191func TestRegexpFile(t *testing.T) { 1192 content := []byte("needle the bla") 1193 1194 name := "let's play: find the mussel" 1195 b := testIndexBuilder(t, nil, 1196 Document{Name: name, Content: content}, 1197 Document{Name: "play.txt", Content: content}) 1198 1199 t.Run("LineMatches", func(t *testing.T) { 1200 sres := searchForTest(t, b, 1201 &query.Regexp{ 1202 Regexp: mustParseRE("play.*mussel"), 1203 FileName: true, 1204 }) 1205 1206 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1207 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1208 } 1209 1210 if sres.Files[0].FileName != name { 1211 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1212 } 1213 }) 1214 1215 t.Run("ChunkMatches", func(t *testing.T) { 1216 sres := searchForTest(t, b, 1217 &query.Regexp{ 1218 Regexp: mustParseRE("play.*mussel"), 1219 FileName: true, 1220 }, chunkOpts) 1221 1222 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1223 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1224 } 1225 1226 if sres.Files[0].FileName != name { 1227 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1228 } 1229 }) 1230} 1231 1232func TestRegexpOrder(t *testing.T) { 1233 content := []byte("bla the needle") 1234 // ----------------01234567890123 1235 1236 b := testIndexBuilder(t, nil, 1237 Document{Name: "f1", Content: content}) 1238 1239 t.Run("LineMatches", func(t *testing.T) { 1240 sres := searchForTest(t, b, 1241 &query.Regexp{ 1242 Regexp: mustParseRE("dle.*bla"), 1243 }) 1244 1245 if len(sres.Files) != 0 { 1246 t.Fatalf("got %v, want 0 matches", sres.Files) 1247 } 1248 }) 1249 1250 t.Run("ChunkMatches", func(t *testing.T) { 1251 sres := searchForTest(t, b, 1252 &query.Regexp{ 1253 Regexp: mustParseRE("dle.*bla"), 1254 }) 1255 1256 if len(sres.Files) != 0 { 1257 t.Fatalf("got %v, want 0 matches", sres.Files) 1258 } 1259 }) 1260} 1261 1262func TestRepoName(t *testing.T) { 1263 content := []byte("bla the needle") 1264 // ----------------01234567890123 1265 1266 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1267 Document{Name: "f1", Content: content}) 1268 1269 t.Run("LineMatches", func(t *testing.T) { 1270 sres := searchForTest(t, b, 1271 query.NewAnd( 1272 &query.Substring{Pattern: "needle"}, 1273 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1274 )) 1275 1276 if len(sres.Files) != 0 { 1277 t.Fatalf("got %v, want 0 matches", sres.Files) 1278 } 1279 1280 if sres.Stats.FilesConsidered > 0 { 1281 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1282 } 1283 1284 sres = searchForTest(t, b, 1285 query.NewAnd( 1286 &query.Substring{Pattern: "needle"}, 1287 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1288 )) 1289 if len(sres.Files) != 1 { 1290 t.Fatalf("got %v, want 1 match", sres.Files) 1291 } 1292 }) 1293 1294 t.Run("ChunkMatches", func(t *testing.T) { 1295 sres := searchForTest(t, b, 1296 query.NewAnd( 1297 &query.Substring{Pattern: "needle"}, 1298 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1299 ), 1300 chunkOpts, 1301 ) 1302 1303 if len(sres.Files) != 0 { 1304 t.Fatalf("got %v, want 0 matches", sres.Files) 1305 } 1306 1307 if sres.Stats.FilesConsidered > 0 { 1308 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1309 } 1310 1311 sres = searchForTest(t, b, 1312 query.NewAnd( 1313 &query.Substring{Pattern: "needle"}, 1314 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1315 )) 1316 if len(sres.Files) != 1 { 1317 t.Fatalf("got %v, want 1 match", sres.Files) 1318 } 1319 }) 1320} 1321 1322func TestMergeMatches(t *testing.T) { 1323 content := []byte("blablabla") 1324 b := testIndexBuilder(t, nil, 1325 Document{Name: "f1", Content: content}) 1326 1327 t.Run("LineMatches", func(t *testing.T) { 1328 sres := searchForTest(t, b, 1329 &query.Substring{Pattern: "bla"}) 1330 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1331 t.Fatalf("got %v, want 1 match", sres.Files) 1332 } 1333 }) 1334 1335 t.Run("ChunkMatches", func(t *testing.T) { 1336 sres := searchForTest(t, b, 1337 &query.Substring{Pattern: "bla"}, 1338 chunkOpts, 1339 ) 1340 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1341 t.Fatalf("got %v, want 1 match", sres.Files) 1342 } 1343 }) 1344} 1345 1346func TestRepoURL(t *testing.T) { 1347 content := []byte("blablabla") 1348 b := testIndexBuilder(t, &Repository{ 1349 Name: "name", 1350 URL: "URL", 1351 CommitURLTemplate: "commit", 1352 FileURLTemplate: "file-url", 1353 LineFragmentTemplate: "fragment", 1354 }, Document{Name: "f1", Content: content}) 1355 1356 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1357 1358 if sres.RepoURLs["name"] != "file-url" { 1359 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1360 } 1361 if sres.LineFragments["name"] != "fragment" { 1362 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1363 } 1364} 1365 1366func TestRegexpCaseSensitive(t *testing.T) { 1367 content := []byte("bla\nfunc unmarshalGitiles\n") 1368 b := testIndexBuilder(t, nil, Document{ 1369 Name: "f1", 1370 Content: content, 1371 }) 1372 1373 t.Run("LineMatches", func(t *testing.T) { 1374 res := searchForTest(t, b, 1375 &query.Regexp{ 1376 Regexp: mustParseRE("func.*Gitiles"), 1377 CaseSensitive: true, 1378 }) 1379 1380 if len(res.Files) != 1 { 1381 t.Fatalf("got %v, want one match", res.Files) 1382 } 1383 }) 1384 1385 t.Run("ChunkMatches", func(t *testing.T) { 1386 res := searchForTest(t, b, 1387 &query.Regexp{ 1388 Regexp: mustParseRE("func.*Gitiles"), 1389 CaseSensitive: true, 1390 }, 1391 chunkOpts, 1392 ) 1393 1394 if len(res.Files) != 1 { 1395 t.Fatalf("got %v, want one match", res.Files) 1396 } 1397 }) 1398} 1399 1400func TestRegexpCaseFolding(t *testing.T) { 1401 content := []byte("bla\nfunc unmarshalGitiles\n") 1402 1403 b := testIndexBuilder(t, nil, 1404 Document{Name: "f1", Content: content}) 1405 res := searchForTest(t, b, 1406 &query.Regexp{ 1407 Regexp: mustParseRE("func.*GITILES"), 1408 CaseSensitive: false, 1409 }) 1410 1411 if len(res.Files) != 1 { 1412 t.Fatalf("got %v, want one match", res.Files) 1413 } 1414} 1415 1416func TestCaseRegexp(t *testing.T) { 1417 content := []byte("BLABLABLA") 1418 b := testIndexBuilder(t, nil, 1419 Document{Name: "f1", Content: content}) 1420 1421 t.Run("LineMatches", func(t *testing.T) { 1422 res := searchForTest(t, b, 1423 &query.Regexp{ 1424 Regexp: mustParseRE("[xb][xl][xa]"), 1425 CaseSensitive: true, 1426 }) 1427 1428 if len(res.Files) > 0 { 1429 t.Fatalf("got %v, want no matches", res.Files) 1430 } 1431 }) 1432 1433 t.Run("ChunkMatches", func(t *testing.T) { 1434 res := searchForTest(t, b, 1435 &query.Regexp{ 1436 Regexp: mustParseRE("[xb][xl][xa]"), 1437 CaseSensitive: true, 1438 }, 1439 chunkOpts, 1440 ) 1441 1442 if len(res.Files) > 0 { 1443 t.Fatalf("got %v, want no matches", res.Files) 1444 } 1445 }) 1446} 1447 1448func TestNegativeRegexp(t *testing.T) { 1449 content := []byte("BLABLABLA needle bla") 1450 b := testIndexBuilder(t, nil, 1451 Document{Name: "f1", Content: content}) 1452 1453 t.Run("LineMatches", func(t *testing.T) { 1454 res := searchForTest(t, b, 1455 query.NewAnd( 1456 &query.Substring{ 1457 Pattern: "needle", 1458 }, 1459 &query.Not{ 1460 Child: &query.Regexp{ 1461 Regexp: mustParseRE(".cs"), 1462 }, 1463 })) 1464 1465 if len(res.Files) != 1 { 1466 t.Fatalf("got %v, want 1 match", res.Files) 1467 } 1468 }) 1469 1470 t.Run("ChunkMatches", func(t *testing.T) { 1471 res := searchForTest(t, b, 1472 query.NewAnd( 1473 &query.Substring{ 1474 Pattern: "needle", 1475 }, 1476 &query.Not{ 1477 Child: &query.Regexp{ 1478 Regexp: mustParseRE(".cs"), 1479 }, 1480 }, 1481 ), 1482 chunkOpts) 1483 1484 if len(res.Files) != 1 { 1485 t.Fatalf("got %v, want 1 match", res.Files) 1486 } 1487 }) 1488} 1489 1490func TestSymbolRank(t *testing.T) { 1491 t.Skip() 1492 1493 content := []byte("func bla() blubxxxxx") 1494 // ----------------01234567890123456789 1495 b := testIndexBuilder(t, nil, 1496 Document{ 1497 Name: "f1", 1498 Content: content, 1499 }, Document{ 1500 Name: "f2", 1501 Content: content, 1502 Symbols: []DocumentSection{{5, 8}}, 1503 }, Document{ 1504 Name: "f3", 1505 Content: content, 1506 }) 1507 1508 t.Run("LineMatches", func(t *testing.T) { 1509 res := searchForTest(t, b, 1510 &query.Substring{ 1511 CaseSensitive: false, 1512 Pattern: "bla", 1513 }) 1514 1515 if len(res.Files) != 3 { 1516 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1517 } 1518 if res.Files[0].FileName != "f2" { 1519 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1520 } 1521 }) 1522 1523 t.Run("ChunkMatches", func(t *testing.T) { 1524 res := searchForTest(t, b, 1525 &query.Substring{ 1526 CaseSensitive: false, 1527 Pattern: "bla", 1528 }, chunkOpts) 1529 1530 if len(res.Files) != 3 { 1531 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1532 } 1533 if res.Files[0].FileName != "f2" { 1534 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1535 } 1536 }) 1537} 1538 1539func TestSymbolRankRegexpUTF8(t *testing.T) { 1540 t.Skip() 1541 1542 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1543 content := []byte(prefix + 1544 "func bla() blub") 1545 // ------012345678901234 1546 b := testIndexBuilder(t, nil, 1547 Document{ 1548 Name: "f1", 1549 Content: content, 1550 }, Document{ 1551 Name: "f2", 1552 Content: content, 1553 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1554 }, Document{ 1555 Name: "f3", 1556 Content: content, 1557 }) 1558 1559 t.Run("LineMatches", func(t *testing.T) { 1560 res := searchForTest(t, b, 1561 &query.Regexp{ 1562 Regexp: mustParseRE("b.a"), 1563 }) 1564 1565 if len(res.Files) != 3 { 1566 t.Fatalf("got %#v, want 3 files", res.Files) 1567 } 1568 if res.Files[0].FileName != "f2" { 1569 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1570 } 1571 }) 1572 1573 t.Run("ChunjkMatches", func(t *testing.T) { 1574 res := searchForTest(t, b, 1575 &query.Regexp{ 1576 Regexp: mustParseRE("b.a"), 1577 }, chunkOpts) 1578 1579 if len(res.Files) != 3 { 1580 t.Fatalf("got %#v, want 3 files", res.Files) 1581 } 1582 if res.Files[0].FileName != "f2" { 1583 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1584 } 1585 }) 1586} 1587 1588func TestPartialSymbolRank(t *testing.T) { 1589 t.Skip() 1590 1591 content := []byte("func bla() blub") 1592 // ----------------012345678901234 1593 1594 b := testIndexBuilder(t, nil, 1595 Document{ 1596 Name: "f1", 1597 Content: content, 1598 Symbols: []DocumentSection{{4, 9}}, 1599 }, Document{ 1600 Name: "f2", 1601 Content: content, 1602 Symbols: []DocumentSection{{4, 8}}, 1603 }, Document{ 1604 Name: "f3", 1605 Content: content, 1606 Symbols: []DocumentSection{{4, 9}}, 1607 }) 1608 1609 t.Run("LineMatches", func(t *testing.T) { 1610 res := searchForTest(t, b, 1611 &query.Substring{ 1612 Pattern: "bla", 1613 }) 1614 1615 if len(res.Files) != 3 { 1616 t.Fatalf("got %#v, want 3 files", res.Files) 1617 } 1618 if res.Files[0].FileName != "f2" { 1619 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1620 } 1621 }) 1622 1623 t.Run("ChunkMatches", func(t *testing.T) { 1624 res := searchForTest(t, b, 1625 &query.Substring{ 1626 Pattern: "bla", 1627 }, chunkOpts) 1628 1629 if len(res.Files) != 3 { 1630 t.Fatalf("got %#v, want 3 files", res.Files) 1631 } 1632 if res.Files[0].FileName != "f2" { 1633 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1634 } 1635 }) 1636} 1637 1638func TestNegativeRepo(t *testing.T) { 1639 content := []byte("bla the needle") 1640 // ----------------01234567890123 1641 b := testIndexBuilder(t, &Repository{ 1642 Name: "bla", 1643 }, Document{Name: "f1", Content: content}) 1644 1645 t.Run("LineMatches", func(t *testing.T) { 1646 sres := searchForTest(t, b, 1647 query.NewAnd( 1648 &query.Substring{Pattern: "needle"}, 1649 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1650 )) 1651 1652 if len(sres.Files) != 0 { 1653 t.Fatalf("got %v, want 0 matches", sres.Files) 1654 } 1655 }) 1656 1657 t.Run("ChunkMatches", func(t *testing.T) { 1658 sres := searchForTest(t, b, 1659 query.NewAnd( 1660 &query.Substring{Pattern: "needle"}, 1661 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1662 ), chunkOpts) 1663 1664 if len(sres.Files) != 0 { 1665 t.Fatalf("got %v, want 0 matches", sres.Files) 1666 } 1667 }) 1668} 1669 1670func TestListRepos(t *testing.T) { 1671 content := []byte("bla the needle\n") 1672 // ----------------012345678901234- 1673 1674 t.Run("default and minimal fallback", func(t *testing.T) { 1675 repo := &Repository{ 1676 Name: "reponame", 1677 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1678 } 1679 b := testIndexBuilder(t, repo, 1680 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1681 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1682 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1683 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1684 1685 searcher := searcherForTest(t, b) 1686 1687 for _, opts := range []*ListOptions{ 1688 nil, 1689 {Minimal: false}, 1690 {Minimal: true}, 1691 } { 1692 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1693 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1694 1695 res, err := searcher.List(context.Background(), q, opts) 1696 if err != nil { 1697 t.Fatalf("List(%v): %v", q, err) 1698 } 1699 1700 want := &RepoList{ 1701 Repos: []*RepoListEntry{{ 1702 Repository: *repo, 1703 Stats: RepoStats{ 1704 Documents: 4, 1705 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1706 Shards: 1, 1707 1708 NewLinesCount: 4, 1709 DefaultBranchNewLinesCount: 2, 1710 OtherBranchesNewLinesCount: 3, 1711 }, 1712 }}, 1713 Stats: RepoStats{ 1714 Documents: 4, 1715 ContentBytes: 68, 1716 Shards: 1, 1717 1718 NewLinesCount: 4, 1719 DefaultBranchNewLinesCount: 2, 1720 OtherBranchesNewLinesCount: 3, 1721 }, 1722 } 1723 ignored := []cmp.Option{ 1724 cmpopts.EquateEmpty(), 1725 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1726 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1727 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1728 cmpopts.IgnoreFields(Repository{}, "priority"), 1729 } 1730 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1731 t.Fatalf("mismatch (-want +got):\n%s", diff) 1732 } 1733 1734 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1735 res, err = searcher.List(context.Background(), q, nil) 1736 if err != nil { 1737 t.Fatalf("List(%v): %v", q, err) 1738 } 1739 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1740 t.Fatalf("got %v, want 0 matches", res) 1741 } 1742 }) 1743 } 1744 }) 1745 1746 t.Run("minimal", func(t *testing.T) { 1747 repo := &Repository{ 1748 ID: 1234, 1749 Name: "reponame", 1750 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1751 RawConfig: map[string]string{"repoid": "1234"}, 1752 } 1753 b := testIndexBuilder(t, repo, 1754 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1755 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1756 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1757 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1758 1759 searcher := searcherForTest(t, b) 1760 1761 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1762 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1763 if err != nil { 1764 t.Fatalf("List(%v): %v", q, err) 1765 } 1766 1767 want := &RepoList{ 1768 Minimal: map[uint32]*MinimalRepoListEntry{ 1769 repo.ID: { 1770 HasSymbols: repo.HasSymbols, 1771 Branches: repo.Branches, 1772 }, 1773 }, 1774 Stats: RepoStats{ 1775 Shards: 1, 1776 Documents: 4, 1777 IndexBytes: 412, 1778 ContentBytes: 68, 1779 NewLinesCount: 4, 1780 DefaultBranchNewLinesCount: 2, 1781 OtherBranchesNewLinesCount: 3, 1782 }, 1783 } 1784 1785 if diff := cmp.Diff(want, res); diff != "" { 1786 t.Fatalf("mismatch (-want +got):\n%s", diff) 1787 } 1788 1789 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1790 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1791 if err != nil { 1792 t.Fatalf("List(%v): %v", q, err) 1793 } 1794 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1795 t.Fatalf("got %v, want 0 matches", res) 1796 } 1797 }) 1798} 1799 1800func TestListReposByContent(t *testing.T) { 1801 content := []byte("bla the needle") 1802 1803 b := testIndexBuilder(t, &Repository{ 1804 Name: "reponame", 1805 }, 1806 Document{Name: "f1", Content: content}, 1807 Document{Name: "f2", Content: content}) 1808 1809 searcher := searcherForTest(t, b) 1810 q := &query.Substring{Pattern: "needle"} 1811 res, err := searcher.List(context.Background(), q, nil) 1812 if err != nil { 1813 t.Fatalf("List(%v): %v", q, err) 1814 } 1815 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1816 t.Fatalf("got %v, want 1 matches", res) 1817 } 1818 if got := res.Repos[0].Stats.Shards; got != 1 { 1819 t.Fatalf("got %d, want 1 shard", got) 1820 } 1821 q = &query.Substring{Pattern: "foo"} 1822 res, err = searcher.List(context.Background(), q, nil) 1823 if err != nil { 1824 t.Fatalf("List(%v): %v", q, err) 1825 } 1826 if len(res.Repos) != 0 { 1827 t.Fatalf("got %v, want 0 matches", res) 1828 } 1829} 1830 1831func TestMetadata(t *testing.T) { 1832 content := []byte("bla the needle") 1833 1834 b := testIndexBuilder(t, &Repository{ 1835 Name: "reponame", 1836 }, Document{Name: "f1", Content: content}, 1837 Document{Name: "f2", Content: content}) 1838 1839 var buf bytes.Buffer 1840 if err := b.Write(&buf); err != nil { 1841 t.Fatal(err) 1842 } 1843 f := &memSeeker{buf.Bytes()} 1844 1845 rd, _, err := ReadMetadata(f) 1846 if err != nil { 1847 t.Fatalf("ReadMetadata: %v", err) 1848 } 1849 1850 if got, want := rd[0].Name, "reponame"; got != want { 1851 t.Fatalf("got %q want %q", got, want) 1852 } 1853} 1854 1855func TestOr(t *testing.T) { 1856 b := testIndexBuilder(t, nil, 1857 Document{Name: "f1", Content: []byte("needle")}, 1858 Document{Name: "f2", Content: []byte("banana")}) 1859 t.Run("LineMatches", func(t *testing.T) { 1860 sres := searchForTest(t, b, query.NewOr( 1861 &query.Substring{Pattern: "needle"}, 1862 &query.Substring{Pattern: "banana"})) 1863 1864 if len(sres.Files) != 2 { 1865 t.Fatalf("got %v, want 2 files", sres.Files) 1866 } 1867 }) 1868 1869 t.Run("ChunkMatches", func(t *testing.T) { 1870 sres := searchForTest(t, b, query.NewOr( 1871 &query.Substring{Pattern: "needle"}, 1872 &query.Substring{Pattern: "banana"})) 1873 1874 if len(sres.Files) != 2 { 1875 t.Fatalf("got %v, want 2 files", sres.Files) 1876 } 1877 }) 1878} 1879 1880func TestFrequency(t *testing.T) { 1881 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 1882 1883 b := testIndexBuilder(t, nil, 1884 Document{ 1885 Name: "f1", 1886 Content: content, 1887 }) 1888 1889 t.Run("LineMatches", func(t *testing.T) { 1890 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 1891 if len(sres.Files) != 0 { 1892 t.Errorf("got %v, wanted 0 matches", sres.Files) 1893 } 1894 }) 1895 1896 t.Run("ChunkMatches", func(t *testing.T) { 1897 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 1898 if len(sres.Files) != 0 { 1899 t.Errorf("got %v, wanted 0 matches", sres.Files) 1900 } 1901 }) 1902} 1903 1904func TestMatchNewline(t *testing.T) { 1905 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 1906 if err != nil { 1907 t.Fatalf("syntax.Parse: %v", err) 1908 } 1909 1910 content := []byte("pqr\nalex") 1911 1912 b := testIndexBuilder(t, nil, 1913 Document{ 1914 Name: "f1", 1915 Content: content, 1916 }) 1917 1918 t.Run("LineMatches", func(t *testing.T) { 1919 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1920 if len(sres.Files) != 1 { 1921 t.Errorf("got %v, wanted 1 matches", sres.Files) 1922 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 1923 t.Errorf("got match line %q, want %q", l, content) 1924 } 1925 }) 1926 1927 t.Run("ChunkMatches", func(t *testing.T) { 1928 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 1929 if len(sres.Files) != 1 { 1930 t.Errorf("got %v, wanted 1 matches", sres.Files) 1931 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 1932 t.Errorf("got match line %q, want %q", c, content) 1933 } 1934 }) 1935} 1936 1937func TestSubRepo(t *testing.T) { 1938 subRepos := map[string]*Repository{ 1939 "sub": { 1940 Name: "sub-name", 1941 LineFragmentTemplate: "sub-line", 1942 }, 1943 } 1944 1945 content := []byte("pqr\nalex") 1946 1947 b := testIndexBuilder(t, &Repository{ 1948 SubRepoMap: subRepos, 1949 }, Document{ 1950 Name: "sub/f1", 1951 Content: content, 1952 SubRepositoryPath: "sub", 1953 }) 1954 1955 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 1956 if len(sres.Files) != 1 { 1957 t.Fatalf("got %v, wanted 1 matches", sres.Files) 1958 } 1959 1960 f := sres.Files[0] 1961 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 1962 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 1963 } 1964 1965 if sres.LineFragments["sub-name"] != "sub-line" { 1966 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 1967 } 1968} 1969 1970func TestSearchEither(t *testing.T) { 1971 b := testIndexBuilder(t, nil, 1972 Document{Name: "f1", Content: []byte("bla needle bla")}, 1973 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 1974 1975 t.Run("LineMatches", func(t *testing.T) { 1976 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 1977 if len(sres.Files) != 2 { 1978 t.Fatalf("got %v, wanted 2 matches", sres.Files) 1979 } 1980 1981 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 1982 if len(sres.Files) != 1 { 1983 t.Fatalf("got %v, wanted 1 match", sres.Files) 1984 } 1985 1986 if got, want := sres.Files[0].FileName, "f1"; got != want { 1987 t.Errorf("got %q, want %q", got, want) 1988 } 1989 }) 1990 1991 t.Run("ChunkMatches", func(t *testing.T) { 1992 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 1993 if len(sres.Files) != 2 { 1994 t.Fatalf("got %v, wanted 2 matches", sres.Files) 1995 } 1996 1997 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 1998 if len(sres.Files) != 1 { 1999 t.Fatalf("got %v, wanted 1 match", sres.Files) 2000 } 2001 2002 if got, want := sres.Files[0].FileName, "f1"; got != want { 2003 t.Errorf("got %q, want %q", got, want) 2004 } 2005 }) 2006} 2007 2008func TestUnicodeExactMatch(t *testing.T) { 2009 needle := "néédlÉ" 2010 content := []byte("blá blá " + needle + " blâ") 2011 2012 b := testIndexBuilder(t, nil, 2013 Document{Name: "f1", Content: content}) 2014 2015 t.Run("LineMatches", func(t *testing.T) { 2016 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2017 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2018 } 2019 }) 2020 2021 t.Run("ChunkMatches", func(t *testing.T) { 2022 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2023 if len(res.Files) != 1 { 2024 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2025 } 2026 }) 2027} 2028 2029func TestUnicodeCoverContent(t *testing.T) { 2030 needle := "néédlÉ" 2031 content := []byte("blá blá " + needle + " blâ") 2032 2033 b := testIndexBuilder(t, nil, 2034 Document{Name: "f1", Content: content}) 2035 2036 t.Run("LineMatches", func(t *testing.T) { 2037 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2038 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2039 } 2040 2041 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2042 if len(res.Files) != 1 { 2043 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2044 } 2045 2046 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2047 t.Errorf("got %d want %d", got, want) 2048 } 2049 }) 2050 2051 t.Run("ChunkMatches", func(t *testing.T) { 2052 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2053 if len(res.Files) != 0 { 2054 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2055 } 2056 2057 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2058 if len(res.Files) != 1 { 2059 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2060 } 2061 2062 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2063 want := uint32(strings.Index(string(content), needle)) 2064 if got != want { 2065 t.Errorf("got %d want %d", got, want) 2066 } 2067 }) 2068} 2069 2070func TestUnicodeNonCoverContent(t *testing.T) { 2071 needle := "nééáádlÉ" 2072 content := []byte("blá blá " + needle + " blâ") 2073 2074 b := testIndexBuilder(t, nil, 2075 Document{Name: "f1", Content: content}) 2076 2077 t.Run("LineMatches", func(t *testing.T) { 2078 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2079 if len(res.Files) != 1 { 2080 t.Fatalf("got %v, wanted 1 match", res.Files) 2081 } 2082 2083 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2084 t.Errorf("got %d want %d", got, want) 2085 } 2086 }) 2087 2088 t.Run("ChunkMatches", func(t *testing.T) { 2089 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2090 if len(res.Files) != 1 { 2091 t.Fatalf("got %v, wanted 1 match", res.Files) 2092 } 2093 2094 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2095 want := uint32(strings.Index(string(content), needle)) 2096 if got != want { 2097 t.Errorf("got %d want %d", got, want) 2098 } 2099 }) 2100} 2101 2102const kelvinCodePoint = 8490 2103 2104func TestUnicodeVariableLength(t *testing.T) { 2105 lower := 'k' 2106 upper := rune(kelvinCodePoint) 2107 2108 needle := "nee" + string([]rune{lower}) + "eed" 2109 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2110 " ee" + string([]rune{lower}) + "ee" + 2111 " ee" + string([]rune{upper}) + "ee") 2112 2113 t.Run("LineMatches", func(t *testing.T) { 2114 b := testIndexBuilder(t, nil, 2115 Document{Name: "f1", Content: []byte(corpus)}) 2116 2117 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2118 if len(res.Files) != 1 { 2119 t.Fatalf("got %v, wanted 1 match", res.Files) 2120 } 2121 }) 2122 2123 t.Run("ChunkMatches", func(t *testing.T) { 2124 b := testIndexBuilder(t, nil, 2125 Document{Name: "f1", Content: []byte(corpus)}) 2126 2127 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2128 if len(res.Files) != 1 { 2129 t.Fatalf("got %v, wanted 1 match", res.Files) 2130 } 2131 }) 2132} 2133 2134func TestUnicodeFileStartOffsets(t *testing.T) { 2135 unicode := "世界" 2136 wat := "waaaaaat" 2137 b := testIndexBuilder(t, nil, 2138 Document{ 2139 Name: "f1", 2140 Content: []byte(unicode), 2141 }, 2142 Document{ 2143 Name: "f2", 2144 Content: []byte(wat), 2145 }, 2146 ) 2147 q := &query.Substring{Pattern: wat, Content: true} 2148 res := searchForTest(t, b, q) 2149 if len(res.Files) != 1 { 2150 t.Fatalf("got %v, wanted 1 match", res.Files) 2151 } 2152} 2153 2154func TestLongFileUTF8(t *testing.T) { 2155 needle := "neeedle" 2156 2157 // 6 bytes. 2158 unicode := "世界" 2159 content := []byte(strings.Repeat(unicode, 100) + needle) 2160 b := testIndexBuilder(t, nil, 2161 Document{ 2162 Name: "f1", 2163 Content: []byte(strings.Repeat("a", 50)), 2164 }, 2165 Document{ 2166 Name: "f2", 2167 Content: content, 2168 }) 2169 2170 t.Run("LineMatches", func(t *testing.T) { 2171 q := &query.Substring{Pattern: needle, Content: true} 2172 res := searchForTest(t, b, q) 2173 if len(res.Files) != 1 { 2174 t.Errorf("got %v, want 1 result", res) 2175 } 2176 }) 2177 2178 t.Run("ChunkMatches", func(t *testing.T) { 2179 q := &query.Substring{Pattern: needle, Content: true} 2180 res := searchForTest(t, b, q, chunkOpts) 2181 if len(res.Files) != 1 { 2182 t.Errorf("got %v, want 1 result", res) 2183 } 2184 }) 2185} 2186 2187func TestEstimateDocCount(t *testing.T) { 2188 content := []byte("bla needle bla") 2189 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2190 Document{Name: "f1", Content: content}, 2191 Document{Name: "f2", Content: content}, 2192 ) 2193 2194 t.Run("LineMatches", func(t *testing.T) { 2195 if sres := searchForTest(t, b, 2196 query.NewAnd( 2197 &query.Substring{Pattern: "needle"}, 2198 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2199 ), SearchOptions{ 2200 EstimateDocCount: true, 2201 }); sres.Stats.ShardFilesConsidered != 2 { 2202 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2203 } 2204 if sres := searchForTest(t, b, 2205 query.NewAnd( 2206 &query.Substring{Pattern: "needle"}, 2207 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2208 ), SearchOptions{ 2209 EstimateDocCount: true, 2210 }); sres.Stats.ShardFilesConsidered != 0 { 2211 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2212 } 2213 }) 2214 2215 t.Run("ChunkMatches", func(t *testing.T) { 2216 if sres := searchForTest(t, b, 2217 query.NewAnd( 2218 &query.Substring{Pattern: "needle"}, 2219 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2220 ), SearchOptions{ 2221 EstimateDocCount: true, 2222 ChunkMatches: true, 2223 }); sres.Stats.ShardFilesConsidered != 2 { 2224 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2225 } 2226 if sres := searchForTest(t, b, 2227 query.NewAnd( 2228 &query.Substring{Pattern: "needle"}, 2229 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2230 ), SearchOptions{ 2231 EstimateDocCount: true, 2232 ChunkMatches: true, 2233 }); sres.Stats.ShardFilesConsidered != 0 { 2234 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2235 } 2236 }) 2237} 2238 2239func TestUTF8CorrectCorpus(t *testing.T) { 2240 needle := "neeedle" 2241 2242 // 6 bytes. 2243 unicode := "世界" 2244 b := testIndexBuilder(t, nil, 2245 Document{ 2246 Name: "f1", 2247 Content: []byte(strings.Repeat(unicode, 100)), 2248 }, 2249 Document{ 2250 Name: "xxxxxneeedle", 2251 Content: []byte("hello"), 2252 }) 2253 2254 t.Run("LineMatches", func(t *testing.T) { 2255 q := &query.Substring{Pattern: needle, FileName: true} 2256 res := searchForTest(t, b, q) 2257 if len(res.Files) != 1 { 2258 t.Errorf("got %v, want 1 result", res) 2259 } 2260 }) 2261 2262 t.Run("ChunkMatches", func(t *testing.T) { 2263 q := &query.Substring{Pattern: needle, FileName: true} 2264 res := searchForTest(t, b, q, chunkOpts) 2265 if len(res.Files) != 1 { 2266 t.Errorf("got %v, want 1 result", res) 2267 } 2268 }) 2269} 2270 2271func TestBuilderStats(t *testing.T) { 2272 b := testIndexBuilder(t, nil, 2273 Document{ 2274 Name: "f1", 2275 Content: []byte(strings.Repeat("abcd", 1024)), 2276 }) 2277 var buf bytes.Buffer 2278 if err := b.Write(&buf); err != nil { 2279 t.Fatal(err) 2280 } 2281 2282 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2283 t.Errorf("got %d, want %d", got, want) 2284 } 2285} 2286 2287func TestIOStats(t *testing.T) { 2288 b := testIndexBuilder(t, nil, 2289 Document{ 2290 Name: "f1", 2291 Content: []byte(strings.Repeat("abcd", 1024)), 2292 }) 2293 2294 t.Run("LineMatches", func(t *testing.T) { 2295 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2296 res := searchForTest(t, b, q) 2297 2298 // 4096 (content) + 2 (overhead: newlines or doc sections) 2299 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2300 t.Errorf("got content I/O %d, want %d", got, want) 2301 } 2302 2303 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2304 // delta encoded. 2305 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2306 t.Errorf("got index I/O %d, want %d", got, want) 2307 } 2308 }) 2309 2310 t.Run("ChunkMatches", func(t *testing.T) { 2311 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2312 res := searchForTest(t, b, q, chunkOpts) 2313 2314 // 4096 (content) + 2 (overhead: newlines or doc sections) 2315 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2316 t.Errorf("got content I/O %d, want %d", got, want) 2317 } 2318 2319 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2320 // delta encoded. 2321 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2322 t.Errorf("got index I/O %d, want %d", got, want) 2323 } 2324 }) 2325} 2326 2327func TestStartLineAnchor(t *testing.T) { 2328 b := testIndexBuilder(t, nil, 2329 Document{ 2330 Name: "f1", 2331 Content: []byte( 2332 `hello 2333start of middle of line 2334`), 2335 }) 2336 2337 t.Run("LineMatches", func(t *testing.T) { 2338 q, err := query.Parse("^start") 2339 if err != nil { 2340 t.Errorf("parse: %v", err) 2341 } 2342 2343 res := searchForTest(t, b, q) 2344 if len(res.Files) != 1 { 2345 t.Errorf("got %v, want 1 file", res.Files) 2346 } 2347 2348 q, err = query.Parse("^middle") 2349 if err != nil { 2350 t.Errorf("parse: %v", err) 2351 } 2352 res = searchForTest(t, b, q) 2353 if len(res.Files) != 0 { 2354 t.Errorf("got %v, want 0 files", res.Files) 2355 } 2356 }) 2357 2358 t.Run("ChunkMatches", func(t *testing.T) { 2359 q, err := query.Parse("^start") 2360 if err != nil { 2361 t.Errorf("parse: %v", err) 2362 } 2363 2364 res := searchForTest(t, b, q, chunkOpts) 2365 if len(res.Files) != 1 { 2366 t.Errorf("got %v, want 1 file", res.Files) 2367 } 2368 2369 q, err = query.Parse("^middle") 2370 if err != nil { 2371 t.Errorf("parse: %v", err) 2372 } 2373 res = searchForTest(t, b, q, chunkOpts) 2374 if len(res.Files) != 0 { 2375 t.Errorf("got %v, want 0 files", res.Files) 2376 } 2377 }) 2378} 2379 2380func TestAndOrUnicode(t *testing.T) { 2381 q, err := query.Parse("orange.*apple") 2382 if err != nil { 2383 t.Errorf("parse: %v", err) 2384 } 2385 finalQ := query.NewAnd(q, 2386 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2387 query.NewOr(&query.Branch{Pattern: "master"})))) 2388 2389 b := testIndexBuilder(t, &Repository{ 2390 Name: "name", 2391 Branches: []RepositoryBranch{{"master", "master-version"}}, 2392 }, Document{ 2393 Name: "f2", 2394 Content: []byte("orange\u2318apple"), 2395 // --------------0123456 78901 2396 Branches: []string{"master"}, 2397 }) 2398 2399 t.Run("LineMatches", func(t *testing.T) { 2400 res := searchForTest(t, b, finalQ) 2401 if len(res.Files) != 1 { 2402 t.Errorf("got %v, want 1 result", res.Files) 2403 } 2404 }) 2405 2406 t.Run("ChunkMatches", func(t *testing.T) { 2407 res := searchForTest(t, b, finalQ, chunkOpts) 2408 if len(res.Files) != 1 { 2409 t.Errorf("got %v, want 1 result", res.Files) 2410 } 2411 }) 2412} 2413 2414func TestAndShort(t *testing.T) { 2415 content := []byte("bla needle at orange bla") 2416 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2417 Document{Name: "f1", Content: content}, 2418 Document{Name: "f2", Content: []byte("xx at xx")}, 2419 Document{Name: "f3", Content: []byte("yy orange xx")}, 2420 ) 2421 2422 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2423 &query.Substring{Pattern: "orange"}) 2424 2425 t.Run("LineMatches", func(t *testing.T) { 2426 res := searchForTest(t, b, q) 2427 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2428 t.Errorf("got %v, want 1 result", res.Files) 2429 } 2430 }) 2431 2432 t.Run("ChunkMatches", func(t *testing.T) { 2433 res := searchForTest(t, b, q, chunkOpts) 2434 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2435 t.Errorf("got %v, want 1 result", res.Files) 2436 } 2437 }) 2438} 2439 2440func TestNoCollectRegexpSubstring(t *testing.T) { 2441 content := []byte("bla final bla\nfoo final, foo") 2442 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2443 Document{Name: "f1", Content: content}, 2444 ) 2445 2446 q := &query.Regexp{ 2447 Regexp: mustParseRE("final[,.]"), 2448 } 2449 2450 t.Run("LineMatches", func(t *testing.T) { 2451 res := searchForTest(t, b, q) 2452 if len(res.Files) != 1 { 2453 t.Fatalf("got %v, want 1 result", res.Files) 2454 } 2455 if f := res.Files[0]; len(f.LineMatches) != 1 { 2456 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2457 } 2458 }) 2459 2460 t.Run("ChunkMatches", func(t *testing.T) { 2461 res := searchForTest(t, b, q, chunkOpts) 2462 if len(res.Files) != 1 { 2463 t.Fatalf("got %v, want 1 result", res.Files) 2464 } 2465 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2466 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2467 } 2468 }) 2469} 2470 2471func printLineMatches(ms []LineMatch) string { 2472 var ss []string 2473 for _, m := range ms { 2474 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2475 } 2476 2477 return strings.Join(ss, ", ") 2478} 2479 2480func TestLang(t *testing.T) { 2481 content := []byte("bla needle bla") 2482 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2483 Document{Name: "f1", Content: content}, 2484 Document{Name: "f2", Language: "java", Content: content}, 2485 Document{Name: "f3", Language: "cpp", Content: content}, 2486 ) 2487 2488 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2489 &query.Language{Language: "cpp"}) 2490 2491 t.Run("LineMatches", func(t *testing.T) { 2492 res := searchForTest(t, b, q) 2493 if len(res.Files) != 1 { 2494 t.Fatalf("got %v, want 1 result in f3", res.Files) 2495 } 2496 f := res.Files[0] 2497 if f.FileName != "f3" || f.Language != "cpp" { 2498 t.Fatalf("got %v, want 1 match with language cpp", f) 2499 } 2500 }) 2501 2502 t.Run("ChunkMatches", func(t *testing.T) { 2503 res := searchForTest(t, b, q, chunkOpts) 2504 if len(res.Files) != 1 { 2505 t.Fatalf("got %v, want 1 result in f3", res.Files) 2506 } 2507 f := res.Files[0] 2508 if f.FileName != "f3" || f.Language != "cpp" { 2509 t.Fatalf("got %v, want 1 match with language cpp", f) 2510 } 2511 }) 2512} 2513 2514func TestLangShortcut(t *testing.T) { 2515 content := []byte("bla needle bla") 2516 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2517 Document{Name: "f2", Language: "java", Content: content}, 2518 Document{Name: "f3", Language: "cpp", Content: content}, 2519 ) 2520 2521 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2522 &query.Language{Language: "fortran"}) 2523 2524 t.Run("LineMatches", func(t *testing.T) { 2525 res := searchForTest(t, b, q) 2526 if len(res.Files) != 0 { 2527 t.Fatalf("got %v, want 0 results", res.Files) 2528 } 2529 if res.Stats.IndexBytesLoaded > 0 { 2530 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2531 } 2532 }) 2533 2534 t.Run("ChunkMatches", func(t *testing.T) { 2535 res := searchForTest(t, b, q, chunkOpts) 2536 if len(res.Files) != 0 { 2537 t.Fatalf("got %v, want 0 results", res.Files) 2538 } 2539 if res.Stats.IndexBytesLoaded > 0 { 2540 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2541 } 2542 }) 2543} 2544 2545func TestNoTextMatchAtoms(t *testing.T) { 2546 content := []byte("bla needle bla") 2547 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2548 Document{Name: "f1", Content: content}, 2549 Document{Name: "f2", Language: "java", Content: content}, 2550 Document{Name: "f3", Language: "cpp", Content: content}, 2551 ) 2552 q := query.NewAnd(&query.Language{Language: "java"}) 2553 t.Run("LineMatches", func(t *testing.T) { 2554 res := searchForTest(t, b, q) 2555 if len(res.Files) != 1 { 2556 t.Fatalf("got %v, want 1 result in f3", res.Files) 2557 } 2558 }) 2559 2560 t.Run("ChunkMatches", func(t *testing.T) { 2561 res := searchForTest(t, b, q, chunkOpts) 2562 if len(res.Files) != 1 { 2563 t.Fatalf("got %v, want 1 result in f3", res.Files) 2564 } 2565 }) 2566} 2567 2568func TestNoPositiveAtoms(t *testing.T) { 2569 content := []byte("bla needle bla") 2570 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2571 Document{Name: "f1", Content: content}, 2572 Document{Name: "f2", Content: content}, 2573 ) 2574 2575 q := query.NewAnd( 2576 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2577 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2578 t.Run("LineMatches", func(t *testing.T) { 2579 res := searchForTest(t, b, q) 2580 if len(res.Files) != 2 { 2581 t.Fatalf("got %v, want 2 results in f3", res.Files) 2582 } 2583 }) 2584 t.Run("ChunkMatches", func(t *testing.T) { 2585 res := searchForTest(t, b, q, chunkOpts) 2586 if len(res.Files) != 2 { 2587 t.Fatalf("got %v, want 2 results in f3", res.Files) 2588 } 2589 }) 2590} 2591 2592func TestSymbolBoundaryStart(t *testing.T) { 2593 content := []byte("start\nbla bla\nend") 2594 // ----------------012345-67890123-456 2595 2596 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2597 Document{ 2598 Name: "f1", 2599 Content: content, 2600 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2601 }, 2602 ) 2603 q := &query.Symbol{ 2604 Expr: &query.Substring{Pattern: "start"}, 2605 } 2606 t.Run("LineMatches", func(t *testing.T) { 2607 res := searchForTest(t, b, q) 2608 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2609 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2610 } 2611 m := res.Files[0].LineMatches[0].LineFragments[0] 2612 if m.Offset != 0 { 2613 t.Fatalf("got offset %d want 0", m.Offset) 2614 } 2615 }) 2616 2617 t.Run("ChunkMatches", func(t *testing.T) { 2618 res := searchForTest(t, b, q, chunkOpts) 2619 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2620 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2621 } 2622 m := res.Files[0].ChunkMatches[0].Ranges[0] 2623 if m.Start.ByteOffset != 0 { 2624 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2625 } 2626 }) 2627} 2628 2629func TestSymbolBoundaryEnd(t *testing.T) { 2630 content := []byte("start\nbla bla\nend") 2631 // ----------------012345-67890123-456 2632 2633 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2634 Document{ 2635 Name: "f1", 2636 Content: content, 2637 Symbols: []DocumentSection{{14, 17}}, 2638 }, 2639 ) 2640 q := &query.Symbol{ 2641 Expr: &query.Substring{Pattern: "end"}, 2642 } 2643 t.Run("LineMatches", func(t *testing.T) { 2644 res := searchForTest(t, b, q) 2645 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2646 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2647 } 2648 m := res.Files[0].LineMatches[0].LineFragments[0] 2649 if m.Offset != 14 { 2650 t.Fatalf("got offset %d want 0", m.Offset) 2651 } 2652 }) 2653 2654 t.Run("ChunkMatches", func(t *testing.T) { 2655 res := searchForTest(t, b, q, chunkOpts) 2656 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2657 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2658 } 2659 m := res.Files[0].ChunkMatches[0].Ranges[0] 2660 if m.Start.ByteOffset != 14 { 2661 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2662 } 2663 }) 2664} 2665 2666func TestSymbolSubstring(t *testing.T) { 2667 content := []byte("bla\nsymblabla\nbla") 2668 // ----------------0123-4567890123-456 2669 2670 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2671 Document{ 2672 Name: "f1", 2673 Content: content, 2674 Symbols: []DocumentSection{{4, 12}}, 2675 }, 2676 ) 2677 q := &query.Symbol{ 2678 Expr: &query.Substring{Pattern: "bla"}, 2679 } 2680 t.Run("LineMatches", func(t *testing.T) { 2681 res := searchForTest(t, b, q) 2682 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2683 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2684 } 2685 m := res.Files[0].LineMatches[0].LineFragments[0] 2686 if m.Offset != 7 || m.MatchLength != 3 { 2687 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 2688 } 2689 }) 2690 2691 t.Run("ChunkMatches", func(t *testing.T) { 2692 res := searchForTest(t, b, q, chunkOpts) 2693 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2694 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2695 } 2696 m := res.Files[0].ChunkMatches[0].Ranges[0] 2697 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 2698 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 2699 } 2700 }) 2701} 2702 2703func TestSymbolSubstringExact(t *testing.T) { 2704 content := []byte("bla\nsym\nbla\nsym\nasymb") 2705 // ----------------0123-4567-890123456-78901 2706 2707 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2708 Document{ 2709 Name: "f1", 2710 Content: content, 2711 Symbols: []DocumentSection{{4, 7}}, 2712 }, 2713 ) 2714 q := &query.Symbol{ 2715 Expr: &query.Substring{Pattern: "sym"}, 2716 } 2717 t.Run("LineMatches", func(t *testing.T) { 2718 res := searchForTest(t, b, q) 2719 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2720 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2721 } 2722 m := res.Files[0].LineMatches[0].LineFragments[0] 2723 if m.Offset != 4 { 2724 t.Fatalf("got offset %d, want 7", m.Offset) 2725 } 2726 }) 2727 2728 t.Run("ChunkMatches", func(t *testing.T) { 2729 res := searchForTest(t, b, q, chunkOpts) 2730 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2731 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2732 } 2733 m := res.Files[0].ChunkMatches[0].Ranges[0] 2734 if m.Start.ByteOffset != 4 { 2735 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 2736 } 2737 }) 2738} 2739 2740func TestSymbolRegexpExact(t *testing.T) { 2741 content := []byte("blah\nbla\nbl") 2742 // ----------------01234-5678-90 2743 2744 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2745 Document{ 2746 Name: "f1", 2747 Content: content, 2748 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 2749 }, 2750 ) 2751 q := &query.Symbol{ 2752 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 2753 } 2754 t.Run("LineMatches", func(t *testing.T) { 2755 res := searchForTest(t, b, q) 2756 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2757 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2758 } 2759 m := res.Files[0].LineMatches[0].LineFragments[0] 2760 if m.Offset != 5 { 2761 t.Fatalf("got offset %d, want 5", m.Offset) 2762 } 2763 }) 2764 2765 t.Run("ChunkMatches", func(t *testing.T) { 2766 res := searchForTest(t, b, q, chunkOpts) 2767 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2768 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2769 } 2770 m := res.Files[0].ChunkMatches[0].Ranges[0] 2771 if m.Start.ByteOffset != 5 { 2772 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 2773 } 2774 }) 2775} 2776 2777func TestSymbolRegexpPartial(t *testing.T) { 2778 content := []byte("abcdef") 2779 // ----------------012345 2780 2781 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2782 Document{ 2783 Name: "f1", 2784 Content: content, 2785 Symbols: []DocumentSection{{0, 6}}, 2786 }, 2787 ) 2788 q := &query.Symbol{ 2789 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 2790 } 2791 t.Run("LineMatches", func(t *testing.T) { 2792 res := searchForTest(t, b, q) 2793 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2794 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2795 } 2796 m := res.Files[0].LineMatches[0].LineFragments[0] 2797 if m.Offset != 1 { 2798 t.Fatalf("got offset %d, want 1", m.Offset) 2799 } 2800 if m.MatchLength != 3 { 2801 t.Fatalf("got match length %d, want 3", m.MatchLength) 2802 } 2803 }) 2804 2805 t.Run("ChunkMatches", func(t *testing.T) { 2806 res := searchForTest(t, b, q, chunkOpts) 2807 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2808 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2809 } 2810 m := res.Files[0].ChunkMatches[0].Ranges[0] 2811 if m.Start.ByteOffset != 1 { 2812 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 2813 } 2814 if m.End.ByteOffset != 4 { 2815 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 2816 } 2817 }) 2818} 2819 2820func TestSymbolRegexpAll(t *testing.T) { 2821 docs := []Document{ 2822 { 2823 Name: "f1", 2824 Content: []byte("Hello Zoekt"), 2825 // --------------01234567890 2826 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 2827 }, 2828 { 2829 Name: "f2", 2830 Content: []byte("Second Zoekt Third"), 2831 // --------------012345678901234567 2832 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 2833 }, 2834 } 2835 2836 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) 2837 q := &query.Symbol{ 2838 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2839 } 2840 t.Run("LineMatches", func(t *testing.T) { 2841 res := searchForTest(t, b, q) 2842 if len(res.Files) != len(docs) { 2843 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2844 } 2845 for i, want := range docs { 2846 got := res.Files[i].LineMatches[0].LineFragments 2847 if len(got) != len(want.Symbols) { 2848 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2849 } 2850 2851 for j, sec := range want.Symbols { 2852 if sec.Start != got[j].Offset { 2853 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 2854 } 2855 } 2856 } 2857 }) 2858 2859 t.Run("ChunkMatches", func(t *testing.T) { 2860 res := searchForTest(t, b, q, chunkOpts) 2861 if len(res.Files) != len(docs) { 2862 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2863 } 2864 for i, want := range docs { 2865 got := res.Files[i].ChunkMatches[0].Ranges 2866 if len(got) != len(want.Symbols) { 2867 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2868 } 2869 2870 for j, sec := range want.Symbols { 2871 if sec.Start != uint32(got[j].Start.ByteOffset) { 2872 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 2873 } 2874 } 2875 } 2876 }) 2877} 2878 2879func TestHitIterTerminate(t *testing.T) { 2880 // contrived input: trigram frequencies forces selecting abc + 2881 // def for the distance iteration. There is no match, so this 2882 // will advance the compressedPostingIterator to beyond the 2883 // end. 2884 content := []byte("abc bcdbcd cdecde abcabc def efg") 2885 b := testIndexBuilder(t, nil, 2886 Document{ 2887 Name: "f1", 2888 Content: content, 2889 }, 2890 ) 2891 2892 t.Run("LineMatches", func(t *testing.T) { 2893 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 2894 }) 2895 2896 t.Run("ChunkMatches", func(t *testing.T) { 2897 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 2898 }) 2899} 2900 2901func TestDistanceHitIterBailLast(t *testing.T) { 2902 content := []byte("AST AST AST UASH") 2903 b := testIndexBuilder(t, nil, 2904 Document{ 2905 Name: "f1", 2906 Content: content, 2907 }, 2908 ) 2909 t.Run("LineMatches", func(t *testing.T) { 2910 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 2911 if len(res.Files) != 0 { 2912 t.Fatalf("got %v, want no results", res.Files) 2913 } 2914 }) 2915 2916 t.Run("LineMatches", func(t *testing.T) { 2917 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 2918 if len(res.Files) != 0 { 2919 t.Fatalf("got %v, want no results", res.Files) 2920 } 2921 }) 2922} 2923 2924func TestDocumentSectionRuneBoundary(t *testing.T) { 2925 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2926 b, err := NewIndexBuilder(nil) 2927 if err != nil { 2928 t.Fatalf("NewIndexBuilder: %v", err) 2929 } 2930 2931 for i, sec := range []DocumentSection{ 2932 {2, 6}, 2933 {3, 7}, 2934 } { 2935 if err := b.Add(Document{ 2936 Name: "f1", 2937 Content: []byte(content), 2938 Symbols: []DocumentSection{sec}, 2939 }); err == nil { 2940 t.Errorf("%d: Add succeeded", i) 2941 } 2942 } 2943} 2944 2945func TestUnicodeQuery(t *testing.T) { 2946 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2947 b := testIndexBuilder(t, nil, 2948 Document{ 2949 Name: "f1", 2950 Content: []byte(content), 2951 }, 2952 ) 2953 2954 q := &query.Substring{Pattern: content} 2955 2956 t.Run("LineMatches", func(t *testing.T) { 2957 res := searchForTest(t, b, q) 2958 if len(res.Files) != 1 { 2959 t.Fatalf("want 1 match, got %v", res.Files) 2960 } 2961 2962 f := res.Files[0] 2963 if len(f.LineMatches) != 1 { 2964 t.Fatalf("want 1 line, got %v", f.LineMatches) 2965 } 2966 l := f.LineMatches[0] 2967 2968 if len(l.LineFragments) != 1 { 2969 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 2970 } 2971 fr := l.LineFragments[0] 2972 if fr.MatchLength != len(content) { 2973 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 2974 } 2975 }) 2976 2977 t.Run("ChunkMatches", func(t *testing.T) { 2978 res := searchForTest(t, b, q, chunkOpts) 2979 if len(res.Files) != 1 { 2980 t.Fatalf("want 1 match, got %v", res.Files) 2981 } 2982 2983 f := res.Files[0] 2984 if len(f.ChunkMatches) != 1 { 2985 t.Fatalf("want 1 line, got %v", f.LineMatches) 2986 } 2987 cm := f.ChunkMatches[0] 2988 2989 if len(cm.Ranges) != 1 { 2990 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 2991 } 2992 rr := cm.Ranges[0] 2993 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 2994 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 2995 } 2996 }) 2997} 2998 2999func TestSkipInvalidContent(t *testing.T) { 3000 for _, content := range []string{ 3001 // Binary 3002 "abc def \x00 abc", 3003 } { 3004 3005 b, err := NewIndexBuilder(nil) 3006 if err != nil { 3007 t.Fatalf("NewIndexBuilder: %v", err) 3008 } 3009 3010 if err := b.Add(Document{ 3011 Name: "f1", 3012 Content: []byte(content), 3013 }); err != nil { 3014 t.Fatal(err) 3015 } 3016 3017 t.Run("LineMatches", func(t *testing.T) { 3018 q := &query.Substring{Pattern: "abc def"} 3019 res := searchForTest(t, b, q) 3020 if len(res.Files) != 0 { 3021 t.Fatalf("got %v, want no results", res.Files) 3022 } 3023 3024 q = &query.Substring{Pattern: "NOT-INDEXED"} 3025 res = searchForTest(t, b, q) 3026 if len(res.Files) != 1 { 3027 t.Fatalf("got %v, want 1 result", res.Files) 3028 } 3029 }) 3030 3031 t.Run("ChunkMatches", func(t *testing.T) { 3032 q := &query.Substring{Pattern: "abc def"} 3033 res := searchForTest(t, b, q, chunkOpts) 3034 if len(res.Files) != 0 { 3035 t.Fatalf("got %v, want no results", res.Files) 3036 } 3037 3038 q = &query.Substring{Pattern: "NOT-INDEXED"} 3039 res = searchForTest(t, b, q, chunkOpts) 3040 if len(res.Files) != 1 { 3041 t.Fatalf("got %v, want 1 result", res.Files) 3042 } 3043 }) 3044 } 3045} 3046 3047func TestCheckText(t *testing.T) { 3048 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { 3049 if err := CheckText([]byte(text), 20000); err != nil { 3050 t.Errorf("CheckText(%q): %v", text, err) 3051 } 3052 } 3053 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} { 3054 if err := CheckText([]byte(text), 15); err == nil { 3055 t.Errorf("CheckText(%q) succeeded", text) 3056 } 3057 } 3058} 3059 3060func TestLineAnd(t *testing.T) { 3061 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3062 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3063 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3064 Document{Name: "f3", Content: []byte("banana grape")}, 3065 ) 3066 pattern := "(apple)(?-s:.)*?(banana)" 3067 r, _ := syntax.Parse(pattern, syntax.Perl) 3068 3069 q := query.Regexp{ 3070 Regexp: r, 3071 Content: true, 3072 } 3073 t.Run("LineMatches", func(t *testing.T) { 3074 res := searchForTest(t, b, &q) 3075 wantRegexpCount := 1 3076 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3077 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3078 } 3079 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3080 t.Errorf("got %v, want 1 result", res.Files) 3081 } 3082 }) 3083 3084 t.Run("ChunkMatches", func(t *testing.T) { 3085 res := searchForTest(t, b, &q, chunkOpts) 3086 wantRegexpCount := 1 3087 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3088 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3089 } 3090 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3091 t.Errorf("got %v, want 1 result", res.Files) 3092 } 3093 }) 3094} 3095 3096func TestLineAndFileName(t *testing.T) { 3097 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3098 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3099 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3100 Document{Name: "apple banana", Content: []byte("banana grape")}, 3101 ) 3102 pattern := "(apple)(?-s:.)*?(banana)" 3103 r, _ := syntax.Parse(pattern, syntax.Perl) 3104 3105 q := query.Regexp{ 3106 Regexp: r, 3107 FileName: true, 3108 } 3109 t.Run("LineMatches", func(t *testing.T) { 3110 res := searchForTest(t, b, &q) 3111 wantRegexpCount := 1 3112 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3113 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3114 } 3115 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3116 t.Errorf("got %v, want 1 result", res.Files) 3117 } 3118 }) 3119 3120 t.Run("ChunkMatches", func(t *testing.T) { 3121 res := searchForTest(t, b, &q, chunkOpts) 3122 wantRegexpCount := 1 3123 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3124 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3125 } 3126 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3127 t.Errorf("got %v, want 1 result", res.Files) 3128 } 3129 }) 3130} 3131 3132func TestMultiLineRegex(t *testing.T) { 3133 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3134 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3135 Document{Name: "f2", Content: []byte("apple orange")}, 3136 Document{Name: "f3", Content: []byte("grape apple")}, 3137 ) 3138 pattern := "(apple).*?[[:space:]].*?(grape)" 3139 r, _ := syntax.Parse(pattern, syntax.Perl) 3140 3141 q := query.Regexp{ 3142 Regexp: r, 3143 } 3144 t.Run("LineMatches", func(t *testing.T) { 3145 res := searchForTest(t, b, &q) 3146 wantRegexpCount := 2 3147 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3148 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3149 } 3150 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3151 t.Errorf("got %v, want 1 result", res.Files) 3152 } 3153 if l := len(res.Files[0].LineMatches); l != 2 { 3154 t.Errorf("got %v, want 2 line matches", l) 3155 } 3156 }) 3157 3158 t.Run("ChunkMatches", func(t *testing.T) { 3159 res := searchForTest(t, b, &q, chunkOpts) 3160 wantRegexpCount := 2 3161 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3162 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3163 } 3164 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3165 t.Errorf("got %v, want 1 result", res.Files) 3166 } 3167 if l := len(res.Files[0].ChunkMatches); l != 1 { 3168 t.Errorf("got %v, want 1 chunk matches", l) 3169 } 3170 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3171 t.Errorf("got %v, want 1 chunk ranges", l) 3172 } 3173 }) 3174} 3175 3176func TestSearchTypeFileName(t *testing.T) { 3177 b := testIndexBuilder(t, &Repository{ 3178 Name: "reponame", 3179 }, 3180 Document{Name: "f1", Content: []byte("bla the needle")}, 3181 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3182 // -----------------------------------012345678901234567890-123456 3183 ) 3184 3185 t.Run("LineMatches", func(t *testing.T) { 3186 wantSingleMatch := func(res *SearchResult, want string) { 3187 t.Helper() 3188 fmatches := res.Files 3189 if len(fmatches) != 1 { 3190 t.Errorf("got %v, want 1 matches", len(fmatches)) 3191 return 3192 } 3193 if len(fmatches[0].LineMatches) != 1 { 3194 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3195 return 3196 } 3197 var got string 3198 if fmatches[0].LineMatches[0].FileName { 3199 got = fmatches[0].FileName 3200 } else { 3201 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3202 } 3203 3204 if got != want { 3205 t.Errorf("got %s, want %s", got, want) 3206 } 3207 } 3208 3209 // Only return the later match in the second file 3210 res := searchForTest(t, b, query.NewAnd( 3211 &query.Type{ 3212 Type: query.TypeFileName, 3213 Child: &query.Substring{Pattern: "needle"}, 3214 }, 3215 &query.Substring{Pattern: "file"})) 3216 wantSingleMatch(res, "f2:8") 3217 3218 // Only return a filename result 3219 res = searchForTest(t, b, 3220 &query.Type{ 3221 Type: query.TypeFileName, 3222 Child: &query.Substring{Pattern: "file"}, 3223 }) 3224 wantSingleMatch(res, "f2") 3225 }) 3226 3227 t.Run("ChunkMatches", func(t *testing.T) { 3228 wantSingleMatch := func(res *SearchResult, want string) { 3229 t.Helper() 3230 fmatches := res.Files 3231 if len(fmatches) != 1 { 3232 t.Errorf("got %v, want 1 matches", len(fmatches)) 3233 return 3234 } 3235 if len(fmatches[0].ChunkMatches) != 1 { 3236 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3237 return 3238 } 3239 var got string 3240 if fmatches[0].ChunkMatches[0].FileName { 3241 got = fmatches[0].FileName 3242 } else { 3243 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3244 } 3245 3246 if got != want { 3247 t.Errorf("got %s, want %s", got, want) 3248 } 3249 } 3250 3251 // Only return the later match in the second file 3252 res := searchForTest(t, b, query.NewAnd( 3253 &query.Type{ 3254 Type: query.TypeFileName, 3255 Child: &query.Substring{Pattern: "needle"}, 3256 }, 3257 &query.Substring{Pattern: "file"}), 3258 chunkOpts, 3259 ) 3260 wantSingleMatch(res, "f2:8") 3261 3262 // Only return a filename result 3263 res = searchForTest(t, b, 3264 &query.Type{ 3265 Type: query.TypeFileName, 3266 Child: &query.Substring{Pattern: "file"}, 3267 }, 3268 chunkOpts, 3269 ) 3270 wantSingleMatch(res, "f2") 3271 }) 3272} 3273 3274func TestSearchTypeLanguage(t *testing.T) { 3275 b := testIndexBuilder(t, &Repository{ 3276 Name: "reponame", 3277 }, 3278 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3279 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3280 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3281 ) 3282 3283 t.Log(b.languageMap) 3284 3285 t.Run("LineMatches", func(t *testing.T) { 3286 wantSingleMatch := func(res *SearchResult, want string) { 3287 t.Helper() 3288 fmatches := res.Files 3289 if len(fmatches) != 1 { 3290 t.Errorf("got %v, want 1 matches", len(fmatches)) 3291 return 3292 } 3293 if len(fmatches[0].LineMatches) != 1 { 3294 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3295 return 3296 } 3297 var got string 3298 if fmatches[0].LineMatches[0].FileName { 3299 got = fmatches[0].FileName 3300 } else { 3301 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3302 } 3303 3304 if got != want { 3305 t.Errorf("got %s, want %s", got, want) 3306 } 3307 } 3308 3309 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3310 wantSingleMatch(res, "apex.cls") 3311 3312 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3313 wantSingleMatch(res, "tex.cls") 3314 3315 res = searchForTest(t, b, &query.Language{Language: "C"}) 3316 wantSingleMatch(res, "hello.h") 3317 3318 // test fallback language search by pretending it's an older index version 3319 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3320 if len(res.Files) != 0 { 3321 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3322 } 3323 3324 b.featureVersion = 11 // force fallback 3325 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3326 wantSingleMatch(res, "hello.h") 3327 }) 3328 3329 t.Run("ChunkMatches", func(t *testing.T) { 3330 wantSingleMatch := func(res *SearchResult, want string) { 3331 t.Helper() 3332 fmatches := res.Files 3333 if len(fmatches) != 1 { 3334 t.Errorf("got %v, want 1 matches", len(fmatches)) 3335 return 3336 } 3337 if len(fmatches[0].ChunkMatches) != 1 { 3338 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3339 return 3340 } 3341 var got string 3342 if fmatches[0].ChunkMatches[0].FileName { 3343 got = fmatches[0].FileName 3344 } else { 3345 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3346 } 3347 3348 if got != want { 3349 t.Errorf("got %s, want %s", got, want) 3350 } 3351 } 3352 3353 b.featureVersion = FeatureVersion // reset feature version 3354 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3355 wantSingleMatch(res, "apex.cls") 3356 3357 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3358 wantSingleMatch(res, "tex.cls") 3359 3360 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3361 wantSingleMatch(res, "hello.h") 3362 3363 // test fallback language search by pretending it's an older index version 3364 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3365 if len(res.Files) != 0 { 3366 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3367 } 3368 3369 b.featureVersion = 11 // force fallback 3370 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3371 wantSingleMatch(res, "hello.h") 3372 }) 3373} 3374 3375func TestStats(t *testing.T) { 3376 ignored := []cmp.Option{ 3377 cmpopts.EquateEmpty(), 3378 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3379 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3380 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3381 } 3382 3383 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3384 searcher := searcherForTest(t, b) 3385 indexdata := searcher.(*indexData) 3386 return indexdata.repoListEntry 3387 } 3388 3389 t.Run("one empty repo", func(t *testing.T) { 3390 b := testIndexBuilder(t, nil) 3391 got := repoListEntries(b) 3392 want := []RepoListEntry{ 3393 { 3394 Stats: RepoStats{ 3395 Repos: 0, 3396 Shards: 1, 3397 Documents: 0, 3398 IndexBytes: 20, 3399 ContentBytes: 0, 3400 NewLinesCount: 0, 3401 DefaultBranchNewLinesCount: 0, 3402 OtherBranchesNewLinesCount: 0, 3403 }, 3404 }, 3405 } 3406 3407 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3408 t.Fatalf("mismatch (-want +got):\n%s", diff) 3409 } 3410 3411 }) 3412 3413 t.Run("one simple shard", func(t *testing.T) { 3414 b := testIndexBuilder(t, nil, 3415 Document{Name: "doc 0", Content: []byte("content 0")}, 3416 Document{Name: "doc 1", Content: []byte("content 1")}, 3417 ) 3418 got := repoListEntries(b) 3419 want := []RepoListEntry{ 3420 { 3421 Stats: RepoStats{ 3422 Repos: 0, 3423 Shards: 1, 3424 Documents: 2, 3425 IndexBytes: 224, 3426 ContentBytes: 28, 3427 NewLinesCount: 0, 3428 DefaultBranchNewLinesCount: 0, 3429 OtherBranchesNewLinesCount: 0, 3430 }, 3431 }, 3432 } 3433 3434 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3435 t.Fatalf("mismatch (-want +got):\n%s", diff) 3436 } 3437 3438 }) 3439 3440 t.Run("one compound shard", func(t *testing.T) { 3441 b := testIndexBuilderCompound(t, 3442 []*Repository{ 3443 {Name: "repo 0"}, 3444 {Name: "repo 1"}, 3445 }, 3446 [][]Document{ 3447 { 3448 {Name: "doc 0", Content: []byte("content 0")}, 3449 {Name: "doc 1", Content: []byte("content 1")}, 3450 }, 3451 { 3452 {Name: "doc 2", Content: []byte("content 2")}, 3453 {Name: "doc 3", Content: []byte("content 3")}, 3454 }, 3455 }, 3456 ) 3457 got := repoListEntries(b) 3458 want := []RepoListEntry{ 3459 { 3460 Stats: RepoStats{ 3461 Repos: 0, 3462 Shards: 1, 3463 Documents: 2, 3464 IndexBytes: 180, 3465 ContentBytes: 28, 3466 NewLinesCount: 0, 3467 DefaultBranchNewLinesCount: 0, 3468 OtherBranchesNewLinesCount: 0, 3469 }, 3470 }, 3471 { 3472 Stats: RepoStats{ 3473 Repos: 0, 3474 Shards: 1, 3475 Documents: 2, 3476 IndexBytes: 180, 3477 ContentBytes: 28, 3478 NewLinesCount: 0, 3479 DefaultBranchNewLinesCount: 0, 3480 OtherBranchesNewLinesCount: 0, 3481 }, 3482 }, 3483 } 3484 3485 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3486 t.Fatalf("mismatch (-want +got):\n%s", diff) 3487 } 3488 }) 3489 3490 t.Run("compound shard with empty repos", func(t *testing.T) { 3491 b := testIndexBuilderCompound(t, 3492 []*Repository{ 3493 {Name: "repo 0"}, 3494 {Name: "repo 1"}, 3495 {Name: "repo 2"}, 3496 {Name: "repo 3"}, 3497 {Name: "repo 4"}, 3498 }, 3499 [][]Document{ 3500 {{Name: "doc 0", Content: []byte("content 0")}}, 3501 nil, 3502 {{Name: "doc 1", Content: []byte("content 1")}}, 3503 nil, 3504 nil, 3505 }, 3506 ) 3507 got := repoListEntries(b) 3508 3509 entryEmpty := RepoListEntry{Stats: RepoStats{ 3510 Shards: 1, 3511 Documents: 0, 3512 ContentBytes: 0, 3513 }} 3514 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3515 Shards: 1, 3516 Documents: 1, 3517 ContentBytes: 14, 3518 }} 3519 3520 want := []RepoListEntry{ 3521 entryNonEmpty, 3522 entryEmpty, 3523 entryNonEmpty, 3524 entryEmpty, 3525 entryEmpty, 3526 } 3527 3528 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3529 t.Fatalf("mismatch (-want +got):\n%s", diff) 3530 } 3531 3532 }) 3533} 3534 3535// This tests the frequent pattern "\bLITERAL\b". 3536func TestWordSearch(t *testing.T) { 3537 content := []byte("needle the bla") 3538 // ----------------01234567890123 3539 3540 b := testIndexBuilder(t, nil, 3541 Document{ 3542 Name: "f1", 3543 Content: content, 3544 }) 3545 3546 t.Run("LineMatches", func(t *testing.T) { 3547 sres := searchForTest(t, b, 3548 &query.Regexp{ 3549 Regexp: mustParseRE("\\bthe\\b"), 3550 CaseSensitive: true, 3551 Content: true, 3552 }) 3553 3554 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3555 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3556 } 3557 3558 if sres.Stats.RegexpsConsidered != 0 { 3559 t.Fatal("expected regexp to be skipped") 3560 } 3561 3562 got := sres.Files[0].LineMatches[0] 3563 want := LineMatch{ 3564 LineFragments: []LineFragmentMatch{{ 3565 LineOffset: 7, 3566 Offset: 7, 3567 MatchLength: 3, 3568 }}, 3569 Line: content, 3570 FileName: false, 3571 LineNumber: 1, 3572 LineStart: 0, 3573 LineEnd: 14, 3574 } 3575 3576 if !reflect.DeepEqual(got, want) { 3577 t.Errorf("got %#v, want %#v", got, want) 3578 } 3579 }) 3580 3581 t.Run("ChunkMatches", func(t *testing.T) { 3582 sres := searchForTest(t, b, 3583 &query.Regexp{ 3584 Regexp: mustParseRE("\\bthe\\b"), 3585 CaseSensitive: true, 3586 }, chunkOpts) 3587 3588 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3589 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3590 } 3591 3592 if sres.Stats.RegexpsConsidered != 0 { 3593 t.Fatal("expected regexp to be skipped") 3594 } 3595 3596 got := sres.Files[0].ChunkMatches[0] 3597 want := ChunkMatch{ 3598 Content: content, 3599 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3600 Ranges: []Range{{ 3601 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3602 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3603 }}, 3604 } 3605 3606 if diff := cmp.Diff(want, got); diff != "" { 3607 t.Fatal(diff) 3608 } 3609 }) 3610}