fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 "github.com/kylelemons/godebug/pretty" 30 31 "github.com/sourcegraph/zoekt/query" 32) 33 34func clearScores(r *SearchResult) { 35 for i := range r.Files { 36 r.Files[i].Score = 0.0 37 for j := range r.Files[i].LineMatches { 38 r.Files[i].LineMatches[j].Score = 0.0 39 } 40 for j := range r.Files[i].ChunkMatches { 41 r.Files[i].ChunkMatches[j].Score = 0.0 42 } 43 r.Files[i].Checksum = nil 44 r.Files[i].Debug = "" 45 } 46} 47 48func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 49 t.Helper() 50 51 b, err := NewIndexBuilder(repo) 52 if err != nil { 53 t.Fatalf("NewIndexBuilder: %v", err) 54 } 55 56 for i, d := range docs { 57 if err := b.Add(d); err != nil { 58 t.Fatalf("Add %d: %v", i, err) 59 } 60 } 61 62 return b 63} 64 65func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 66 t.Helper() 67 68 b := newIndexBuilder() 69 b.indexFormatVersion = NextIndexFormatVersion 70 71 if len(repos) != len(docs) { 72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 73 } 74 75 
for i, repo := range repos { 76 if err := b.setRepository(repo); err != nil { 77 t.Fatal(err) 78 } 79 for j, d := range docs[i] { 80 if err := b.Add(d); err != nil { 81 t.Fatalf("Add %d %d: %v", i, j, err) 82 } 83 } 84 } 85 86 return b 87} 88 89func TestBoundary(t *testing.T) { 90 b := testIndexBuilder(t, nil, 91 Document{Name: "f1", Content: []byte("x the")}, 92 Document{Name: "f1", Content: []byte("reader")}) 93 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 94 if len(res.Files) > 0 { 95 t.Fatalf("got %v, want no matches", res.Files) 96 } 97} 98 99func TestDocSectionInvalid(t *testing.T) { 100 b, err := NewIndexBuilder(nil) 101 if err != nil { 102 t.Fatalf("NewIndexBuilder: %v", err) 103 } 104 doc := Document{ 105 Name: "f1", 106 Content: []byte("01234567890123"), 107 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 108 } 109 110 if err := b.Add(doc); err == nil { 111 t.Errorf("overlapping doc sections should fail") 112 } 113 114 doc = Document{ 115 Name: "f1", 116 Content: []byte("01234567890123"), 117 Symbols: []DocumentSection{{0, 20}}, 118 } 119 120 if err := b.Add(doc); err == nil { 121 t.Errorf("doc sections beyond EOF should fail") 122 } 123} 124 125func TestBasic(t *testing.T) { 126 b := testIndexBuilder(t, nil, 127 Document{ 128 Name: "f2", 129 Content: []byte("to carry water in the no later bla"), 130 // --------------0123456789012345678901234567890123 131 }) 132 133 t.Run("LineMatch", func(t *testing.T) { 134 res := searchForTest(t, b, &query.Substring{ 135 Pattern: "water", 136 CaseSensitive: true, 137 }) 138 fmatches := res.Files 139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 140 t.Fatalf("got %v, want 1 matches", fmatches) 141 } 142 143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 144 want := "f2:9" 145 if got != want { 146 t.Errorf("1: got %s, want %s", got, want) 147 } 148 }) 149 150 t.Run("ChunkMatch", func(t *testing.T) { 151 res := searchForTest(t, b, 
&query.Substring{ 152 Pattern: "water", 153 CaseSensitive: true, 154 }, chunkOpts) 155 fmatches := res.Files 156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 157 t.Fatalf("got %v, want 1 matches", fmatches) 158 } 159 160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 161 want := "f2:9" 162 if got != want { 163 t.Errorf("1: got %s, want %s", got, want) 164 } 165 }) 166} 167 168func TestEmptyIndex(t *testing.T) { 169 b := testIndexBuilder(t, nil) 170 searcher := searcherForTest(t, b) 171 172 var opts SearchOptions 173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 174 t.Fatalf("Search: %v", err) 175 } 176 177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 178 t.Fatalf("List: %v", err) 179 } 180 181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 182 t.Fatalf("Search: %v", err) 183 } 184} 185 186type memSeeker struct { 187 data []byte 188} 189 190func (s *memSeeker) Name() string { 191 return "memseeker" 192} 193 194func (s *memSeeker) Close() {} 195func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 196 return s.data[off : off+sz], nil 197} 198 199func (s *memSeeker) Size() (uint32, error) { 200 return uint32(len(s.data)), nil 201} 202 203func TestNewlines(t *testing.T) { 204 b := testIndexBuilder(t, nil, 205 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 206 // ---------------------------------------------012345-678901-234 207 208 t.Run("LineMatches", func(t *testing.T) { 209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 210 211 matches := sres.Files 212 want := []FileMatch{{ 213 FileName: "filename", 214 LineMatches: []LineMatch{{ 215 LineFragments: []LineFragmentMatch{{ 216 Offset: 8, 217 LineOffset: 2, 218 MatchLength: 3, 219 }}, 220 Line: []byte("line2"), 221 
LineStart: 6, 222 LineEnd: 11, 223 LineNumber: 2, 224 }}, 225 }} 226 227 if !reflect.DeepEqual(matches, want) { 228 t.Errorf("got %v, want %v", matches, want) 229 } 230 }) 231 232 t.Run("ChunkMatches", func(t *testing.T) { 233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 234 235 matches := sres.Files 236 want := []FileMatch{{ 237 FileName: "filename", 238 ChunkMatches: []ChunkMatch{{ 239 Content: []byte("line2"), 240 ContentStart: Location{ 241 ByteOffset: 6, 242 LineNumber: 2, 243 Column: 1, 244 }, 245 Ranges: []Range{{ 246 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 248 }}, 249 }}, 250 }} 251 252 if diff := cmp.Diff(want, matches); diff != "" { 253 t.Fatal(diff) 254 } 255 }) 256} 257 258// A result spanning multiple lines should have LineMatches that only cover 259// single lines. 260func TestQueryNewlines(t *testing.T) { 261 text := "line1\nline2\nbla" 262 b := testIndexBuilder(t, nil, 263 Document{Name: "filename", Content: []byte(text)}) 264 265 t.Run("LineMatches", func(t *testing.T) { 266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 267 matches := sres.Files 268 if len(matches) != 1 { 269 t.Fatalf("got %d file matches, want exactly one", len(matches)) 270 } 271 m := matches[0] 272 if len(m.LineMatches) != 2 { 273 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 274 } 275 }) 276 277 t.Run("ChunkMatches", func(t *testing.T) { 278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 279 matches := sres.Files 280 if len(matches) != 1 { 281 t.Fatalf("got %d file matches, want exactly one", len(matches)) 282 } 283 m := matches[0] 284 if len(m.ChunkMatches) != 1 { 285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 286 } 287 }) 288} 289 290var chunkOpts = SearchOptions{ChunkMatches: true} 291 292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o 
...SearchOptions) *SearchResult { 293 searcher := searcherForTest(t, b) 294 var opts SearchOptions 295 if len(o) > 0 { 296 opts = o[0] 297 } 298 res, err := searcher.Search(context.Background(), q, &opts) 299 if err != nil { 300 t.Fatalf("Search(%s): %v", q, err) 301 } 302 clearScores(res) 303 return res 304} 305 306func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 307 var buf bytes.Buffer 308 if err := b.Write(&buf); err != nil { 309 t.Fatal(err) 310 } 311 f := &memSeeker{buf.Bytes()} 312 313 searcher, err := NewSearcher(f) 314 if err != nil { 315 t.Fatalf("NewSearcher: %v", err) 316 } 317 318 return searcher 319} 320 321func TestCaseFold(t *testing.T) { 322 b := testIndexBuilder(t, nil, 323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 324 // -----------------------------------012345678901234 325 ) 326 t.Run("LineMatches", func(t *testing.T) { 327 sres := searchForTest(t, b, &query.Substring{ 328 Pattern: "bananas", 329 CaseSensitive: true, 330 }) 331 matches := sres.Files 332 if len(matches) != 0 { 333 t.Errorf("foldcase: got %#v, want 0 matches", matches) 334 } 335 336 sres = searchForTest(t, b, 337 &query.Substring{ 338 Pattern: "BaNaNAS", 339 CaseSensitive: true, 340 }) 341 matches = sres.Files 342 if len(matches) != 1 { 343 t.Errorf("no foldcase: got %v, want 1 matches", matches) 344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 345 t.Errorf("foldcase: got %v, want offsets 7", matches) 346 } 347 }) 348 349 t.Run("ChunkMatches", func(t *testing.T) { 350 sres := searchForTest(t, b, &query.Substring{ 351 Pattern: "bananas", 352 CaseSensitive: true, 353 }, chunkOpts) 354 matches := sres.Files 355 if len(matches) != 0 { 356 t.Errorf("foldcase: got %#v, want 0 matches", matches) 357 } 358 359 sres = searchForTest(t, b, 360 &query.Substring{ 361 Pattern: "BaNaNAS", 362 CaseSensitive: true, 363 }) 364 matches = sres.Files 365 if len(matches) != 1 { 366 t.Errorf("no foldcase: got %v, want 1 matches", matches) 367 } else 
if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 368 t.Errorf("foldcase: got %v, want offsets 7", matches) 369 } 370 }) 371} 372 373func TestAndSearch(t *testing.T) { 374 b := testIndexBuilder(t, nil, 375 Document{Name: "f1", Content: []byte("x banana y")}, 376 Document{Name: "f2", Content: []byte("x apple y")}, 377 Document{Name: "f3", Content: []byte("x banana apple y")}, 378 // ---------------------------------------0123456789012345 379 ) 380 381 t.Run("LineMatches", func(t *testing.T) { 382 sres := searchForTest(t, b, query.NewAnd( 383 &query.Substring{ 384 Pattern: "banana", 385 }, 386 &query.Substring{ 387 Pattern: "apple", 388 }, 389 )) 390 matches := sres.Files 391 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 392 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 393 } 394 395 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 396 t.Fatalf("got %#v, want offsets 2,9", matches) 397 } 398 399 wantStats := Stats{ 400 FilesLoaded: 1, 401 ContentBytesLoaded: 18, 402 IndexBytesLoaded: 8, 403 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 404 MatchCount: 1, 405 FileCount: 1, 406 FilesConsidered: 2, 407 ShardsScanned: 1, 408 } 409 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 410 t.Errorf("got stats diff %s", diff) 411 } 412 }) 413 414 t.Run("ChunkMatches", func(t *testing.T) { 415 sres := searchForTest(t, b, query.NewAnd( 416 &query.Substring{ 417 Pattern: "banana", 418 }, 419 &query.Substring{ 420 Pattern: "apple", 421 }, 422 ), chunkOpts) 423 matches := sres.Files 424 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 425 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 426 } 427 428 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset 
!= 9 { 429 t.Fatalf("got %#v, want offsets 2,9", matches) 430 } 431 432 wantStats := Stats{ 433 FilesLoaded: 1, 434 ContentBytesLoaded: 18, 435 IndexBytesLoaded: 8, 436 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 437 MatchCount: 2, 438 FileCount: 1, 439 FilesConsidered: 2, 440 ShardsScanned: 1, 441 } 442 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 443 t.Errorf("got stats diff %s", diff) 444 } 445 }) 446} 447 448func TestAndNegateSearch(t *testing.T) { 449 b := testIndexBuilder(t, nil, 450 Document{Name: "f1", Content: []byte("x banana y")}, 451 // -----------------------------------0123456789 452 Document{Name: "f4", Content: []byte("x banana apple y")}) 453 454 t.Run("LineMatches", func(t *testing.T) { 455 sres := searchForTest(t, b, query.NewAnd( 456 &query.Substring{ 457 Pattern: "banana", 458 }, 459 &query.Not{Child: &query.Substring{ 460 Pattern: "apple", 461 }})) 462 463 matches := sres.Files 464 465 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 466 t.Fatalf("got %v, want 1 match", matches) 467 } 468 if matches[0].FileName != "f1" { 469 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 470 } 471 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 472 t.Fatalf("got %v, want offset 2", matches) 473 } 474 }) 475 476 t.Run("ChunkMatches", func(t *testing.T) { 477 sres := searchForTest(t, b, 478 query.NewAnd( 479 &query.Substring{ 480 Pattern: "banana", 481 }, 482 &query.Not{Child: &query.Substring{ 483 Pattern: "apple", 484 }}, 485 ), 486 chunkOpts, 487 ) 488 489 matches := sres.Files 490 491 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 492 t.Fatalf("got %v, want 1 match", matches) 493 } 494 if matches[0].FileName != "f1" { 495 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 496 } 497 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 498 t.Fatalf("got %v, want offset 2", matches) 499 } 500 }) 501} 502 503func TestNegativeMatchesOnlyShortcut(t 
*testing.T) { 504 b := testIndexBuilder(t, nil, 505 Document{Name: "f1", Content: []byte("x banana y")}, 506 Document{Name: "f2", Content: []byte("x appelmoes y")}, 507 Document{Name: "f3", Content: []byte("x appelmoes y")}, 508 Document{Name: "f3", Content: []byte("x appelmoes y")}) 509 510 t.Run("LineMatches", func(t *testing.T) { 511 sres := searchForTest(t, b, query.NewAnd( 512 &query.Substring{ 513 Pattern: "banana", 514 }, 515 &query.Not{Child: &query.Substring{ 516 Pattern: "appel", 517 }})) 518 519 if sres.Stats.FilesConsidered != 1 { 520 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 521 } 522 }) 523 524 t.Run("ChunkMatches", func(t *testing.T) { 525 sres := searchForTest(t, b, query.NewAnd( 526 &query.Substring{ 527 Pattern: "banana", 528 }, 529 &query.Not{Child: &query.Substring{ 530 Pattern: "appel", 531 }}), chunkOpts) 532 533 if sres.Stats.FilesConsidered != 1 { 534 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 535 } 536 }) 537} 538 539func TestFileSearch(t *testing.T) { 540 b := testIndexBuilder(t, nil, 541 Document{Name: "banzana", Content: []byte("x orange y")}, 542 // -------------0123456 543 Document{Name: "banana", Content: []byte("x apple y")}, 544 // -------------012345 545 ) 546 547 t.Run("LineMatches", func(t *testing.T) { 548 sres := searchForTest(t, b, &query.Substring{ 549 Pattern: "anan", 550 FileName: true, 551 }) 552 553 matches := sres.Files 554 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 555 t.Fatalf("got %v, want 1 match", matches) 556 } 557 558 got := matches[0].LineMatches[0] 559 want := LineMatch{ 560 Line: []byte("banana"), 561 LineFragments: []LineFragmentMatch{{ 562 Offset: 1, 563 LineOffset: 1, 564 MatchLength: 4, 565 }}, 566 FileName: true, 567 } 568 569 if !reflect.DeepEqual(got, want) { 570 t.Errorf("got %#v, want %#v", got, want) 571 } 572 }) 573 574 t.Run("ChunkMatches", func(t *testing.T) { 575 sres := searchForTest(t, b, &query.Substring{ 576 Pattern: "anan", 577 FileName: true, 578 
}, chunkOpts) 579 580 matches := sres.Files 581 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 582 t.Fatalf("got %v, want 1 match", matches) 583 } 584 585 got := matches[0].ChunkMatches[0] 586 want := ChunkMatch{ 587 Content: []byte("banana"), 588 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 589 Ranges: []Range{{ 590 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 591 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 592 }}, 593 FileName: true, 594 } 595 596 if diff := cmp.Diff(want, got); diff != "" { 597 t.Fatal(diff) 598 } 599 }) 600 601 t.Run("FileNameSet", func(t *testing.T) { 602 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 603 604 matches := sres.Files 605 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 606 t.Fatalf("got %v, want 1 match", matches) 607 } 608 609 got := matches[0].ChunkMatches[0] 610 want := ChunkMatch{ 611 Content: []byte("banana"), 612 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 613 Ranges: []Range{{ 614 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 615 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 616 }}, 617 FileName: true, 618 } 619 620 if diff := cmp.Diff(want, got); diff != "" { 621 t.Fatal(diff) 622 } 623 }) 624} 625 626func TestFileCase(t *testing.T) { 627 b := testIndexBuilder(t, nil, 628 Document{Name: "BANANA", Content: []byte("x orange y")}) 629 630 t.Run("LineMatches", func(t *testing.T) { 631 sres := searchForTest(t, b, &query.Substring{ 632 Pattern: "banana", 633 FileName: true, 634 }) 635 636 matches := sres.Files 637 if len(matches) != 1 || matches[0].FileName != "BANANA" { 638 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 639 } 640 }) 641 642 t.Run("ChunkMatches", func(t *testing.T) { 643 sres := searchForTest(t, b, &query.Substring{ 644 Pattern: "banana", 645 FileName: true, 646 }, chunkOpts) 647 648 matches := sres.Files 649 if len(matches) != 1 || matches[0].FileName != "BANANA" { 650 
t.Fatalf("got %v, want 1 match 'BANANA'", matches) 651 } 652 }) 653} 654 655func TestFileRegexpSearchBruteForce(t *testing.T) { 656 b := testIndexBuilder(t, nil, 657 Document{Name: "banzana", Content: []byte("x orange y")}, 658 Document{Name: "banana", Content: []byte("x apple y")}, 659 ) 660 t.Run("LineMatches", func(t *testing.T) { 661 sres := searchForTest(t, b, &query.Regexp{ 662 Regexp: mustParseRE("[qn][zx]"), 663 FileName: true, 664 }) 665 666 matches := sres.Files 667 if len(matches) != 1 || matches[0].FileName != "banzana" { 668 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 669 } 670 }) 671 t.Run("LineMatches", func(t *testing.T) { 672 sres := searchForTest(t, b, &query.Regexp{ 673 Regexp: mustParseRE("[qn][zx]"), 674 FileName: true, 675 }, chunkOpts) 676 677 matches := sres.Files 678 if len(matches) != 1 || matches[0].FileName != "banzana" { 679 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 680 } 681 }) 682} 683 684func TestFileRegexpSearchShortString(t *testing.T) { 685 b := testIndexBuilder(t, nil, 686 Document{Name: "banana.py", Content: []byte("x orange y")}) 687 688 t.Run("LineMatches", func(t *testing.T) { 689 sres := searchForTest(t, b, &query.Regexp{ 690 Regexp: mustParseRE("ana.py"), 691 FileName: true, 692 }) 693 694 matches := sres.Files 695 if len(matches) != 1 || matches[0].FileName != "banana.py" { 696 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 697 } 698 }) 699 700 t.Run("ChunkMatches", func(t *testing.T) { 701 sres := searchForTest(t, b, &query.Regexp{ 702 Regexp: mustParseRE("ana.py"), 703 FileName: true, 704 }, chunkOpts) 705 706 matches := sres.Files 707 if len(matches) != 1 || matches[0].FileName != "banana.py" { 708 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 709 } 710 }) 711} 712 713func TestFileSubstringSearchBruteForce(t *testing.T) { 714 b := testIndexBuilder(t, nil, 715 Document{Name: "BANZANA", Content: []byte("x orange y")}, 716 Document{Name: "banana", Content: []byte("x apple y")}) 
717 718 q := &query.Substring{ 719 Pattern: "z", 720 FileName: true, 721 } 722 723 t.Run("LineMatches", func(t *testing.T) { 724 res := searchForTest(t, b, q) 725 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 726 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 727 } 728 }) 729 730 t.Run("ChunkMatches", func(t *testing.T) { 731 res := searchForTest(t, b, q, chunkOpts) 732 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 733 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 734 } 735 }) 736} 737 738func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 739 b := testIndexBuilder(t, nil, 740 Document{Name: "BANZANA", Content: []byte("x orange y")}, 741 Document{Name: "bananaq", Content: []byte("x apple y")}) 742 743 q := &query.Substring{ 744 Pattern: "q", 745 FileName: true, 746 } 747 t.Run("LineMatches", func(t *testing.T) { 748 res := searchForTest(t, b, q) 749 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 750 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 751 } 752 }) 753 754 t.Run("LineMatches", func(t *testing.T) { 755 res := searchForTest(t, b, q, chunkOpts) 756 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 757 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 758 } 759 }) 760} 761 762func TestSearchMatchAll(t *testing.T) { 763 b := testIndexBuilder(t, nil, 764 Document{Name: "banzana", Content: []byte("x orange y")}, 765 Document{Name: "banana", Content: []byte("x apple y")}) 766 767 t.Run("LineMatches", func(t *testing.T) { 768 sres := searchForTest(t, b, &query.Const{Value: true}) 769 matches := sres.Files 770 if len(matches) != 2 { 771 t.Fatalf("got %v, want 2 matches", matches) 772 } 773 }) 774 775 t.Run("ChunkMatches", func(t *testing.T) { 776 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 777 matches := sres.Files 778 if len(matches) != 2 { 779 t.Fatalf("got %v, want 2 matches", matches) 780 } 781 }) 782} 783 
784func TestSearchNewline(t *testing.T) { 785 b := testIndexBuilder(t, nil, 786 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 787 788 t.Run("LineMatches", func(t *testing.T) { 789 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 790 791 // Just check that we don't crash. 792 793 matches := sres.Files 794 if len(matches) != 1 { 795 t.Fatalf("got %v, want 1 matches", matches) 796 } 797 }) 798 799 t.Run("ChunkMatches", func(t *testing.T) { 800 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 801 802 // Just check that we don't crash. 803 804 matches := sres.Files 805 if len(matches) != 1 { 806 t.Fatalf("got %v, want 1 matches", matches) 807 } 808 }) 809} 810 811func TestSearchMatchAllRegexp(t *testing.T) { 812 b := testIndexBuilder(t, nil, 813 Document{Name: "banzana", Content: []byte("abcd")}, 814 Document{Name: "banana", Content: []byte("pqrs")}) 815 816 t.Run("LineMatches", func(t *testing.T) { 817 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 818 819 matches := sres.Files 820 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 821 t.Fatalf("got %v, want 2 matches", matches) 822 } 823 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 824 t.Fatalf("want 4 chars in every file, got %#v", matches) 825 } 826 827 }) 828 829 t.Run("ChunkMatches", func(t *testing.T) { 830 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 831 832 matches := sres.Files 833 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 834 t.Fatalf("got %v, want 2 matches", matches) 835 } 836 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 837 t.Fatalf("want 4 chars in every file, got %#v", matches) 838 } 839 840 }) 841} 842 843func TestFileRestriction(t *testing.T) { 844 b := testIndexBuilder(t, nil, 845 Document{Name: "banana1", Content: []byte("x orange y")}, 846 Document{Name: "banana2", Content: []byte("x 
apple y")}, 847 Document{Name: "orange", Content: []byte("x apple z")}) 848 849 t.Run("LineMatches", func(t *testing.T) { 850 sres := searchForTest(t, b, query.NewAnd( 851 &query.Substring{ 852 Pattern: "banana", 853 FileName: true, 854 }, 855 &query.Substring{ 856 Pattern: "apple", 857 })) 858 859 matches := sres.Files 860 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 861 t.Fatalf("got %v, want 1 match", matches) 862 } 863 864 match := matches[0].LineMatches[0] 865 got := string(match.Line) 866 want := "x apple y" 867 if got != want { 868 t.Errorf("got match %#v, want line %q", match, want) 869 } 870 }) 871 872 t.Run("ChunkMatches", func(t *testing.T) { 873 sres := searchForTest(t, b, query.NewAnd( 874 &query.Substring{ 875 Pattern: "banana", 876 FileName: true, 877 }, 878 &query.Substring{ 879 Pattern: "apple", 880 }), chunkOpts) 881 882 matches := sres.Files 883 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 884 t.Fatalf("got %v, want 1 match", matches) 885 } 886 887 match := matches[0].ChunkMatches[0] 888 got := string(match.Content) 889 want := "x apple y" 890 if got != want { 891 t.Errorf("got match %#v, want line %q", match, want) 892 } 893 }) 894} 895 896func TestFileNameBoundary(t *testing.T) { 897 b := testIndexBuilder(t, nil, 898 Document{Name: "banana2", Content: []byte("x apple y")}, 899 Document{Name: "helpers.go", Content: []byte("x apple y")}, 900 Document{Name: "foo", Content: []byte("x apple y")}) 901 902 t.Run("LineMatches", func(t *testing.T) { 903 sres := searchForTest(t, b, &query.Substring{ 904 Pattern: "helpers.go", 905 FileName: true, 906 }) 907 908 matches := sres.Files 909 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 910 t.Fatalf("got %v, want 1 match", matches) 911 } 912 }) 913 914 t.Run("ChunkMatches", func(t *testing.T) { 915 sres := searchForTest(t, b, &query.Substring{ 916 Pattern: "helpers.go", 917 FileName: true, 918 }, chunkOpts) 919 920 matches := sres.Files 921 if len(matches) != 1 || 
len(matches[0].ChunkMatches) != 1 { 922 t.Fatalf("got %v, want 1 match", matches) 923 } 924 }) 925} 926 927func TestDocumentOrder(t *testing.T) { 928 var docs []Document 929 for i := 0; i < 3; i++ { 930 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 931 } 932 933 b := testIndexBuilder(t, nil, docs...) 934 935 t.Run("LineMatches", func(t *testing.T) { 936 sres := searchForTest(t, b, query.NewAnd( 937 &query.Substring{ 938 Pattern: "needle", 939 })) 940 941 want := []string{"f0", "f1", "f2"} 942 var got []string 943 for _, f := range sres.Files { 944 got = append(got, f.FileName) 945 } 946 if !reflect.DeepEqual(got, want) { 947 t.Fatalf("got %v, want %v", got, want) 948 } 949 }) 950 951 t.Run("ChunkMatches", func(t *testing.T) { 952 sres := searchForTest(t, b, 953 query.NewAnd(&query.Substring{ 954 Pattern: "needle", 955 }), 956 chunkOpts, 957 ) 958 959 want := []string{"f0", "f1", "f2"} 960 var got []string 961 for _, f := range sres.Files { 962 got = append(got, f.FileName) 963 } 964 if !reflect.DeepEqual(got, want) { 965 t.Fatalf("got %v, want %v", got, want) 966 } 967 }) 968} 969 970func TestBranchMask(t *testing.T) { 971 b := testIndexBuilder(t, &Repository{ 972 Branches: []RepositoryBranch{ 973 {"master", "v-master"}, 974 {"stable", "v-stable"}, 975 {"bonzai", "v-bonzai"}, 976 }, 977 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 978 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 979 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 980 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 981 ) 982 983 t.Run("LineMatches", func(t *testing.T) { 984 sres := searchForTest(t, b, query.NewAnd( 985 &query.Substring{ 986 Pattern: "needle", 987 }, 988 &query.Branch{ 989 Pattern: "table", 990 })) 991 992 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != 
"f3" { 993 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 994 } 995 996 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 997 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 998 } 999 }) 1000 1001 t.Run("ChunkMatches", func(t *testing.T) { 1002 sres := searchForTest(t, b, query.NewAnd( 1003 &query.Substring{ 1004 Pattern: "needle", 1005 }, 1006 &query.Branch{ 1007 Pattern: "table", 1008 }), 1009 chunkOpts, 1010 ) 1011 1012 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1013 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1014 } 1015 1016 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1017 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1018 } 1019 }) 1020} 1021 1022func TestBranchLimit(t *testing.T) { 1023 for limit := 64; limit <= 65; limit++ { 1024 r := &Repository{} 1025 for i := 0; i < limit; i++ { 1026 s := fmt.Sprintf("b%d", i) 1027 r.Branches = append(r.Branches, RepositoryBranch{ 1028 s, "v-" + s, 1029 }) 1030 } 1031 _, err := NewIndexBuilder(r) 1032 if limit == 64 && err != nil { 1033 t.Fatalf("NewIndexBuilder: %v", err) 1034 } else if limit == 65 && err == nil { 1035 t.Fatalf("NewIndexBuilder succeeded") 1036 } 1037 } 1038} 1039 1040func TestBranchReport(t *testing.T) { 1041 branches := []string{"stable", "master"} 1042 b := testIndexBuilder(t, &Repository{ 1043 Branches: []RepositoryBranch{ 1044 {"stable", "vs"}, 1045 {"master", "vm"}, 1046 }, 1047 }, 1048 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1049 1050 t.Run("LineMatches", func(t *testing.T) { 1051 sres := searchForTest(t, b, &query.Substring{ 1052 Pattern: "needle", 1053 }) 1054 if len(sres.Files) != 1 { 1055 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1056 } 1057 1058 f := sres.Files[0] 1059 if !reflect.DeepEqual(f.Branches, branches) { 1060 t.Fatalf("got branches %q, want %q", f.Branches, 
branches) 1061 } 1062 }) 1063 1064 t.Run("ChunkMatches", func(t *testing.T) { 1065 sres := searchForTest(t, b, &query.Substring{ 1066 Pattern: "needle", 1067 }, chunkOpts) 1068 if len(sres.Files) != 1 { 1069 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1070 } 1071 1072 f := sres.Files[0] 1073 if !reflect.DeepEqual(f.Branches, branches) { 1074 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1075 } 1076 }) 1077 1078} 1079 1080func TestBranchVersions(t *testing.T) { 1081 b := testIndexBuilder(t, &Repository{ 1082 Branches: []RepositoryBranch{ 1083 {"stable", "v-stable"}, 1084 {"master", "v-master"}, 1085 }, 1086 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1087 1088 t.Run("LineMatches", func(t *testing.T) { 1089 sres := searchForTest(t, b, &query.Substring{ 1090 Pattern: "needle", 1091 }) 1092 if len(sres.Files) != 1 { 1093 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1094 } 1095 1096 f := sres.Files[0] 1097 if f.Version != "v-master" { 1098 t.Fatalf("got file %#v, want version 'v-master'", f) 1099 } 1100 }) 1101 1102 t.Run("ChunkMatches", func(t *testing.T) { 1103 sres := searchForTest(t, b, &query.Substring{ 1104 Pattern: "needle", 1105 }, chunkOpts) 1106 if len(sres.Files) != 1 { 1107 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1108 } 1109 1110 f := sres.Files[0] 1111 if f.Version != "v-master" { 1112 t.Fatalf("got file %#v, want version 'v-master'", f) 1113 } 1114 }) 1115} 1116 1117func mustParseRE(s string) *syntax.Regexp { 1118 r, err := syntax.Parse(s, syntax.Perl) 1119 if err != nil { 1120 panic(err) 1121 } 1122 1123 return r 1124} 1125 1126func TestRegexp(t *testing.T) { 1127 content := []byte("needle the bla") 1128 // ----------------01234567890123 1129 1130 b := testIndexBuilder(t, nil, 1131 Document{ 1132 Name: "f1", 1133 Content: content, 1134 }) 1135 1136 t.Run("LineMatches", func(t *testing.T) { 1137 sres := searchForTest(t, b, 1138 &query.Regexp{ 1139 Regexp: 
mustParseRE("dle.*bla"), 1140 }) 1141 1142 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1143 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1144 } 1145 1146 got := sres.Files[0].LineMatches[0] 1147 want := LineMatch{ 1148 LineFragments: []LineFragmentMatch{{ 1149 LineOffset: 3, 1150 Offset: 3, 1151 MatchLength: 11, 1152 }}, 1153 Line: content, 1154 FileName: false, 1155 LineNumber: 1, 1156 LineStart: 0, 1157 LineEnd: 14, 1158 } 1159 1160 if !reflect.DeepEqual(got, want) { 1161 t.Errorf("got %#v, want %#v", got, want) 1162 } 1163 }) 1164 1165 t.Run("ChunkMatches", func(t *testing.T) { 1166 sres := searchForTest(t, b, 1167 &query.Regexp{ 1168 Regexp: mustParseRE("dle.*bla"), 1169 }, chunkOpts) 1170 1171 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1172 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1173 } 1174 1175 got := sres.Files[0].ChunkMatches[0] 1176 want := ChunkMatch{ 1177 Content: content, 1178 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1179 Ranges: []Range{{ 1180 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1181 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1182 }}, 1183 } 1184 1185 if diff := cmp.Diff(want, got); diff != "" { 1186 t.Fatal(diff) 1187 } 1188 }) 1189} 1190 1191func TestRegexpFile(t *testing.T) { 1192 content := []byte("needle the bla") 1193 1194 name := "let's play: find the mussel" 1195 b := testIndexBuilder(t, nil, 1196 Document{Name: name, Content: content}, 1197 Document{Name: "play.txt", Content: content}) 1198 1199 t.Run("LineMatches", func(t *testing.T) { 1200 sres := searchForTest(t, b, 1201 &query.Regexp{ 1202 Regexp: mustParseRE("play.*mussel"), 1203 FileName: true, 1204 }) 1205 1206 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1207 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1208 } 1209 1210 if sres.Files[0].FileName != name { 1211 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1212 } 
1213 }) 1214 1215 t.Run("ChunkMatches", func(t *testing.T) { 1216 sres := searchForTest(t, b, 1217 &query.Regexp{ 1218 Regexp: mustParseRE("play.*mussel"), 1219 FileName: true, 1220 }, chunkOpts) 1221 1222 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1223 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1224 } 1225 1226 if sres.Files[0].FileName != name { 1227 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1228 } 1229 }) 1230} 1231 1232func TestRegexpOrder(t *testing.T) { 1233 content := []byte("bla the needle") 1234 // ----------------01234567890123 1235 1236 b := testIndexBuilder(t, nil, 1237 Document{Name: "f1", Content: content}) 1238 1239 t.Run("LineMatches", func(t *testing.T) { 1240 sres := searchForTest(t, b, 1241 &query.Regexp{ 1242 Regexp: mustParseRE("dle.*bla"), 1243 }) 1244 1245 if len(sres.Files) != 0 { 1246 t.Fatalf("got %v, want 0 matches", sres.Files) 1247 } 1248 }) 1249 1250 t.Run("ChunkMatches", func(t *testing.T) { 1251 sres := searchForTest(t, b, 1252 &query.Regexp{ 1253 Regexp: mustParseRE("dle.*bla"), 1254 }) 1255 1256 if len(sres.Files) != 0 { 1257 t.Fatalf("got %v, want 0 matches", sres.Files) 1258 } 1259 }) 1260} 1261 1262func TestRepoName(t *testing.T) { 1263 content := []byte("bla the needle") 1264 // ----------------01234567890123 1265 1266 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1267 Document{Name: "f1", Content: content}) 1268 1269 t.Run("LineMatches", func(t *testing.T) { 1270 sres := searchForTest(t, b, 1271 query.NewAnd( 1272 &query.Substring{Pattern: "needle"}, 1273 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1274 )) 1275 1276 if len(sres.Files) != 0 { 1277 t.Fatalf("got %v, want 0 matches", sres.Files) 1278 } 1279 1280 if sres.Stats.FilesConsidered > 0 { 1281 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1282 } 1283 1284 sres = searchForTest(t, b, 1285 query.NewAnd( 1286 &query.Substring{Pattern: "needle"}, 1287 
&query.Repo{Regexp: regexp.MustCompile("bla")}, 1288 )) 1289 if len(sres.Files) != 1 { 1290 t.Fatalf("got %v, want 1 match", sres.Files) 1291 } 1292 }) 1293 1294 t.Run("ChunkMatches", func(t *testing.T) { 1295 sres := searchForTest(t, b, 1296 query.NewAnd( 1297 &query.Substring{Pattern: "needle"}, 1298 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1299 ), 1300 chunkOpts, 1301 ) 1302 1303 if len(sres.Files) != 0 { 1304 t.Fatalf("got %v, want 0 matches", sres.Files) 1305 } 1306 1307 if sres.Stats.FilesConsidered > 0 { 1308 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1309 } 1310 1311 sres = searchForTest(t, b, 1312 query.NewAnd( 1313 &query.Substring{Pattern: "needle"}, 1314 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1315 )) 1316 if len(sres.Files) != 1 { 1317 t.Fatalf("got %v, want 1 match", sres.Files) 1318 } 1319 }) 1320} 1321 1322func TestMergeMatches(t *testing.T) { 1323 content := []byte("blablabla") 1324 b := testIndexBuilder(t, nil, 1325 Document{Name: "f1", Content: content}) 1326 1327 t.Run("LineMatches", func(t *testing.T) { 1328 sres := searchForTest(t, b, 1329 &query.Substring{Pattern: "bla"}) 1330 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1331 t.Fatalf("got %v, want 1 match", sres.Files) 1332 } 1333 }) 1334 1335 t.Run("ChunkMatches", func(t *testing.T) { 1336 sres := searchForTest(t, b, 1337 &query.Substring{Pattern: "bla"}, 1338 chunkOpts, 1339 ) 1340 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1341 t.Fatalf("got %v, want 1 match", sres.Files) 1342 } 1343 }) 1344} 1345 1346func TestRepoURL(t *testing.T) { 1347 content := []byte("blablabla") 1348 b := testIndexBuilder(t, &Repository{ 1349 Name: "name", 1350 URL: "URL", 1351 CommitURLTemplate: "commit", 1352 FileURLTemplate: "file-url", 1353 LineFragmentTemplate: "fragment", 1354 }, Document{Name: "f1", Content: content}) 1355 1356 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1357 1358 if 
sres.RepoURLs["name"] != "file-url" { 1359 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1360 } 1361 if sres.LineFragments["name"] != "fragment" { 1362 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1363 } 1364} 1365 1366func TestRegexpCaseSensitive(t *testing.T) { 1367 content := []byte("bla\nfunc unmarshalGitiles\n") 1368 b := testIndexBuilder(t, nil, Document{ 1369 Name: "f1", 1370 Content: content, 1371 }) 1372 1373 t.Run("LineMatches", func(t *testing.T) { 1374 res := searchForTest(t, b, 1375 &query.Regexp{ 1376 Regexp: mustParseRE("func.*Gitiles"), 1377 CaseSensitive: true, 1378 }) 1379 1380 if len(res.Files) != 1 { 1381 t.Fatalf("got %v, want one match", res.Files) 1382 } 1383 }) 1384 1385 t.Run("ChunkMatches", func(t *testing.T) { 1386 res := searchForTest(t, b, 1387 &query.Regexp{ 1388 Regexp: mustParseRE("func.*Gitiles"), 1389 CaseSensitive: true, 1390 }, 1391 chunkOpts, 1392 ) 1393 1394 if len(res.Files) != 1 { 1395 t.Fatalf("got %v, want one match", res.Files) 1396 } 1397 }) 1398} 1399 1400func TestRegexpCaseFolding(t *testing.T) { 1401 content := []byte("bla\nfunc unmarshalGitiles\n") 1402 1403 b := testIndexBuilder(t, nil, 1404 Document{Name: "f1", Content: content}) 1405 res := searchForTest(t, b, 1406 &query.Regexp{ 1407 Regexp: mustParseRE("func.*GITILES"), 1408 CaseSensitive: false, 1409 }) 1410 1411 if len(res.Files) != 1 { 1412 t.Fatalf("got %v, want one match", res.Files) 1413 } 1414} 1415 1416func TestCaseRegexp(t *testing.T) { 1417 content := []byte("BLABLABLA") 1418 b := testIndexBuilder(t, nil, 1419 Document{Name: "f1", Content: content}) 1420 1421 t.Run("LineMatches", func(t *testing.T) { 1422 res := searchForTest(t, b, 1423 &query.Regexp{ 1424 Regexp: mustParseRE("[xb][xl][xa]"), 1425 CaseSensitive: true, 1426 }) 1427 1428 if len(res.Files) > 0 { 1429 t.Fatalf("got %v, want no matches", res.Files) 1430 } 1431 }) 1432 1433 t.Run("ChunkMatches", func(t *testing.T) { 1434 res := searchForTest(t, b, 1435 
&query.Regexp{ 1436 Regexp: mustParseRE("[xb][xl][xa]"), 1437 CaseSensitive: true, 1438 }, 1439 chunkOpts, 1440 ) 1441 1442 if len(res.Files) > 0 { 1443 t.Fatalf("got %v, want no matches", res.Files) 1444 } 1445 }) 1446} 1447 1448func TestNegativeRegexp(t *testing.T) { 1449 content := []byte("BLABLABLA needle bla") 1450 b := testIndexBuilder(t, nil, 1451 Document{Name: "f1", Content: content}) 1452 1453 t.Run("LineMatches", func(t *testing.T) { 1454 res := searchForTest(t, b, 1455 query.NewAnd( 1456 &query.Substring{ 1457 Pattern: "needle", 1458 }, 1459 &query.Not{ 1460 Child: &query.Regexp{ 1461 Regexp: mustParseRE(".cs"), 1462 }, 1463 })) 1464 1465 if len(res.Files) != 1 { 1466 t.Fatalf("got %v, want 1 match", res.Files) 1467 } 1468 }) 1469 1470 t.Run("ChunkMatches", func(t *testing.T) { 1471 res := searchForTest(t, b, 1472 query.NewAnd( 1473 &query.Substring{ 1474 Pattern: "needle", 1475 }, 1476 &query.Not{ 1477 Child: &query.Regexp{ 1478 Regexp: mustParseRE(".cs"), 1479 }, 1480 }, 1481 ), 1482 chunkOpts) 1483 1484 if len(res.Files) != 1 { 1485 t.Fatalf("got %v, want 1 match", res.Files) 1486 } 1487 }) 1488} 1489 1490func TestSymbolRank(t *testing.T) { 1491 t.Skip() 1492 1493 content := []byte("func bla() blubxxxxx") 1494 // ----------------01234567890123456789 1495 b := testIndexBuilder(t, nil, 1496 Document{ 1497 Name: "f1", 1498 Content: content, 1499 }, Document{ 1500 Name: "f2", 1501 Content: content, 1502 Symbols: []DocumentSection{{5, 8}}, 1503 }, Document{ 1504 Name: "f3", 1505 Content: content, 1506 }) 1507 1508 t.Run("LineMatches", func(t *testing.T) { 1509 res := searchForTest(t, b, 1510 &query.Substring{ 1511 CaseSensitive: false, 1512 Pattern: "bla", 1513 }) 1514 1515 if len(res.Files) != 3 { 1516 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1517 } 1518 if res.Files[0].FileName != "f2" { 1519 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1520 } 1521 }) 1522 1523 t.Run("ChunkMatches", func(t *testing.T) { 1524 res := searchForTest(t, b, 1525 &query.Substring{ 1526 CaseSensitive: false, 1527 Pattern: "bla", 1528 }, chunkOpts) 1529 1530 if len(res.Files) != 3 { 1531 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1532 } 1533 if res.Files[0].FileName != "f2" { 1534 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1535 } 1536 }) 1537} 1538 1539func TestSymbolRankRegexpUTF8(t *testing.T) { 1540 t.Skip() 1541 1542 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1543 content := []byte(prefix + 1544 "func bla() blub") 1545 // ------012345678901234 1546 b := testIndexBuilder(t, nil, 1547 Document{ 1548 Name: "f1", 1549 Content: content, 1550 }, Document{ 1551 Name: "f2", 1552 Content: content, 1553 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1554 }, Document{ 1555 Name: "f3", 1556 Content: content, 1557 }) 1558 1559 t.Run("LineMatches", func(t *testing.T) { 1560 res := searchForTest(t, b, 1561 &query.Regexp{ 1562 Regexp: mustParseRE("b.a"), 1563 }) 1564 1565 if len(res.Files) != 3 { 1566 t.Fatalf("got %#v, want 3 files", res.Files) 1567 } 1568 if res.Files[0].FileName != "f2" { 1569 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1570 } 1571 }) 1572 1573 t.Run("ChunjkMatches", func(t *testing.T) { 1574 res := searchForTest(t, b, 1575 &query.Regexp{ 1576 Regexp: mustParseRE("b.a"), 1577 }, chunkOpts) 1578 1579 if len(res.Files) != 3 { 1580 t.Fatalf("got %#v, want 3 files", res.Files) 1581 } 1582 if res.Files[0].FileName != "f2" { 1583 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1584 } 1585 }) 1586} 1587 1588func TestPartialSymbolRank(t *testing.T) { 1589 t.Skip() 1590 1591 content := []byte("func bla() blub") 1592 // 
----------------012345678901234 1593 1594 b := testIndexBuilder(t, nil, 1595 Document{ 1596 Name: "f1", 1597 Content: content, 1598 Symbols: []DocumentSection{{4, 9}}, 1599 }, Document{ 1600 Name: "f2", 1601 Content: content, 1602 Symbols: []DocumentSection{{4, 8}}, 1603 }, Document{ 1604 Name: "f3", 1605 Content: content, 1606 Symbols: []DocumentSection{{4, 9}}, 1607 }) 1608 1609 t.Run("LineMatches", func(t *testing.T) { 1610 res := searchForTest(t, b, 1611 &query.Substring{ 1612 Pattern: "bla", 1613 }) 1614 1615 if len(res.Files) != 3 { 1616 t.Fatalf("got %#v, want 3 files", res.Files) 1617 } 1618 if res.Files[0].FileName != "f2" { 1619 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1620 } 1621 }) 1622 1623 t.Run("ChunkMatches", func(t *testing.T) { 1624 res := searchForTest(t, b, 1625 &query.Substring{ 1626 Pattern: "bla", 1627 }, chunkOpts) 1628 1629 if len(res.Files) != 3 { 1630 t.Fatalf("got %#v, want 3 files", res.Files) 1631 } 1632 if res.Files[0].FileName != "f2" { 1633 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1634 } 1635 }) 1636} 1637 1638func TestNegativeRepo(t *testing.T) { 1639 content := []byte("bla the needle") 1640 // ----------------01234567890123 1641 b := testIndexBuilder(t, &Repository{ 1642 Name: "bla", 1643 }, Document{Name: "f1", Content: content}) 1644 1645 t.Run("LineMatches", func(t *testing.T) { 1646 sres := searchForTest(t, b, 1647 query.NewAnd( 1648 &query.Substring{Pattern: "needle"}, 1649 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1650 )) 1651 1652 if len(sres.Files) != 0 { 1653 t.Fatalf("got %v, want 0 matches", sres.Files) 1654 } 1655 }) 1656 1657 t.Run("ChunkMatches", func(t *testing.T) { 1658 sres := searchForTest(t, b, 1659 query.NewAnd( 1660 &query.Substring{Pattern: "needle"}, 1661 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1662 ), chunkOpts) 1663 1664 if len(sres.Files) != 0 { 1665 t.Fatalf("got %v, want 0 matches", sres.Files) 1666 } 1667 }) 1668} 
1669 1670func TestListRepos(t *testing.T) { 1671 content := []byte("bla the needle\n") 1672 // ----------------012345678901234- 1673 1674 t.Run("default and minimal fallback", func(t *testing.T) { 1675 repo := &Repository{ 1676 Name: "reponame", 1677 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1678 } 1679 b := testIndexBuilder(t, repo, 1680 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1681 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1682 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1683 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1684 1685 searcher := searcherForTest(t, b) 1686 1687 for _, opts := range []*ListOptions{ 1688 nil, 1689 {Minimal: false}, 1690 {Minimal: true}, 1691 } { 1692 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1693 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1694 1695 res, err := searcher.List(context.Background(), q, opts) 1696 if err != nil { 1697 t.Fatalf("List(%v): %v", q, err) 1698 } 1699 1700 want := &RepoList{ 1701 Repos: []*RepoListEntry{{ 1702 Repository: *repo, 1703 Stats: RepoStats{ 1704 Documents: 4, 1705 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1706 Shards: 1, 1707 1708 NewLinesCount: 4, 1709 DefaultBranchNewLinesCount: 2, 1710 OtherBranchesNewLinesCount: 3, 1711 }, 1712 }}, 1713 Stats: RepoStats{ 1714 Documents: 4, 1715 ContentBytes: 68, 1716 Shards: 1, 1717 1718 NewLinesCount: 4, 1719 DefaultBranchNewLinesCount: 2, 1720 OtherBranchesNewLinesCount: 3, 1721 }, 1722 } 1723 ignored := []cmp.Option{ 1724 cmpopts.EquateEmpty(), 1725 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1726 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1727 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1728 cmpopts.IgnoreFields(Repository{}, "priority"), 1729 } 1730 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1731 t.Fatalf("mismatch (-want +got):\n%s", diff) 1732 } 1733 1734 q = 
&query.Repo{Regexp: regexp.MustCompile("bla")} 1735 res, err = searcher.List(context.Background(), q, nil) 1736 if err != nil { 1737 t.Fatalf("List(%v): %v", q, err) 1738 } 1739 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1740 t.Fatalf("got %v, want 0 matches", res) 1741 } 1742 }) 1743 } 1744 }) 1745 1746 t.Run("minimal", func(t *testing.T) { 1747 repo := &Repository{ 1748 ID: 1234, 1749 Name: "reponame", 1750 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1751 RawConfig: map[string]string{"repoid": "1234"}, 1752 } 1753 b := testIndexBuilder(t, repo, 1754 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1755 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1756 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1757 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1758 1759 searcher := searcherForTest(t, b) 1760 1761 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1762 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1763 if err != nil { 1764 t.Fatalf("List(%v): %v", q, err) 1765 } 1766 1767 want := &RepoList{ 1768 Minimal: map[uint32]*MinimalRepoListEntry{ 1769 repo.ID: { 1770 HasSymbols: repo.HasSymbols, 1771 Branches: repo.Branches, 1772 }, 1773 }, 1774 Stats: RepoStats{ 1775 Shards: 1, 1776 Documents: 4, 1777 IndexBytes: 412, 1778 ContentBytes: 68, 1779 NewLinesCount: 4, 1780 DefaultBranchNewLinesCount: 2, 1781 OtherBranchesNewLinesCount: 3, 1782 }, 1783 } 1784 1785 ignored := []cmp.Option{ 1786 cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 1787 } 1788 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1789 t.Fatalf("mismatch (-want +got):\n%s", diff) 1790 } 1791 1792 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1793 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1794 if err != nil { 1795 t.Fatalf("List(%v): %v", q, err) 1796 } 1797 if len(res.Repos) != 0 || 
len(res.Minimal) != 0 { 1798 t.Fatalf("got %v, want 0 matches", res) 1799 } 1800 }) 1801} 1802 1803func TestListReposByContent(t *testing.T) { 1804 content := []byte("bla the needle") 1805 1806 b := testIndexBuilder(t, &Repository{ 1807 Name: "reponame", 1808 }, 1809 Document{Name: "f1", Content: content}, 1810 Document{Name: "f2", Content: content}) 1811 1812 searcher := searcherForTest(t, b) 1813 q := &query.Substring{Pattern: "needle"} 1814 res, err := searcher.List(context.Background(), q, nil) 1815 if err != nil { 1816 t.Fatalf("List(%v): %v", q, err) 1817 } 1818 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1819 t.Fatalf("got %v, want 1 matches", res) 1820 } 1821 if got := res.Repos[0].Stats.Shards; got != 1 { 1822 t.Fatalf("got %d, want 1 shard", got) 1823 } 1824 q = &query.Substring{Pattern: "foo"} 1825 res, err = searcher.List(context.Background(), q, nil) 1826 if err != nil { 1827 t.Fatalf("List(%v): %v", q, err) 1828 } 1829 if len(res.Repos) != 0 { 1830 t.Fatalf("got %v, want 0 matches", res) 1831 } 1832} 1833 1834func TestMetadata(t *testing.T) { 1835 content := []byte("bla the needle") 1836 1837 b := testIndexBuilder(t, &Repository{ 1838 Name: "reponame", 1839 }, Document{Name: "f1", Content: content}, 1840 Document{Name: "f2", Content: content}) 1841 1842 var buf bytes.Buffer 1843 if err := b.Write(&buf); err != nil { 1844 t.Fatal(err) 1845 } 1846 f := &memSeeker{buf.Bytes()} 1847 1848 rd, _, err := ReadMetadata(f) 1849 if err != nil { 1850 t.Fatalf("ReadMetadata: %v", err) 1851 } 1852 1853 if got, want := rd[0].Name, "reponame"; got != want { 1854 t.Fatalf("got %q want %q", got, want) 1855 } 1856} 1857 1858func TestOr(t *testing.T) { 1859 b := testIndexBuilder(t, nil, 1860 Document{Name: "f1", Content: []byte("needle")}, 1861 Document{Name: "f2", Content: []byte("banana")}) 1862 t.Run("LineMatches", func(t *testing.T) { 1863 sres := searchForTest(t, b, query.NewOr( 1864 &query.Substring{Pattern: "needle"}, 1865 
&query.Substring{Pattern: "banana"})) 1866 1867 if len(sres.Files) != 2 { 1868 t.Fatalf("got %v, want 2 files", sres.Files) 1869 } 1870 }) 1871 1872 t.Run("ChunkMatches", func(t *testing.T) { 1873 sres := searchForTest(t, b, query.NewOr( 1874 &query.Substring{Pattern: "needle"}, 1875 &query.Substring{Pattern: "banana"})) 1876 1877 if len(sres.Files) != 2 { 1878 t.Fatalf("got %v, want 2 files", sres.Files) 1879 } 1880 }) 1881} 1882 1883func TestFrequency(t *testing.T) { 1884 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 1885 1886 b := testIndexBuilder(t, nil, 1887 Document{ 1888 Name: "f1", 1889 Content: content, 1890 }) 1891 1892 t.Run("LineMatches", func(t *testing.T) { 1893 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 1894 if len(sres.Files) != 0 { 1895 t.Errorf("got %v, wanted 0 matches", sres.Files) 1896 } 1897 }) 1898 1899 t.Run("ChunkMatches", func(t *testing.T) { 1900 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 1901 if len(sres.Files) != 0 { 1902 t.Errorf("got %v, wanted 0 matches", sres.Files) 1903 } 1904 }) 1905} 1906 1907func TestMatchNewline(t *testing.T) { 1908 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 1909 if err != nil { 1910 t.Fatalf("syntax.Parse: %v", err) 1911 } 1912 1913 content := []byte("pqr\nalex") 1914 1915 b := testIndexBuilder(t, nil, 1916 Document{ 1917 Name: "f1", 1918 Content: content, 1919 }) 1920 1921 t.Run("LineMatches", func(t *testing.T) { 1922 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1923 if len(sres.Files) != 1 { 1924 t.Errorf("got %v, wanted 1 matches", sres.Files) 1925 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 1926 t.Errorf("got match line %q, want %q", l, content) 1927 } 1928 }) 1929 1930 t.Run("ChunkMatches", func(t *testing.T) { 1931 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 1932 if len(sres.Files) != 1 { 1933 
t.Errorf("got %v, wanted 1 matches", sres.Files) 1934 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 1935 t.Errorf("got match line %q, want %q", c, content) 1936 } 1937 }) 1938} 1939 1940func TestSubRepo(t *testing.T) { 1941 subRepos := map[string]*Repository{ 1942 "sub": { 1943 Name: "sub-name", 1944 LineFragmentTemplate: "sub-line", 1945 }, 1946 } 1947 1948 content := []byte("pqr\nalex") 1949 1950 b := testIndexBuilder(t, &Repository{ 1951 SubRepoMap: subRepos, 1952 }, Document{ 1953 Name: "sub/f1", 1954 Content: content, 1955 SubRepositoryPath: "sub", 1956 }) 1957 1958 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 1959 if len(sres.Files) != 1 { 1960 t.Fatalf("got %v, wanted 1 matches", sres.Files) 1961 } 1962 1963 f := sres.Files[0] 1964 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 1965 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 1966 } 1967 1968 if sres.LineFragments["sub-name"] != "sub-line" { 1969 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 1970 } 1971} 1972 1973func TestSearchEither(t *testing.T) { 1974 b := testIndexBuilder(t, nil, 1975 Document{Name: "f1", Content: []byte("bla needle bla")}, 1976 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 1977 1978 t.Run("LineMatches", func(t *testing.T) { 1979 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 1980 if len(sres.Files) != 2 { 1981 t.Fatalf("got %v, wanted 2 matches", sres.Files) 1982 } 1983 1984 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 1985 if len(sres.Files) != 1 { 1986 t.Fatalf("got %v, wanted 1 match", sres.Files) 1987 } 1988 1989 if got, want := sres.Files[0].FileName, "f1"; got != want { 1990 t.Errorf("got %q, want %q", got, want) 1991 } 1992 }) 1993 1994 t.Run("ChunkMatches", func(t *testing.T) { 1995 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 
1996 if len(sres.Files) != 2 { 1997 t.Fatalf("got %v, wanted 2 matches", sres.Files) 1998 } 1999 2000 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2001 if len(sres.Files) != 1 { 2002 t.Fatalf("got %v, wanted 1 match", sres.Files) 2003 } 2004 2005 if got, want := sres.Files[0].FileName, "f1"; got != want { 2006 t.Errorf("got %q, want %q", got, want) 2007 } 2008 }) 2009} 2010 2011func TestUnicodeExactMatch(t *testing.T) { 2012 needle := "néédlÉ" 2013 content := []byte("blá blá " + needle + " blâ") 2014 2015 b := testIndexBuilder(t, nil, 2016 Document{Name: "f1", Content: content}) 2017 2018 t.Run("LineMatches", func(t *testing.T) { 2019 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2020 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2021 } 2022 }) 2023 2024 t.Run("ChunkMatches", func(t *testing.T) { 2025 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2026 if len(res.Files) != 1 { 2027 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2028 } 2029 }) 2030} 2031 2032func TestUnicodeCoverContent(t *testing.T) { 2033 needle := "néédlÉ" 2034 content := []byte("blá blá " + needle + " blâ") 2035 2036 b := testIndexBuilder(t, nil, 2037 Document{Name: "f1", Content: content}) 2038 2039 t.Run("LineMatches", func(t *testing.T) { 2040 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2041 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2042 } 2043 2044 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2045 if len(res.Files) != 1 { 2046 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2047 } 2048 2049 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2050 t.Errorf("got %d want %d", got, want) 2051 } 2052 }) 2053 2054 
t.Run("ChunkMatches", func(t *testing.T) { 2055 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2056 if len(res.Files) != 0 { 2057 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2058 } 2059 2060 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2061 if len(res.Files) != 1 { 2062 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2063 } 2064 2065 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2066 want := uint32(strings.Index(string(content), needle)) 2067 if got != want { 2068 t.Errorf("got %d want %d", got, want) 2069 } 2070 }) 2071} 2072 2073func TestUnicodeNonCoverContent(t *testing.T) { 2074 needle := "nééáádlÉ" 2075 content := []byte("blá blá " + needle + " blâ") 2076 2077 b := testIndexBuilder(t, nil, 2078 Document{Name: "f1", Content: content}) 2079 2080 t.Run("LineMatches", func(t *testing.T) { 2081 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2082 if len(res.Files) != 1 { 2083 t.Fatalf("got %v, wanted 1 match", res.Files) 2084 } 2085 2086 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2087 t.Errorf("got %d want %d", got, want) 2088 } 2089 }) 2090 2091 t.Run("ChunkMatches", func(t *testing.T) { 2092 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2093 if len(res.Files) != 1 { 2094 t.Fatalf("got %v, wanted 1 match", res.Files) 2095 } 2096 2097 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2098 want := uint32(strings.Index(string(content), needle)) 2099 if got != want { 2100 t.Errorf("got %d want %d", got, want) 2101 } 2102 }) 2103} 2104 2105const kelvinCodePoint = 8490 2106 2107func TestUnicodeVariableLength(t *testing.T) { 2108 lower := 'k' 2109 upper := rune(kelvinCodePoint) 2110 2111 needle := "nee" + string([]rune{lower}) + "eed" 2112 corpus := []byte("nee" + 
string([]rune{upper}) + "eed" + 2113 " ee" + string([]rune{lower}) + "ee" + 2114 " ee" + string([]rune{upper}) + "ee") 2115 2116 t.Run("LineMatches", func(t *testing.T) { 2117 b := testIndexBuilder(t, nil, 2118 Document{Name: "f1", Content: []byte(corpus)}) 2119 2120 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2121 if len(res.Files) != 1 { 2122 t.Fatalf("got %v, wanted 1 match", res.Files) 2123 } 2124 }) 2125 2126 t.Run("ChunkMatches", func(t *testing.T) { 2127 b := testIndexBuilder(t, nil, 2128 Document{Name: "f1", Content: []byte(corpus)}) 2129 2130 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2131 if len(res.Files) != 1 { 2132 t.Fatalf("got %v, wanted 1 match", res.Files) 2133 } 2134 }) 2135} 2136 2137func TestUnicodeFileStartOffsets(t *testing.T) { 2138 unicode := "世界" 2139 wat := "waaaaaat" 2140 b := testIndexBuilder(t, nil, 2141 Document{ 2142 Name: "f1", 2143 Content: []byte(unicode), 2144 }, 2145 Document{ 2146 Name: "f2", 2147 Content: []byte(wat), 2148 }, 2149 ) 2150 q := &query.Substring{Pattern: wat, Content: true} 2151 res := searchForTest(t, b, q) 2152 if len(res.Files) != 1 { 2153 t.Fatalf("got %v, wanted 1 match", res.Files) 2154 } 2155} 2156 2157func TestLongFileUTF8(t *testing.T) { 2158 needle := "neeedle" 2159 2160 // 6 bytes. 
2161 unicode := "世界" 2162 content := []byte(strings.Repeat(unicode, 100) + needle) 2163 b := testIndexBuilder(t, nil, 2164 Document{ 2165 Name: "f1", 2166 Content: []byte(strings.Repeat("a", 50)), 2167 }, 2168 Document{ 2169 Name: "f2", 2170 Content: content, 2171 }) 2172 2173 t.Run("LineMatches", func(t *testing.T) { 2174 q := &query.Substring{Pattern: needle, Content: true} 2175 res := searchForTest(t, b, q) 2176 if len(res.Files) != 1 { 2177 t.Errorf("got %v, want 1 result", res) 2178 } 2179 }) 2180 2181 t.Run("ChunkMatches", func(t *testing.T) { 2182 q := &query.Substring{Pattern: needle, Content: true} 2183 res := searchForTest(t, b, q, chunkOpts) 2184 if len(res.Files) != 1 { 2185 t.Errorf("got %v, want 1 result", res) 2186 } 2187 }) 2188} 2189 2190func TestEstimateDocCount(t *testing.T) { 2191 content := []byte("bla needle bla") 2192 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2193 Document{Name: "f1", Content: content}, 2194 Document{Name: "f2", Content: content}, 2195 ) 2196 2197 t.Run("LineMatches", func(t *testing.T) { 2198 if sres := searchForTest(t, b, 2199 query.NewAnd( 2200 &query.Substring{Pattern: "needle"}, 2201 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2202 ), SearchOptions{ 2203 EstimateDocCount: true, 2204 }); sres.Stats.ShardFilesConsidered != 2 { 2205 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2206 } 2207 if sres := searchForTest(t, b, 2208 query.NewAnd( 2209 &query.Substring{Pattern: "needle"}, 2210 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2211 ), SearchOptions{ 2212 EstimateDocCount: true, 2213 }); sres.Stats.ShardFilesConsidered != 0 { 2214 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2215 } 2216 }) 2217 2218 t.Run("ChunkMatches", func(t *testing.T) { 2219 if sres := searchForTest(t, b, 2220 query.NewAnd( 2221 &query.Substring{Pattern: "needle"}, 2222 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2223 ), SearchOptions{ 2224 EstimateDocCount: 
true, 2225 ChunkMatches: true, 2226 }); sres.Stats.ShardFilesConsidered != 2 { 2227 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2228 } 2229 if sres := searchForTest(t, b, 2230 query.NewAnd( 2231 &query.Substring{Pattern: "needle"}, 2232 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2233 ), SearchOptions{ 2234 EstimateDocCount: true, 2235 ChunkMatches: true, 2236 }); sres.Stats.ShardFilesConsidered != 0 { 2237 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2238 } 2239 }) 2240} 2241 2242func TestUTF8CorrectCorpus(t *testing.T) { 2243 needle := "neeedle" 2244 2245 // 6 bytes. 2246 unicode := "世界" 2247 b := testIndexBuilder(t, nil, 2248 Document{ 2249 Name: "f1", 2250 Content: []byte(strings.Repeat(unicode, 100)), 2251 }, 2252 Document{ 2253 Name: "xxxxxneeedle", 2254 Content: []byte("hello"), 2255 }) 2256 2257 t.Run("LineMatches", func(t *testing.T) { 2258 q := &query.Substring{Pattern: needle, FileName: true} 2259 res := searchForTest(t, b, q) 2260 if len(res.Files) != 1 { 2261 t.Errorf("got %v, want 1 result", res) 2262 } 2263 }) 2264 2265 t.Run("ChunkMatches", func(t *testing.T) { 2266 q := &query.Substring{Pattern: needle, FileName: true} 2267 res := searchForTest(t, b, q, chunkOpts) 2268 if len(res.Files) != 1 { 2269 t.Errorf("got %v, want 1 result", res) 2270 } 2271 }) 2272} 2273 2274func TestBuilderStats(t *testing.T) { 2275 b := testIndexBuilder(t, nil, 2276 Document{ 2277 Name: "f1", 2278 Content: []byte(strings.Repeat("abcd", 1024)), 2279 }) 2280 var buf bytes.Buffer 2281 if err := b.Write(&buf); err != nil { 2282 t.Fatal(err) 2283 } 2284 2285 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2286 t.Errorf("got %d, want %d", got, want) 2287 } 2288} 2289 2290func TestIOStats(t *testing.T) { 2291 b := testIndexBuilder(t, nil, 2292 Document{ 2293 Name: "f1", 2294 Content: []byte(strings.Repeat("abcd", 1024)), 2295 }) 2296 2297 t.Run("LineMatches", func(t *testing.T) { 2298 q := 
&query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2299 res := searchForTest(t, b, q) 2300 2301 // 4096 (content) + 2 (overhead: newlines or doc sections) 2302 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2303 t.Errorf("got content I/O %d, want %d", got, want) 2304 } 2305 2306 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2307 // delta encoded. 2308 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2309 t.Errorf("got index I/O %d, want %d", got, want) 2310 } 2311 }) 2312 2313 t.Run("ChunkMatches", func(t *testing.T) { 2314 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2315 res := searchForTest(t, b, q, chunkOpts) 2316 2317 // 4096 (content) + 2 (overhead: newlines or doc sections) 2318 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2319 t.Errorf("got content I/O %d, want %d", got, want) 2320 } 2321 2322 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2323 // delta encoded. 
2324 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2325 t.Errorf("got index I/O %d, want %d", got, want) 2326 } 2327 }) 2328} 2329 2330func TestStartLineAnchor(t *testing.T) { 2331 b := testIndexBuilder(t, nil, 2332 Document{ 2333 Name: "f1", 2334 Content: []byte( 2335 `hello 2336start of middle of line 2337`), 2338 }) 2339 2340 t.Run("LineMatches", func(t *testing.T) { 2341 q, err := query.Parse("^start") 2342 if err != nil { 2343 t.Errorf("parse: %v", err) 2344 } 2345 2346 res := searchForTest(t, b, q) 2347 if len(res.Files) != 1 { 2348 t.Errorf("got %v, want 1 file", res.Files) 2349 } 2350 2351 q, err = query.Parse("^middle") 2352 if err != nil { 2353 t.Errorf("parse: %v", err) 2354 } 2355 res = searchForTest(t, b, q) 2356 if len(res.Files) != 0 { 2357 t.Errorf("got %v, want 0 files", res.Files) 2358 } 2359 }) 2360 2361 t.Run("ChunkMatches", func(t *testing.T) { 2362 q, err := query.Parse("^start") 2363 if err != nil { 2364 t.Errorf("parse: %v", err) 2365 } 2366 2367 res := searchForTest(t, b, q, chunkOpts) 2368 if len(res.Files) != 1 { 2369 t.Errorf("got %v, want 1 file", res.Files) 2370 } 2371 2372 q, err = query.Parse("^middle") 2373 if err != nil { 2374 t.Errorf("parse: %v", err) 2375 } 2376 res = searchForTest(t, b, q, chunkOpts) 2377 if len(res.Files) != 0 { 2378 t.Errorf("got %v, want 0 files", res.Files) 2379 } 2380 }) 2381} 2382 2383func TestAndOrUnicode(t *testing.T) { 2384 q, err := query.Parse("orange.*apple") 2385 if err != nil { 2386 t.Errorf("parse: %v", err) 2387 } 2388 finalQ := query.NewAnd(q, 2389 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2390 query.NewOr(&query.Branch{Pattern: "master"})))) 2391 2392 b := testIndexBuilder(t, &Repository{ 2393 Name: "name", 2394 Branches: []RepositoryBranch{{"master", "master-version"}}, 2395 }, Document{ 2396 Name: "f2", 2397 Content: []byte("orange\u2318apple"), 2398 // --------------0123456 78901 2399 Branches: []string{"master"}, 2400 }) 2401 2402 
t.Run("LineMatches", func(t *testing.T) { 2403 res := searchForTest(t, b, finalQ) 2404 if len(res.Files) != 1 { 2405 t.Errorf("got %v, want 1 result", res.Files) 2406 } 2407 }) 2408 2409 t.Run("ChunkMatches", func(t *testing.T) { 2410 res := searchForTest(t, b, finalQ, chunkOpts) 2411 if len(res.Files) != 1 { 2412 t.Errorf("got %v, want 1 result", res.Files) 2413 } 2414 }) 2415} 2416 2417func TestAndShort(t *testing.T) { 2418 content := []byte("bla needle at orange bla") 2419 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2420 Document{Name: "f1", Content: content}, 2421 Document{Name: "f2", Content: []byte("xx at xx")}, 2422 Document{Name: "f3", Content: []byte("yy orange xx")}, 2423 ) 2424 2425 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2426 &query.Substring{Pattern: "orange"}) 2427 2428 t.Run("LineMatches", func(t *testing.T) { 2429 res := searchForTest(t, b, q) 2430 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2431 t.Errorf("got %v, want 1 result", res.Files) 2432 } 2433 }) 2434 2435 t.Run("ChunkMatches", func(t *testing.T) { 2436 res := searchForTest(t, b, q, chunkOpts) 2437 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2438 t.Errorf("got %v, want 1 result", res.Files) 2439 } 2440 }) 2441} 2442 2443func TestNoCollectRegexpSubstring(t *testing.T) { 2444 content := []byte("bla final bla\nfoo final, foo") 2445 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2446 Document{Name: "f1", Content: content}, 2447 ) 2448 2449 q := &query.Regexp{ 2450 Regexp: mustParseRE("final[,.]"), 2451 } 2452 2453 t.Run("LineMatches", func(t *testing.T) { 2454 res := searchForTest(t, b, q) 2455 if len(res.Files) != 1 { 2456 t.Fatalf("got %v, want 1 result", res.Files) 2457 } 2458 if f := res.Files[0]; len(f.LineMatches) != 1 { 2459 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2460 } 2461 }) 2462 2463 t.Run("ChunkMatches", func(t *testing.T) { 2464 res := searchForTest(t, b, q, chunkOpts) 2465 if 
len(res.Files) != 1 { 2466 t.Fatalf("got %v, want 1 result", res.Files) 2467 } 2468 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2469 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2470 } 2471 }) 2472} 2473 2474func printLineMatches(ms []LineMatch) string { 2475 var ss []string 2476 for _, m := range ms { 2477 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2478 } 2479 2480 return strings.Join(ss, ", ") 2481} 2482 2483func TestLang(t *testing.T) { 2484 content := []byte("bla needle bla") 2485 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2486 Document{Name: "f1", Content: content}, 2487 Document{Name: "f2", Language: "java", Content: content}, 2488 Document{Name: "f3", Language: "cpp", Content: content}, 2489 ) 2490 2491 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2492 &query.Language{Language: "cpp"}) 2493 2494 t.Run("LineMatches", func(t *testing.T) { 2495 res := searchForTest(t, b, q) 2496 if len(res.Files) != 1 { 2497 t.Fatalf("got %v, want 1 result in f3", res.Files) 2498 } 2499 f := res.Files[0] 2500 if f.FileName != "f3" || f.Language != "cpp" { 2501 t.Fatalf("got %v, want 1 match with language cpp", f) 2502 } 2503 }) 2504 2505 t.Run("ChunkMatches", func(t *testing.T) { 2506 res := searchForTest(t, b, q, chunkOpts) 2507 if len(res.Files) != 1 { 2508 t.Fatalf("got %v, want 1 result in f3", res.Files) 2509 } 2510 f := res.Files[0] 2511 if f.FileName != "f3" || f.Language != "cpp" { 2512 t.Fatalf("got %v, want 1 match with language cpp", f) 2513 } 2514 }) 2515} 2516 2517func TestLangShortcut(t *testing.T) { 2518 content := []byte("bla needle bla") 2519 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2520 Document{Name: "f2", Language: "java", Content: content}, 2521 Document{Name: "f3", Language: "cpp", Content: content}, 2522 ) 2523 2524 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2525 &query.Language{Language: "fortran"}) 2526 2527 
t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms runs a query that contains only a Language filter and
// no text atom, and expects the single java document (f2) to be returned.
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			// f2 is the only document indexed as java.
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms runs a query made of a negated substring and a repo
// filter, with no positive text atom, and expects both indexed documents to
// be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results (f1 and f2)", res.Files)
		}
	})

t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results (f1 and f2)", res.Files)
		}
	})
}

// TestSymbolBoundaryStart checks that a symbol query matches a symbol section
// that begins at offset 0 of the document.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd checks that a symbol query matches a symbol section
// that ends at the last byte of the document, reporting offset 14.
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			// The message states the offset the condition checks for.
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring checks that a substring symbol query reports the match
// inside the symbol section (offset 7, length 3) and not the identical text
// outside it.
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact checks that a substring symbol query whose pattern
// equals the whole symbol reports only the occurrence inside the symbol
// section (offset 4), although the same text appears elsewhere in the file.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			// The message states the offset the condition checks for.
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact checks that the anchored regexp "^bla$" inside a
// symbol query matches only the symbol that is exactly "bla" (offset 5), not
// the longer "blah" or the shorter "bl".
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 5 {
			t.Fatalf("got offset %d, want 5", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 5 {
			t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpPartial checks that an unanchored regexp inside a symbol
// query can match part of a symbol, reporting offset 1 and length 3.
func TestSymbolRegexpPartial(t *testing.T) {
	content := []byte("abcdef")
	// ----------------012345

	b := testIndexBuilder(t, &Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 6}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 1 {
			t.Fatalf("got offset %d, want 1", m.Offset)
		}
		if m.MatchLength != 3 {
			t.Fatalf("got match length %d, want 3", m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 1 {
			t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
		}
		if m.End.ByteOffset != 4 {
			t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
		}
	})
}

// TestSymbolRegexpAll checks that the regexp ".*" inside a symbol query
// returns every symbol section of every document, in order.
func TestSymbolRegexpAll(t *testing.T) {
	docs := []Document{
		{
			Name:    "f1",
			Content: []byte("Hello Zoekt"),
			// --------------01234567890
			Symbols: []DocumentSection{{0, 5}, {6, 11}},
		},
		{
			Name:    "f2",
			Content: []byte("Second Zoekt Third"),
			// --------------012345678901234567
			Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
		},
	}

	b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...)
2840 q := &query.Symbol{ 2841 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2842 } 2843 t.Run("LineMatches", func(t *testing.T) { 2844 res := searchForTest(t, b, q) 2845 if len(res.Files) != len(docs) { 2846 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2847 } 2848 for i, want := range docs { 2849 got := res.Files[i].LineMatches[0].LineFragments 2850 if len(got) != len(want.Symbols) { 2851 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2852 } 2853 2854 for j, sec := range want.Symbols { 2855 if sec.Start != got[j].Offset { 2856 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 2857 } 2858 } 2859 } 2860 }) 2861 2862 t.Run("ChunkMatches", func(t *testing.T) { 2863 res := searchForTest(t, b, q, chunkOpts) 2864 if len(res.Files) != len(docs) { 2865 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2866 } 2867 for i, want := range docs { 2868 got := res.Files[i].ChunkMatches[0].Ranges 2869 if len(got) != len(want.Symbols) { 2870 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2871 } 2872 2873 for j, sec := range want.Symbols { 2874 if sec.Start != uint32(got[j].Start.ByteOffset) { 2875 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 2876 } 2877 } 2878 } 2879 }) 2880} 2881 2882func TestHitIterTerminate(t *testing.T) { 2883 // contrived input: trigram frequencies forces selecting abc + 2884 // def for the distance iteration. There is no match, so this 2885 // will advance the compressedPostingIterator to beyond the 2886 // end. 
2887 content := []byte("abc bcdbcd cdecde abcabc def efg") 2888 b := testIndexBuilder(t, nil, 2889 Document{ 2890 Name: "f1", 2891 Content: content, 2892 }, 2893 ) 2894 2895 t.Run("LineMatches", func(t *testing.T) { 2896 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 2897 }) 2898 2899 t.Run("ChunkMatches", func(t *testing.T) { 2900 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 2901 }) 2902} 2903 2904func TestDistanceHitIterBailLast(t *testing.T) { 2905 content := []byte("AST AST AST UASH") 2906 b := testIndexBuilder(t, nil, 2907 Document{ 2908 Name: "f1", 2909 Content: content, 2910 }, 2911 ) 2912 t.Run("LineMatches", func(t *testing.T) { 2913 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 2914 if len(res.Files) != 0 { 2915 t.Fatalf("got %v, want no results", res.Files) 2916 } 2917 }) 2918 2919 t.Run("LineMatches", func(t *testing.T) { 2920 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 2921 if len(res.Files) != 0 { 2922 t.Fatalf("got %v, want no results", res.Files) 2923 } 2924 }) 2925} 2926 2927func TestDocumentSectionRuneBoundary(t *testing.T) { 2928 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2929 b, err := NewIndexBuilder(nil) 2930 if err != nil { 2931 t.Fatalf("NewIndexBuilder: %v", err) 2932 } 2933 2934 for i, sec := range []DocumentSection{ 2935 {2, 6}, 2936 {3, 7}, 2937 } { 2938 if err := b.Add(Document{ 2939 Name: "f1", 2940 Content: []byte(content), 2941 Symbols: []DocumentSection{sec}, 2942 }); err == nil { 2943 t.Errorf("%d: Add succeeded", i) 2944 } 2945 } 2946} 2947 2948func TestUnicodeQuery(t *testing.T) { 2949 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2950 b := testIndexBuilder(t, nil, 2951 Document{ 2952 Name: "f1", 2953 Content: []byte(content), 2954 }, 2955 ) 2956 2957 q := &query.Substring{Pattern: content} 2958 2959 t.Run("LineMatches", func(t *testing.T) { 2960 res := searchForTest(t, b, q) 2961 if 
len(res.Files) != 1 { 2962 t.Fatalf("want 1 match, got %v", res.Files) 2963 } 2964 2965 f := res.Files[0] 2966 if len(f.LineMatches) != 1 { 2967 t.Fatalf("want 1 line, got %v", f.LineMatches) 2968 } 2969 l := f.LineMatches[0] 2970 2971 if len(l.LineFragments) != 1 { 2972 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 2973 } 2974 fr := l.LineFragments[0] 2975 if fr.MatchLength != len(content) { 2976 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 2977 } 2978 }) 2979 2980 t.Run("ChunkMatches", func(t *testing.T) { 2981 res := searchForTest(t, b, q, chunkOpts) 2982 if len(res.Files) != 1 { 2983 t.Fatalf("want 1 match, got %v", res.Files) 2984 } 2985 2986 f := res.Files[0] 2987 if len(f.ChunkMatches) != 1 { 2988 t.Fatalf("want 1 line, got %v", f.LineMatches) 2989 } 2990 cm := f.ChunkMatches[0] 2991 2992 if len(cm.Ranges) != 1 { 2993 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 2994 } 2995 rr := cm.Ranges[0] 2996 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 2997 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 2998 } 2999 }) 3000} 3001 3002func TestSkipInvalidContent(t *testing.T) { 3003 for _, content := range []string{ 3004 // Binary 3005 "abc def \x00 abc", 3006 } { 3007 3008 b, err := NewIndexBuilder(nil) 3009 if err != nil { 3010 t.Fatalf("NewIndexBuilder: %v", err) 3011 } 3012 3013 if err := b.Add(Document{ 3014 Name: "f1", 3015 Content: []byte(content), 3016 }); err != nil { 3017 t.Fatal(err) 3018 } 3019 3020 t.Run("LineMatches", func(t *testing.T) { 3021 q := &query.Substring{Pattern: "abc def"} 3022 res := searchForTest(t, b, q) 3023 if len(res.Files) != 0 { 3024 t.Fatalf("got %v, want no results", res.Files) 3025 } 3026 3027 q = &query.Substring{Pattern: "NOT-INDEXED"} 3028 res = searchForTest(t, b, q) 3029 if len(res.Files) != 1 { 3030 t.Fatalf("got %v, want 1 result", res.Files) 3031 } 3032 }) 3033 3034 t.Run("ChunkMatches", func(t *testing.T) { 3035 q := 
&query.Substring{Pattern: "abc def"} 3036 res := searchForTest(t, b, q, chunkOpts) 3037 if len(res.Files) != 0 { 3038 t.Fatalf("got %v, want no results", res.Files) 3039 } 3040 3041 q = &query.Substring{Pattern: "NOT-INDEXED"} 3042 res = searchForTest(t, b, q, chunkOpts) 3043 if len(res.Files) != 1 { 3044 t.Fatalf("got %v, want 1 result", res.Files) 3045 } 3046 }) 3047 } 3048} 3049 3050func TestCheckText(t *testing.T) { 3051 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { 3052 if err := CheckText([]byte(text), 20000); err != nil { 3053 t.Errorf("CheckText(%q): %v", text, err) 3054 } 3055 } 3056 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} { 3057 if err := CheckText([]byte(text), 15); err == nil { 3058 t.Errorf("CheckText(%q) succeeded", text) 3059 } 3060 } 3061} 3062 3063func TestLineAnd(t *testing.T) { 3064 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3065 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3066 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3067 Document{Name: "f3", Content: []byte("banana grape")}, 3068 ) 3069 pattern := "(apple)(?-s:.)*?(banana)" 3070 r, _ := syntax.Parse(pattern, syntax.Perl) 3071 3072 q := query.Regexp{ 3073 Regexp: r, 3074 Content: true, 3075 } 3076 t.Run("LineMatches", func(t *testing.T) { 3077 res := searchForTest(t, b, &q) 3078 wantRegexpCount := 1 3079 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3080 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3081 } 3082 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3083 t.Errorf("got %v, want 1 result", res.Files) 3084 } 3085 }) 3086 3087 t.Run("ChunkMatches", func(t *testing.T) { 3088 res := searchForTest(t, b, &q, chunkOpts) 3089 wantRegexpCount := 1 3090 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != 
wantRegexpCount { 3091 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3092 } 3093 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3094 t.Errorf("got %v, want 1 result", res.Files) 3095 } 3096 }) 3097} 3098 3099func TestLineAndFileName(t *testing.T) { 3100 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3101 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3102 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3103 Document{Name: "apple banana", Content: []byte("banana grape")}, 3104 ) 3105 pattern := "(apple)(?-s:.)*?(banana)" 3106 r, _ := syntax.Parse(pattern, syntax.Perl) 3107 3108 q := query.Regexp{ 3109 Regexp: r, 3110 FileName: true, 3111 } 3112 t.Run("LineMatches", func(t *testing.T) { 3113 res := searchForTest(t, b, &q) 3114 wantRegexpCount := 1 3115 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3116 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3117 } 3118 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3119 t.Errorf("got %v, want 1 result", res.Files) 3120 } 3121 }) 3122 3123 t.Run("ChunkMatches", func(t *testing.T) { 3124 res := searchForTest(t, b, &q, chunkOpts) 3125 wantRegexpCount := 1 3126 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3127 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3128 } 3129 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3130 t.Errorf("got %v, want 1 result", res.Files) 3131 } 3132 }) 3133} 3134 3135func TestMultiLineRegex(t *testing.T) { 3136 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3137 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3138 Document{Name: "f2", Content: []byte("apple orange")}, 3139 Document{Name: "f3", Content: []byte("grape apple")}, 3140 ) 3141 pattern := "(apple).*?[[:space:]].*?(grape)" 3142 r, _ := syntax.Parse(pattern, syntax.Perl) 3143 3144 q := query.Regexp{ 3145 Regexp: r, 
3146 } 3147 t.Run("LineMatches", func(t *testing.T) { 3148 res := searchForTest(t, b, &q) 3149 wantRegexpCount := 2 3150 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3151 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3152 } 3153 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3154 t.Errorf("got %v, want 1 result", res.Files) 3155 } 3156 if l := len(res.Files[0].LineMatches); l != 2 { 3157 t.Errorf("got %v, want 2 line matches", l) 3158 } 3159 }) 3160 3161 t.Run("ChunkMatches", func(t *testing.T) { 3162 res := searchForTest(t, b, &q, chunkOpts) 3163 wantRegexpCount := 2 3164 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3165 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3166 } 3167 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3168 t.Errorf("got %v, want 1 result", res.Files) 3169 } 3170 if l := len(res.Files[0].ChunkMatches); l != 1 { 3171 t.Errorf("got %v, want 1 chunk matches", l) 3172 } 3173 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3174 t.Errorf("got %v, want 1 chunk ranges", l) 3175 } 3176 }) 3177} 3178 3179func TestSearchTypeFileName(t *testing.T) { 3180 b := testIndexBuilder(t, &Repository{ 3181 Name: "reponame", 3182 }, 3183 Document{Name: "f1", Content: []byte("bla the needle")}, 3184 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3185 // -----------------------------------012345678901234567890-123456 3186 ) 3187 3188 t.Run("LineMatches", func(t *testing.T) { 3189 wantSingleMatch := func(res *SearchResult, want string) { 3190 t.Helper() 3191 fmatches := res.Files 3192 if len(fmatches) != 1 { 3193 t.Errorf("got %v, want 1 matches", len(fmatches)) 3194 return 3195 } 3196 if len(fmatches[0].LineMatches) != 1 { 3197 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3198 return 3199 } 3200 var got string 3201 if fmatches[0].LineMatches[0].FileName { 3202 got = fmatches[0].FileName 3203 
} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}))
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			})
		wantSingleMatch(res, "f2")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// wantSingleMatch expects exactly one file with one chunk match and
		// compares it, rendered as "name" or "name:offset", against want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}),
			chunkOpts,
		)
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			},
			chunkOpts,
		)
		wantSingleMatch(res, "f2")
	})
}

// TestSearchTypeLanguage checks Language queries against three documents
// whose languages come from their names and content, and checks that "C++"
// matches hello.h only after featureVersion is lowered to 11.
func TestSearchTypeLanguage(t *testing.T) {
	b := testIndexBuilder(t, &Repository{
		Name: "reponame",
	},
		Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
		Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
		Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
	)

	t.Log(b.languageMap)

	t.Run("LineMatches", func(t *testing.T) {
		// Restore the feature version on exit so the override below does
		// not leak into other subtests.
		prevVersion := b.featureVersion
		defer func() { b.featureVersion = prevVersion }()

		// wantSingleMatch expects exactly one file with one line match and
		// compares it, rendered as "name" or "name:offset", against want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		res := searchForTest(t, b, &query.Language{Language: "Apex"})
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"})
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"})
		wantSingleMatch(res, "hello.h")

		// test fallback language search by pretending it's an older index version
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		wantSingleMatch(res, "hello.h")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// Restore the feature version on exit so the override below does
		// not outlive this subtest.
		prevVersion := b.featureVersion
		defer func() { b.featureVersion = prevVersion }()

		// wantSingleMatch expects exactly one file with one chunk match and
		// compares it, rendered as "name" or "name:offset", against want.
		wantSingleMatch := func(res *SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		b.featureVersion = FeatureVersion // reset feature version
		res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
		wantSingleMatch(res, "hello.h")

		// test fallback language search by pretending it's an older index version
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		wantSingleMatch(res, "hello.h")
	})
}

func TestStats(t *testing.T) {
	ignored := []cmp.Option{
		cmpopts.EquateEmpty(),
		cmpopts.IgnoreFields(RepoListEntry{}, "Repository"),
		cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"),
		cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"),
	}

	repoListEntries := func(b *IndexBuilder) []RepoListEntry {
		searcher := searcherForTest(t, b)
		indexdata := searcher.(*indexData)
		return indexdata.repoListEntry
	}

	t.Run("one empty repo", func(t *testing.T) {
		b := testIndexBuilder(t, nil)
		got := repoListEntries(b)
		want :=
[]RepoListEntry{ 3396 { 3397 Stats: RepoStats{ 3398 Repos: 0, 3399 Shards: 1, 3400 Documents: 0, 3401 IndexBytes: 20, 3402 ContentBytes: 0, 3403 NewLinesCount: 0, 3404 DefaultBranchNewLinesCount: 0, 3405 OtherBranchesNewLinesCount: 0, 3406 }, 3407 }, 3408 } 3409 3410 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3411 t.Fatalf("mismatch (-want +got):\n%s", diff) 3412 } 3413 3414 }) 3415 3416 t.Run("one simple shard", func(t *testing.T) { 3417 b := testIndexBuilder(t, nil, 3418 Document{Name: "doc 0", Content: []byte("content 0")}, 3419 Document{Name: "doc 1", Content: []byte("content 1")}, 3420 ) 3421 got := repoListEntries(b) 3422 want := []RepoListEntry{ 3423 { 3424 Stats: RepoStats{ 3425 Repos: 0, 3426 Shards: 1, 3427 Documents: 2, 3428 IndexBytes: 224, 3429 ContentBytes: 28, 3430 NewLinesCount: 0, 3431 DefaultBranchNewLinesCount: 0, 3432 OtherBranchesNewLinesCount: 0, 3433 }, 3434 }, 3435 } 3436 3437 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3438 t.Fatalf("mismatch (-want +got):\n%s", diff) 3439 } 3440 3441 }) 3442 3443 t.Run("one compound shard", func(t *testing.T) { 3444 b := testIndexBuilderCompound(t, 3445 []*Repository{ 3446 {Name: "repo 0"}, 3447 {Name: "repo 1"}, 3448 }, 3449 [][]Document{ 3450 { 3451 {Name: "doc 0", Content: []byte("content 0")}, 3452 {Name: "doc 1", Content: []byte("content 1")}, 3453 }, 3454 { 3455 {Name: "doc 2", Content: []byte("content 2")}, 3456 {Name: "doc 3", Content: []byte("content 3")}, 3457 }, 3458 }, 3459 ) 3460 got := repoListEntries(b) 3461 want := []RepoListEntry{ 3462 { 3463 Stats: RepoStats{ 3464 Repos: 0, 3465 Shards: 1, 3466 Documents: 2, 3467 IndexBytes: 180, 3468 ContentBytes: 28, 3469 NewLinesCount: 0, 3470 DefaultBranchNewLinesCount: 0, 3471 OtherBranchesNewLinesCount: 0, 3472 }, 3473 }, 3474 { 3475 Stats: RepoStats{ 3476 Repos: 0, 3477 Shards: 1, 3478 Documents: 2, 3479 IndexBytes: 180, 3480 ContentBytes: 28, 3481 NewLinesCount: 0, 3482 DefaultBranchNewLinesCount: 0, 3483 
OtherBranchesNewLinesCount: 0, 3484 }, 3485 }, 3486 } 3487 3488 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3489 t.Fatalf("mismatch (-want +got):\n%s", diff) 3490 } 3491 }) 3492 3493 t.Run("compound shard with empty repos", func(t *testing.T) { 3494 b := testIndexBuilderCompound(t, 3495 []*Repository{ 3496 {Name: "repo 0"}, 3497 {Name: "repo 1"}, 3498 {Name: "repo 2"}, 3499 {Name: "repo 3"}, 3500 {Name: "repo 4"}, 3501 }, 3502 [][]Document{ 3503 {{Name: "doc 0", Content: []byte("content 0")}}, 3504 nil, 3505 {{Name: "doc 1", Content: []byte("content 1")}}, 3506 nil, 3507 nil, 3508 }, 3509 ) 3510 got := repoListEntries(b) 3511 3512 entryEmpty := RepoListEntry{Stats: RepoStats{ 3513 Shards: 1, 3514 Documents: 0, 3515 ContentBytes: 0, 3516 }} 3517 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3518 Shards: 1, 3519 Documents: 1, 3520 ContentBytes: 14, 3521 }} 3522 3523 want := []RepoListEntry{ 3524 entryNonEmpty, 3525 entryEmpty, 3526 entryNonEmpty, 3527 entryEmpty, 3528 entryEmpty, 3529 } 3530 3531 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3532 t.Fatalf("mismatch (-want +got):\n%s", diff) 3533 } 3534 3535 }) 3536} 3537 3538// This tests the frequent pattern "\bLITERAL\b". 
3539func TestWordSearch(t *testing.T) { 3540 content := []byte("needle the bla") 3541 // ----------------01234567890123 3542 3543 b := testIndexBuilder(t, nil, 3544 Document{ 3545 Name: "f1", 3546 Content: content, 3547 }) 3548 3549 t.Run("LineMatches", func(t *testing.T) { 3550 sres := searchForTest(t, b, 3551 &query.Regexp{ 3552 Regexp: mustParseRE("\\bthe\\b"), 3553 CaseSensitive: true, 3554 Content: true, 3555 }) 3556 3557 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3558 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3559 } 3560 3561 if sres.Stats.RegexpsConsidered != 0 { 3562 t.Fatal("expected regexp to be skipped") 3563 } 3564 3565 got := sres.Files[0].LineMatches[0] 3566 want := LineMatch{ 3567 LineFragments: []LineFragmentMatch{{ 3568 LineOffset: 7, 3569 Offset: 7, 3570 MatchLength: 3, 3571 }}, 3572 Line: content, 3573 FileName: false, 3574 LineNumber: 1, 3575 LineStart: 0, 3576 LineEnd: 14, 3577 } 3578 3579 if !reflect.DeepEqual(got, want) { 3580 t.Errorf("got %#v, want %#v", got, want) 3581 } 3582 }) 3583 3584 t.Run("ChunkMatches", func(t *testing.T) { 3585 sres := searchForTest(t, b, 3586 &query.Regexp{ 3587 Regexp: mustParseRE("\\bthe\\b"), 3588 CaseSensitive: true, 3589 }, chunkOpts) 3590 3591 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3592 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3593 } 3594 3595 if sres.Stats.RegexpsConsidered != 0 { 3596 t.Fatal("expected regexp to be skipped") 3597 } 3598 3599 got := sres.Files[0].ChunkMatches[0] 3600 want := ChunkMatch{ 3601 Content: content, 3602 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3603 Ranges: []Range{{ 3604 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3605 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3606 }}, 3607 } 3608 3609 if diff := cmp.Diff(want, got); diff != "" { 3610 t.Fatal(diff) 3611 } 3612 }) 3613}