fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "os" 22 "reflect" 23 "regexp/syntax" 24 "strings" 25 "testing" 26 27 "github.com/google/go-cmp/cmp" 28 "github.com/google/go-cmp/cmp/cmpopts" 29 "github.com/grafana/regexp" 30 "github.com/kylelemons/godebug/pretty" 31 32 "github.com/sourcegraph/zoekt/query" 33) 34 35func clearScores(r *SearchResult) { 36 for i := range r.Files { 37 r.Files[i].Score = 0.0 38 for j := range r.Files[i].LineMatches { 39 r.Files[i].LineMatches[j].Score = 0.0 40 } 41 for j := range r.Files[i].ChunkMatches { 42 r.Files[i].ChunkMatches[j].Score = 0.0 43 } 44 r.Files[i].Checksum = nil 45 r.Files[i].Debug = "" 46 } 47} 48 49func testIndexBuilder(t *testing.T, repo *Repository, docs ...Document) *IndexBuilder { 50 t.Helper() 51 52 b, err := NewIndexBuilder(repo) 53 if err != nil { 54 t.Fatalf("NewIndexBuilder: %v", err) 55 } 56 57 for i, d := range docs { 58 if err := b.Add(d); err != nil { 59 t.Fatalf("Add %d: %v", i, err) 60 } 61 } 62 63 return b 64} 65 66func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 67 t.Helper() 68 69 b := newIndexBuilder() 70 b.indexFormatVersion = NextIndexFormatVersion 71 72 if len(repos) != len(docs) { 73 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) 74 } 75 76 for i, repo := range repos { 77 if err := b.setRepository(repo); err != nil { 78 t.Fatal(err) 79 } 80 for j, d := range docs[i] { 81 if err := b.Add(d); err != nil { 82 t.Fatalf("Add %d %d: %v", i, j, err) 83 } 84 } 85 } 86 87 return b 88} 89 90func TestBoundary(t *testing.T) { 91 b := testIndexBuilder(t, nil, 92 Document{Name: "f1", Content: []byte("x the")}, 93 Document{Name: "f1", Content: []byte("reader")}) 94 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 95 if len(res.Files) > 0 { 96 t.Fatalf("got %v, want no matches", res.Files) 97 } 98} 99 100func TestDocSectionInvalid(t *testing.T) { 101 b, err := NewIndexBuilder(nil) 102 if err != nil { 103 t.Fatalf("NewIndexBuilder: %v", err) 104 } 105 doc := Document{ 106 Name: "f1", 107 Content: []byte("01234567890123"), 108 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 109 } 110 111 if err := b.Add(doc); err == nil { 112 t.Errorf("overlapping doc sections should fail") 113 } 114 115 doc = Document{ 116 Name: "f1", 117 Content: []byte("01234567890123"), 118 Symbols: []DocumentSection{{0, 20}}, 119 } 120 121 if err := b.Add(doc); err == nil { 122 t.Errorf("doc sections beyond EOF should fail") 123 } 124} 125 126func TestBasic(t *testing.T) { 127 b := testIndexBuilder(t, nil, 128 Document{ 129 Name: "f2", 130 Content: []byte("to carry water in the no later bla"), 131 // --------------0123456789012345678901234567890123 132 }) 133 134 t.Run("LineMatch", func(t *testing.T) { 135 res := searchForTest(t, b, &query.Substring{ 136 Pattern: "water", 137 CaseSensitive: true, 138 }) 139 fmatches := res.Files 140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 141 t.Fatalf("got %v, want 1 matches", fmatches) 142 } 143 144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 145 want := "f2:9" 146 if got != want { 147 t.Errorf("1: got %s, want %s", got, want) 148 } 149 }) 150 151 t.Run("ChunkMatch", func(t *testing.T) { 152 res := searchForTest(t, b, &query.Substring{ 153 Pattern: "water", 154 CaseSensitive: true, 155 }, chunkOpts) 156 fmatches := res.Files 157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 158 t.Fatalf("got %v, want 1 matches", fmatches) 159 } 160 161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 162 want := "f2:9" 163 if got != want { 164 t.Errorf("1: got %s, want %s", got, want) 165 } 166 }) 167} 168 169func TestEmptyIndex(t *testing.T) { 170 b := testIndexBuilder(t, nil) 171 searcher := searcherForTest(t, b) 172 173 var opts SearchOptions 174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 175 t.Fatalf("Search: %v", err) 176 } 177 178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 179 t.Fatalf("List: %v", err) 180 } 181 182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 183 t.Fatalf("Search: %v", err) 184 } 185} 186 187type memSeeker struct { 188 data []byte 189} 190 191func (s *memSeeker) Name() string { 192 return "memseeker" 193} 194 195func (s *memSeeker) Close() {} 196func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 197 return s.data[off : off+sz], nil 198} 199 200func (s *memSeeker) Size() (uint32, error) { 201 return uint32(len(s.data)), nil 202} 203 204func TestNewlines(t *testing.T) { 205 b := testIndexBuilder(t, nil, 206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 207 // ---------------------------------------------012345-678901-234 208 209 t.Run("LineMatches", func(t *testing.T) { 210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 211 212 matches := sres.Files 213 want := []FileMatch{{ 214 FileName: "filename", 215 LineMatches: []LineMatch{{ 216 LineFragments: []LineFragmentMatch{{ 217 Offset: 8, 218 LineOffset: 2, 219 MatchLength: 3, 220 }}, 221 Line: []byte("line2"), 222 LineStart: 6, 223 LineEnd: 11, 224 LineNumber: 2, 225 }}, 226 }} 227 228 if !reflect.DeepEqual(matches, want) { 229 t.Errorf("got %v, want %v", matches, want) 230 } 231 }) 232 233 t.Run("ChunkMatches", func(t *testing.T) { 234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 235 236 matches := sres.Files 237 want := []FileMatch{{ 238 FileName: "filename", 239 ChunkMatches: []ChunkMatch{{ 240 Content: []byte("line2"), 241 ContentStart: Location{ 242 ByteOffset: 6, 243 LineNumber: 2, 244 Column: 1, 245 }, 246 Ranges: []Range{{ 247 Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 248 End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 249 }}, 250 }}, 251 }} 252 253 if diff := cmp.Diff(want, matches); diff != "" { 254 t.Fatal(diff) 255 } 256 }) 257} 258 259// A result spanning multiple lines should have LineMatches that only cover 260// single lines. 261func TestQueryNewlines(t *testing.T) { 262 text := "line1\nline2\nbla" 263 b := testIndexBuilder(t, nil, 264 Document{Name: "filename", Content: []byte(text)}) 265 266 t.Run("LineMatches", func(t *testing.T) { 267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 268 matches := sres.Files 269 if len(matches) != 1 { 270 t.Fatalf("got %d file matches, want exactly one", len(matches)) 271 } 272 m := matches[0] 273 if len(m.LineMatches) != 2 { 274 t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 275 } 276 }) 277 278 t.Run("ChunkMatches", func(t *testing.T) { 279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 280 matches := sres.Files 281 if len(matches) != 1 { 282 t.Fatalf("got %d file matches, want exactly one", len(matches)) 283 } 284 m := matches[0] 285 if len(m.ChunkMatches) != 1 { 286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 287 } 288 }) 289} 290 291var chunkOpts = SearchOptions{ChunkMatches: true} 292 293func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 294 searcher := searcherForTest(t, b) 295 var opts SearchOptions 296 if len(o) > 0 { 297 opts = o[0] 298 } 299 res, err := searcher.Search(context.Background(), q, &opts) 300 if err != nil { 301 t.Fatalf("Search(%s): %v", q, err) 302 } 303 clearScores(res) 304 return res 305} 306 307func searcherForTest(t *testing.T, b *IndexBuilder) Searcher { 308 var buf bytes.Buffer 309 if err := b.Write(&buf); err != nil { 310 t.Fatal(err) 311 } 312 f := &memSeeker{buf.Bytes()} 313 314 searcher, err := NewSearcher(f) 315 if err != nil { 316 t.Fatalf("NewSearcher: %v", err) 317 } 318 319 return searcher 320} 321 322func TestCaseFold(t *testing.T) { 323 b := testIndexBuilder(t, nil, 324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 325 // -----------------------------------012345678901234 326 ) 327 t.Run("LineMatches", func(t *testing.T) { 328 sres := searchForTest(t, b, &query.Substring{ 329 Pattern: "bananas", 330 CaseSensitive: true, 331 }) 332 matches := sres.Files 333 if len(matches) != 0 { 334 t.Errorf("foldcase: got %#v, want 0 matches", matches) 335 } 336 337 sres = searchForTest(t, b, 338 &query.Substring{ 339 Pattern: "BaNaNAS", 340 CaseSensitive: true, 341 }) 342 matches = sres.Files 343 if len(matches) != 1 { 344 t.Errorf("no foldcase: got %v, want 1 matches", matches) 345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 346 t.Errorf("foldcase: got %v, want offsets 7", matches) 347 } 348 }) 349 350 t.Run("ChunkMatches", func(t *testing.T) { 351 sres := searchForTest(t, b, &query.Substring{ 352 Pattern: "bananas", 353 CaseSensitive: true, 354 }, chunkOpts) 355 matches := sres.Files 356 if len(matches) != 0 { 357 t.Errorf("foldcase: got %#v, want 0 matches", matches) 358 } 359 360 sres = searchForTest(t, b, 361 &query.Substring{ 362 Pattern: "BaNaNAS", 363 CaseSensitive: true, 364 }) 365 matches = sres.Files 366 if len(matches) != 1 { 367 t.Errorf("no foldcase: got %v, want 1 matches", matches) 368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 369 t.Errorf("foldcase: got %v, want offsets 7", matches) 370 } 371 }) 372} 373 374func TestAndSearch(t *testing.T) { 375 b := testIndexBuilder(t, nil, 376 Document{Name: "f1", Content: []byte("x banana y")}, 377 Document{Name: "f2", Content: []byte("x apple y")}, 378 Document{Name: "f3", Content: []byte("x banana apple y")}, 379 // ---------------------------------------0123456789012345 380 ) 381 382 t.Run("LineMatches", func(t *testing.T) { 383 sres := searchForTest(t, b, query.NewAnd( 384 &query.Substring{ 385 Pattern: "banana", 386 }, 387 &query.Substring{ 388 Pattern: "apple", 389 }, 390 )) 391 matches := sres.Files 392 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 393 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 394 } 395 396 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 397 t.Fatalf("got %#v, want offsets 2,9", matches) 398 } 399 400 wantStats := Stats{ 401 FilesLoaded: 1, 402 ContentBytesLoaded: 18, 403 IndexBytesLoaded: 8, 404 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 405 MatchCount: 1, 406 FileCount: 1, 407 FilesConsidered: 2, 408 ShardsScanned: 1, 409 } 410 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 411 t.Errorf("got stats diff %s", diff) 412 } 413 }) 414 415 t.Run("ChunkMatches", func(t *testing.T) { 416 sres := searchForTest(t, b, query.NewAnd( 417 &query.Substring{ 418 Pattern: "banana", 419 }, 420 &query.Substring{ 421 Pattern: "apple", 422 }, 423 ), chunkOpts) 424 matches := sres.Files 425 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 426 t.Fatalf("got %#v, want 1 chunk match with 2 ranges", matches) 427 } 428 429 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 430 t.Fatalf("got %#v, want offsets 2,9", matches) 431 } 432 433 wantStats := Stats{ 434 FilesLoaded: 1, 435 ContentBytesLoaded: 18, 436 IndexBytesLoaded: 8, 437 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 438 MatchCount: 2, 439 FileCount: 1, 440 FilesConsidered: 2, 441 ShardsScanned: 1, 442 } 443 if diff := pretty.Compare(wantStats, sres.Stats); diff != "" { 444 t.Errorf("got stats diff %s", diff) 445 } 446 }) 447} 448 449func TestAndNegateSearch(t *testing.T) { 450 b := testIndexBuilder(t, nil, 451 Document{Name: "f1", Content: []byte("x banana y")}, 452 // -----------------------------------0123456789 453 Document{Name: "f4", Content: []byte("x banana apple y")}) 454 455 t.Run("LineMatches", func(t *testing.T) { 456 sres := searchForTest(t, b, query.NewAnd( 457 &query.Substring{ 458 Pattern: "banana", 459 }, 460 &query.Not{Child: &query.Substring{ 461 Pattern: "apple", 462 }})) 463 464 matches := sres.Files 465 466 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 467 t.Fatalf("got %v, want 1 match", matches) 468 } 469 if matches[0].FileName != "f1" { 470 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 471 } 472 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 473 t.Fatalf("got %v, want offset 2", matches) 474 } 475 }) 476 477 t.Run("ChunkMatches", func(t *testing.T) { 478 sres := searchForTest(t, b, 479 query.NewAnd( 480 &query.Substring{ 481 Pattern: "banana", 482 }, 483 &query.Not{Child: &query.Substring{ 484 Pattern: "apple", 485 }}, 486 ), 487 chunkOpts, 488 ) 489 490 matches := sres.Files 491 492 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 493 t.Fatalf("got %v, want 1 match", matches) 494 } 495 if matches[0].FileName != "f1" { 496 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 497 } 498 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 499 t.Fatalf("got %v, want offset 2", matches) 500 } 501 }) 502} 503 504func TestNegativeMatchesOnlyShortcut(t *testing.T) { 505 b := testIndexBuilder(t, nil, 506 Document{Name: "f1", Content: []byte("x banana y")}, 507 Document{Name: "f2", Content: []byte("x appelmoes y")}, 508 Document{Name: "f3", Content: []byte("x appelmoes y")}, 509 Document{Name: "f3", Content: []byte("x appelmoes y")}) 510 511 t.Run("LineMatches", func(t *testing.T) { 512 sres := searchForTest(t, b, query.NewAnd( 513 &query.Substring{ 514 Pattern: "banana", 515 }, 516 &query.Not{Child: &query.Substring{ 517 Pattern: "appel", 518 }})) 519 520 if sres.Stats.FilesConsidered != 1 { 521 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 522 } 523 }) 524 525 t.Run("ChunkMatches", func(t *testing.T) { 526 sres := searchForTest(t, b, query.NewAnd( 527 &query.Substring{ 528 Pattern: "banana", 529 }, 530 &query.Not{Child: &query.Substring{ 531 Pattern: "appel", 532 }}), chunkOpts) 533 534 if sres.Stats.FilesConsidered != 1 { 535 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 536 } 537 }) 538} 539 540func TestFileSearch(t *testing.T) { 541 b := testIndexBuilder(t, nil, 542 Document{Name: "banzana", Content: []byte("x orange y")}, 543 // -------------0123456 544 Document{Name: "banana", Content: []byte("x apple y")}, 545 // -------------012345 546 ) 547 548 t.Run("LineMatches", func(t *testing.T) { 549 sres := searchForTest(t, b, &query.Substring{ 550 Pattern: "anan", 551 FileName: true, 552 }) 553 554 matches := sres.Files 555 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 556 t.Fatalf("got %v, want 1 match", matches) 557 } 558 559 got := matches[0].LineMatches[0] 560 want := LineMatch{ 561 Line: []byte("banana"), 562 LineFragments: []LineFragmentMatch{{ 563 Offset: 1, 564 LineOffset: 1, 565 MatchLength: 4, 566 }}, 567 FileName: true, 568 } 569 570 if !reflect.DeepEqual(got, want) { 571 t.Errorf("got %#v, want %#v", got, want) 572 } 573 }) 574 575 t.Run("ChunkMatches", func(t *testing.T) { 576 sres := searchForTest(t, b, &query.Substring{ 577 Pattern: "anan", 578 FileName: true, 579 }, chunkOpts) 580 581 matches := sres.Files 582 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 583 t.Fatalf("got %v, want 1 match", matches) 584 } 585 586 got := matches[0].ChunkMatches[0] 587 want := ChunkMatch{ 588 Content: []byte("banana"), 589 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 590 Ranges: []Range{{ 591 Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 592 End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 593 }}, 594 FileName: true, 595 } 596 597 if diff := cmp.Diff(want, got); diff != "" { 598 t.Fatal(diff) 599 } 600 }) 601 602 t.Run("FileNameSet", func(t *testing.T) { 603 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 604 605 matches := sres.Files 606 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 607 t.Fatalf("got %v, want 1 match", matches) 608 } 609 610 got := matches[0].ChunkMatches[0] 611 want := ChunkMatch{ 612 Content: []byte("banana"), 613 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 614 Ranges: []Range{{ 615 Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 616 End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 617 }}, 618 FileName: true, 619 } 620 621 if diff := cmp.Diff(want, got); diff != "" { 622 t.Fatal(diff) 623 } 624 }) 625} 626 627func TestFileCase(t *testing.T) { 628 b := testIndexBuilder(t, nil, 629 Document{Name: "BANANA", Content: []byte("x orange y")}) 630 631 t.Run("LineMatches", func(t *testing.T) { 632 sres := searchForTest(t, b, &query.Substring{ 633 Pattern: "banana", 634 FileName: true, 635 }) 636 637 matches := sres.Files 638 if len(matches) != 1 || matches[0].FileName != "BANANA" { 639 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 640 } 641 }) 642 643 t.Run("ChunkMatches", func(t *testing.T) { 644 sres := searchForTest(t, b, &query.Substring{ 645 Pattern: "banana", 646 FileName: true, 647 }, chunkOpts) 648 649 matches := sres.Files 650 if len(matches) != 1 || matches[0].FileName != "BANANA" { 651 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 652 } 653 }) 654} 655 656func TestFileRegexpSearchBruteForce(t *testing.T) { 657 b := testIndexBuilder(t, nil, 658 Document{Name: "banzana", Content: []byte("x orange y")}, 659 Document{Name: "banana", Content: []byte("x apple y")}, 660 ) 661 t.Run("LineMatches", func(t *testing.T) { 662 sres := searchForTest(t, b, &query.Regexp{ 663 Regexp: mustParseRE("[qn][zx]"), 664 FileName: true, 665 }) 666 667 matches := sres.Files 668 if len(matches) != 1 || matches[0].FileName != "banzana" { 669 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 670 } 671 }) 672 t.Run("LineMatches", func(t *testing.T) { 673 sres := searchForTest(t, b, &query.Regexp{ 674 Regexp: mustParseRE("[qn][zx]"), 675 FileName: true, 676 }, chunkOpts) 677 678 matches := sres.Files 679 if len(matches) != 1 || matches[0].FileName != "banzana" { 680 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 681 } 682 }) 683} 684 685func TestFileRegexpSearchShortString(t *testing.T) { 686 b := testIndexBuilder(t, nil, 687 Document{Name: "banana.py", Content: []byte("x orange y")}) 688 689 t.Run("LineMatches", func(t *testing.T) { 690 sres := searchForTest(t, b, &query.Regexp{ 691 Regexp: mustParseRE("ana.py"), 692 FileName: true, 693 }) 694 695 matches := sres.Files 696 if len(matches) != 1 || matches[0].FileName != "banana.py" { 697 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 698 } 699 }) 700 701 t.Run("ChunkMatches", func(t *testing.T) { 702 sres := searchForTest(t, b, &query.Regexp{ 703 Regexp: mustParseRE("ana.py"), 704 FileName: true, 705 }, chunkOpts) 706 707 matches := sres.Files 708 if len(matches) != 1 || matches[0].FileName != "banana.py" { 709 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 710 } 711 }) 712} 713 714func TestFileSubstringSearchBruteForce(t *testing.T) { 715 b := testIndexBuilder(t, nil, 716 Document{Name: "BANZANA", Content: []byte("x orange y")}, 717 Document{Name: "banana", Content: []byte("x apple y")}) 718 719 q := &query.Substring{ 720 Pattern: "z", 721 FileName: true, 722 } 723 724 t.Run("LineMatches", func(t *testing.T) { 725 res := searchForTest(t, b, q) 726 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 727 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 728 } 729 }) 730 731 t.Run("ChunkMatches", func(t *testing.T) { 732 res := searchForTest(t, b, q, chunkOpts) 733 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 734 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 735 } 736 }) 737} 738 739func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 740 b := testIndexBuilder(t, nil, 741 Document{Name: "BANZANA", Content: []byte("x orange y")}, 742 Document{Name: "bananaq", Content: []byte("x apple y")}) 743 744 q := &query.Substring{ 745 Pattern: "q", 746 FileName: true, 747 } 748 t.Run("LineMatches", func(t *testing.T) { 749 res := searchForTest(t, b, q) 750 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 751 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 752 } 753 }) 754 755 t.Run("LineMatches", func(t *testing.T) { 756 res := searchForTest(t, b, q, chunkOpts) 757 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 758 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 759 } 760 }) 761} 762 763func TestSearchMatchAll(t *testing.T) { 764 b := testIndexBuilder(t, nil, 765 Document{Name: "banzana", Content: []byte("x orange y")}, 766 Document{Name: "banana", Content: []byte("x apple y")}) 767 768 t.Run("LineMatches", func(t *testing.T) { 769 sres := searchForTest(t, b, &query.Const{Value: true}) 770 matches := sres.Files 771 if len(matches) != 2 { 772 t.Fatalf("got %v, want 2 matches", matches) 773 } 774 }) 775 776 t.Run("ChunkMatches", func(t *testing.T) { 777 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 778 matches := sres.Files 779 if len(matches) != 2 { 780 t.Fatalf("got %v, want 2 matches", matches) 781 } 782 }) 783} 784 785func TestSearchNewline(t *testing.T) { 786 b := testIndexBuilder(t, nil, 787 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 788 789 t.Run("LineMatches", func(t *testing.T) { 790 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 791 792 // Just check that we don't crash. 793 794 matches := sres.Files 795 if len(matches) != 1 { 796 t.Fatalf("got %v, want 1 matches", matches) 797 } 798 }) 799 800 t.Run("ChunkMatches", func(t *testing.T) { 801 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 802 803 // Just check that we don't crash. 804 805 matches := sres.Files 806 if len(matches) != 1 { 807 t.Fatalf("got %v, want 1 matches", matches) 808 } 809 }) 810} 811 812func TestSearchMatchAllRegexp(t *testing.T) { 813 b := testIndexBuilder(t, nil, 814 Document{Name: "banzana", Content: []byte("abcd")}, 815 Document{Name: "banana", Content: []byte("pqrs")}) 816 817 t.Run("LineMatches", func(t *testing.T) { 818 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 819 820 matches := sres.Files 821 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 822 t.Fatalf("got %v, want 2 matches", matches) 823 } 824 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 825 t.Fatalf("want 4 chars in every file, got %#v", matches) 826 } 827 828 }) 829 830 t.Run("ChunkMatches", func(t *testing.T) { 831 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 832 833 matches := sres.Files 834 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 835 t.Fatalf("got %v, want 2 matches", matches) 836 } 837 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 838 t.Fatalf("want 4 chars in every file, got %#v", matches) 839 } 840 841 }) 842} 843 844func TestFileRestriction(t *testing.T) { 845 b := testIndexBuilder(t, nil, 846 Document{Name: "banana1", Content: []byte("x orange y")}, 847 Document{Name: "banana2", Content: []byte("x apple y")}, 848 Document{Name: "orange", Content: []byte("x apple z")}) 849 850 t.Run("LineMatches", func(t *testing.T) { 851 sres := searchForTest(t, b, query.NewAnd( 852 &query.Substring{ 853 Pattern: "banana", 854 FileName: true, 855 }, 856 &query.Substring{ 857 Pattern: "apple", 858 })) 859 860 matches := sres.Files 861 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 862 t.Fatalf("got %v, want 1 match", matches) 863 } 864 865 match := matches[0].LineMatches[0] 866 got := string(match.Line) 867 want := "x apple y" 868 if got != want { 869 t.Errorf("got match %#v, want line %q", match, want) 870 } 871 }) 872 873 t.Run("ChunkMatches", func(t *testing.T) { 874 sres := searchForTest(t, b, query.NewAnd( 875 &query.Substring{ 876 Pattern: "banana", 877 FileName: true, 878 }, 879 &query.Substring{ 880 Pattern: "apple", 881 }), chunkOpts) 882 883 matches := sres.Files 884 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 885 t.Fatalf("got %v, want 1 match", matches) 886 } 887 888 match := matches[0].ChunkMatches[0] 889 got := string(match.Content) 890 want := "x apple y" 891 if got != want { 892 t.Errorf("got match %#v, want line %q", match, want) 893 } 894 }) 895} 896 897func TestFileNameBoundary(t *testing.T) { 898 b := testIndexBuilder(t, nil, 899 Document{Name: "banana2", Content: []byte("x apple y")}, 900 Document{Name: "helpers.go", Content: []byte("x apple y")}, 901 Document{Name: "foo", Content: []byte("x apple y")}) 902 903 t.Run("LineMatches", func(t *testing.T) { 904 sres := searchForTest(t, b, &query.Substring{ 905 Pattern: "helpers.go", 906 FileName: true, 907 }) 908 909 matches := sres.Files 910 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 911 t.Fatalf("got %v, want 1 match", matches) 912 } 913 }) 914 915 t.Run("ChunkMatches", func(t *testing.T) { 916 sres := searchForTest(t, b, &query.Substring{ 917 Pattern: "helpers.go", 918 FileName: true, 919 }, chunkOpts) 920 921 matches := sres.Files 922 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 923 t.Fatalf("got %v, want 1 match", matches) 924 } 925 }) 926} 927 928func TestDocumentOrder(t *testing.T) { 929 var docs []Document 930 for i := 0; i < 3; i++ { 931 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 932 } 933 934 b := testIndexBuilder(t, nil, docs...) 935 936 t.Run("LineMatches", func(t *testing.T) { 937 sres := searchForTest(t, b, query.NewAnd( 938 &query.Substring{ 939 Pattern: "needle", 940 })) 941 942 want := []string{"f0", "f1", "f2"} 943 var got []string 944 for _, f := range sres.Files { 945 got = append(got, f.FileName) 946 } 947 if !reflect.DeepEqual(got, want) { 948 t.Fatalf("got %v, want %v", got, want) 949 } 950 }) 951 952 t.Run("ChunkMatches", func(t *testing.T) { 953 sres := searchForTest(t, b, 954 query.NewAnd(&query.Substring{ 955 Pattern: "needle", 956 }), 957 chunkOpts, 958 ) 959 960 want := []string{"f0", "f1", "f2"} 961 var got []string 962 for _, f := range sres.Files { 963 got = append(got, f.FileName) 964 } 965 if !reflect.DeepEqual(got, want) { 966 t.Fatalf("got %v, want %v", got, want) 967 } 968 }) 969} 970 971func TestBranchMask(t *testing.T) { 972 b := testIndexBuilder(t, &Repository{ 973 Branches: []RepositoryBranch{ 974 {"master", "v-master"}, 975 {"stable", "v-stable"}, 976 {"bonzai", "v-bonzai"}, 977 }, 978 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 979 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 980 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 981 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 982 ) 983 984 t.Run("LineMatches", func(t *testing.T) { 985 sres := searchForTest(t, b, query.NewAnd( 986 &query.Substring{ 987 Pattern: "needle", 988 }, 989 &query.Branch{ 990 Pattern: "table", 991 })) 992 993 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 994 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 995 } 996 997 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 998 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 999 } 1000 }) 1001 1002 t.Run("ChunkMatches", func(t *testing.T) { 1003 sres := searchForTest(t, b, query.NewAnd( 1004 &query.Substring{ 1005 Pattern: "needle", 1006 }, 1007 &query.Branch{ 1008 Pattern: "table", 1009 }), 1010 chunkOpts, 1011 ) 1012 1013 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1014 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1015 } 1016 1017 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1018 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1019 } 1020 }) 1021} 1022 1023func TestBranchLimit(t *testing.T) { 1024 for limit := 64; limit <= 65; limit++ { 1025 r := &Repository{} 1026 for i := 0; i < limit; i++ { 1027 s := fmt.Sprintf("b%d", i) 1028 r.Branches = append(r.Branches, RepositoryBranch{ 1029 s, "v-" + s, 1030 }) 1031 } 1032 _, err := NewIndexBuilder(r) 1033 if limit == 64 && err != nil { 1034 t.Fatalf("NewIndexBuilder: %v", err) 1035 } else if limit == 65 && err == nil { 1036 t.Fatalf("NewIndexBuilder succeeded") 1037 } 1038 } 1039} 1040 1041func TestBranchReport(t *testing.T) { 1042 branches := []string{"stable", "master"} 1043 b := testIndexBuilder(t, &Repository{ 1044 Branches: []RepositoryBranch{ 1045 {"stable", "vs"}, 1046 {"master", "vm"}, 1047 }, 1048 }, 1049 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1050 1051 t.Run("LineMatches", func(t *testing.T) { 1052 sres := searchForTest(t, b, &query.Substring{ 1053 Pattern: "needle", 1054 }) 1055 if len(sres.Files) != 1 { 1056 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1057 } 1058 1059 f := sres.Files[0] 1060 if !reflect.DeepEqual(f.Branches, branches) { 1061 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1062 } 1063 }) 1064 1065 t.Run("ChunkMatches", func(t *testing.T) { 1066 sres := searchForTest(t, b, &query.Substring{ 1067 Pattern: "needle", 1068 }, chunkOpts) 1069 if len(sres.Files) != 1 { 1070 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1071 } 1072 1073 f := sres.Files[0] 1074 if !reflect.DeepEqual(f.Branches, branches) { 1075 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1076 } 1077 }) 1078 1079} 1080 1081func TestBranchVersions(t *testing.T) { 1082 b := testIndexBuilder(t, &Repository{ 1083 Branches: []RepositoryBranch{ 1084 {"stable", "v-stable"}, 1085 {"master", "v-master"}, 1086 }, 1087 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1088 1089 t.Run("LineMatches", func(t *testing.T) { 1090 sres := searchForTest(t, b, &query.Substring{ 1091 Pattern: "needle", 1092 }) 1093 if len(sres.Files) != 1 { 1094 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1095 } 1096 1097 f := sres.Files[0] 1098 if f.Version != "v-master" { 1099 t.Fatalf("got file %#v, want version 'v-master'", f) 1100 } 1101 }) 1102 1103 t.Run("ChunkMatches", func(t *testing.T) { 1104 sres := searchForTest(t, b, &query.Substring{ 1105 Pattern: "needle", 1106 }, chunkOpts) 1107 if len(sres.Files) != 1 { 1108 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1109 } 1110 1111 f := sres.Files[0] 1112 if f.Version != "v-master" { 1113 t.Fatalf("got file %#v, want version 'v-master'", f) 1114 } 1115 }) 1116} 1117 1118func mustParseRE(s string) *syntax.Regexp { 1119 r, err := syntax.Parse(s, syntax.Perl) 1120 if err != nil { 1121 panic(err) 1122 } 1123 1124 return r 1125} 1126 1127func TestRegexp(t *testing.T) { 1128 content := []byte("needle the bla") 1129 // ----------------01234567890123 1130 1131 b := testIndexBuilder(t, nil, 1132 Document{ 1133 Name: "f1", 1134 Content: content, 1135 }) 1136 1137 t.Run("LineMatches", func(t *testing.T) { 1138 sres := searchForTest(t, b, 1139 &query.Regexp{ 1140 Regexp: mustParseRE("dle.*bla"), 1141 }) 1142 1143 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1144 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1145 } 1146 1147 got := sres.Files[0].LineMatches[0] 1148 want := LineMatch{ 1149 LineFragments: []LineFragmentMatch{{ 1150 LineOffset: 3, 1151 Offset: 3, 1152 MatchLength: 11, 1153 }}, 1154 Line: content, 1155 FileName: false, 1156 LineNumber: 1, 1157 LineStart: 0, 1158 LineEnd: 14, 1159 } 1160 1161 if !reflect.DeepEqual(got, want) { 1162 t.Errorf("got %#v, want %#v", got, want) 1163 } 1164 }) 1165 1166 t.Run("ChunkMatches", func(t *testing.T) { 1167 sres := searchForTest(t, b, 1168 &query.Regexp{ 1169 Regexp: mustParseRE("dle.*bla"), 1170 }, chunkOpts) 1171 1172 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1173 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1174 } 1175 1176 got := sres.Files[0].ChunkMatches[0] 1177 want := ChunkMatch{ 1178 Content: content, 1179 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1180 Ranges: []Range{{ 1181 Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1182 End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1183 }}, 1184 } 1185 1186 if diff := cmp.Diff(want, got); diff != "" { 1187 t.Fatal(diff) 1188 } 1189 }) 1190} 1191 1192func TestRegexpFile(t *testing.T) { 1193 content := []byte("needle the bla") 1194 1195 name := "let's play: find the mussel" 1196 b := testIndexBuilder(t, nil, 1197 Document{Name: name, Content: content}, 1198 Document{Name: "play.txt", Content: content}) 1199 1200 t.Run("LineMatches", func(t *testing.T) { 1201 sres := searchForTest(t, b, 1202 &query.Regexp{ 1203 Regexp: mustParseRE("play.*mussel"), 1204 FileName: true, 1205 }) 1206 1207 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1208 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1209 } 1210 1211 if sres.Files[0].FileName != name { 1212 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1213 } 1214 }) 1215 1216 t.Run("ChunkMatches", func(t *testing.T) { 1217 sres := searchForTest(t, b, 1218 &query.Regexp{ 1219 Regexp: mustParseRE("play.*mussel"), 1220 FileName: true, 1221 }, chunkOpts) 1222 1223 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1224 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1225 } 1226 1227 if sres.Files[0].FileName != name { 1228 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1229 } 1230 }) 1231} 1232 1233func TestRegexpOrder(t *testing.T) { 1234 content := []byte("bla the needle") 1235 // ----------------01234567890123 1236 1237 b := testIndexBuilder(t, nil, 1238 Document{Name: "f1", Content: content}) 1239 1240 t.Run("LineMatches", func(t *testing.T) { 1241 sres := searchForTest(t, b, 1242 &query.Regexp{ 1243 Regexp: mustParseRE("dle.*bla"), 1244 }) 1245 1246 if len(sres.Files) != 0 { 1247 t.Fatalf("got %v, want 0 matches", sres.Files) 1248 } 1249 }) 1250 1251 t.Run("ChunkMatches", func(t *testing.T) { 1252 sres := searchForTest(t, b, 1253 &query.Regexp{ 1254 Regexp: mustParseRE("dle.*bla"), 1255 }) 1256 1257 if len(sres.Files) != 0 { 1258 t.Fatalf("got %v, want 0 matches", sres.Files) 1259 } 1260 }) 1261} 1262 1263func TestRepoName(t *testing.T) { 1264 content := []byte("bla the needle") 1265 // ----------------01234567890123 1266 1267 b := testIndexBuilder(t, &Repository{Name: "bla"}, 1268 Document{Name: "f1", Content: content}) 1269 1270 t.Run("LineMatches", func(t *testing.T) { 1271 sres := searchForTest(t, b, 1272 query.NewAnd( 1273 &query.Substring{Pattern: "needle"}, 1274 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1275 )) 1276 1277 if len(sres.Files) != 0 { 1278 t.Fatalf("got %v, want 0 matches", sres.Files) 1279 } 1280 1281 if sres.Stats.FilesConsidered > 0 { 1282 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1283 } 1284 1285 sres = searchForTest(t, b, 1286 query.NewAnd( 1287 &query.Substring{Pattern: "needle"}, 1288 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1289 )) 1290 if len(sres.Files) != 1 { 1291 t.Fatalf("got %v, want 1 match", sres.Files) 1292 } 1293 }) 1294 1295 t.Run("ChunkMatches", func(t *testing.T) { 1296 sres := searchForTest(t, b, 1297 query.NewAnd( 1298 &query.Substring{Pattern: "needle"}, 1299 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1300 ), 1301 chunkOpts, 1302 ) 1303 1304 if len(sres.Files) != 0 { 1305 t.Fatalf("got %v, want 0 matches", sres.Files) 1306 } 1307 1308 if sres.Stats.FilesConsidered > 0 { 1309 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1310 } 1311 1312 sres = searchForTest(t, b, 1313 query.NewAnd( 1314 &query.Substring{Pattern: "needle"}, 1315 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1316 )) 1317 if len(sres.Files) != 1 { 1318 t.Fatalf("got %v, want 1 match", sres.Files) 1319 } 1320 }) 1321} 1322 1323func TestMergeMatches(t *testing.T) { 1324 content := []byte("blablabla") 1325 b := testIndexBuilder(t, nil, 1326 Document{Name: "f1", Content: content}) 1327 1328 t.Run("LineMatches", func(t *testing.T) { 1329 sres := searchForTest(t, b, 1330 &query.Substring{Pattern: "bla"}) 1331 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1332 t.Fatalf("got %v, want 1 match", sres.Files) 1333 } 1334 }) 1335 1336 t.Run("ChunkMatches", func(t *testing.T) { 1337 sres := searchForTest(t, b, 1338 &query.Substring{Pattern: "bla"}, 1339 chunkOpts, 1340 ) 1341 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1342 t.Fatalf("got %v, want 1 match", sres.Files) 1343 } 1344 }) 1345} 1346 1347func TestRepoURL(t *testing.T) { 1348 content := []byte("blablabla") 1349 b := testIndexBuilder(t, &Repository{ 1350 Name: "name", 1351 URL: "URL", 1352 CommitURLTemplate: "commit", 1353 FileURLTemplate: "file-url", 1354 LineFragmentTemplate: "fragment", 1355 }, Document{Name: "f1", Content: content}) 1356 1357 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1358 1359 if sres.RepoURLs["name"] != "file-url" { 1360 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1361 } 1362 if sres.LineFragments["name"] != "fragment" { 1363 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1364 } 1365} 1366 1367func TestRegexpCaseSensitive(t *testing.T) { 1368 content := []byte("bla\nfunc unmarshalGitiles\n") 1369 b := testIndexBuilder(t, nil, Document{ 1370 Name: "f1", 1371 Content: content, 1372 }) 1373 1374 t.Run("LineMatches", func(t *testing.T) { 1375 res := searchForTest(t, b, 1376 &query.Regexp{ 1377 Regexp: mustParseRE("func.*Gitiles"), 1378 CaseSensitive: true, 1379 }) 1380 1381 if len(res.Files) != 1 { 1382 t.Fatalf("got %v, want one match", res.Files) 1383 } 1384 }) 1385 1386 t.Run("ChunkMatches", func(t *testing.T) { 1387 res := searchForTest(t, b, 1388 &query.Regexp{ 1389 Regexp: mustParseRE("func.*Gitiles"), 1390 CaseSensitive: true, 1391 }, 1392 chunkOpts, 1393 ) 1394 1395 if len(res.Files) != 1 { 1396 t.Fatalf("got %v, want one match", res.Files) 1397 } 1398 }) 1399} 1400 1401func TestRegexpCaseFolding(t *testing.T) { 1402 content := []byte("bla\nfunc unmarshalGitiles\n") 1403 1404 b := testIndexBuilder(t, nil, 1405 Document{Name: "f1", Content: content}) 1406 res := searchForTest(t, b, 1407 &query.Regexp{ 1408 Regexp: mustParseRE("func.*GITILES"), 1409 CaseSensitive: false, 1410 }) 1411 1412 if len(res.Files) != 1 { 1413 t.Fatalf("got %v, want one match", res.Files) 1414 } 1415} 1416 1417func TestCaseRegexp(t *testing.T) { 1418 content := []byte("BLABLABLA") 1419 b := testIndexBuilder(t, nil, 1420 Document{Name: "f1", Content: content}) 1421 1422 t.Run("LineMatches", func(t *testing.T) { 1423 res := searchForTest(t, b, 1424 &query.Regexp{ 1425 Regexp: mustParseRE("[xb][xl][xa]"), 1426 CaseSensitive: true, 1427 }) 1428 1429 if len(res.Files) > 0 { 1430 t.Fatalf("got %v, want no matches", res.Files) 1431 } 1432 }) 1433 1434 t.Run("ChunkMatches", func(t *testing.T) { 1435 res := searchForTest(t, b, 1436 &query.Regexp{ 1437 Regexp: mustParseRE("[xb][xl][xa]"), 1438 CaseSensitive: true, 1439 }, 1440 chunkOpts, 1441 ) 1442 1443 if len(res.Files) > 0 { 1444 t.Fatalf("got %v, want no matches", res.Files) 1445 } 1446 }) 1447} 1448 1449func TestNegativeRegexp(t *testing.T) { 1450 content := []byte("BLABLABLA needle bla") 1451 b := testIndexBuilder(t, nil, 1452 Document{Name: "f1", Content: content}) 1453 1454 t.Run("LineMatches", func(t *testing.T) { 1455 res := searchForTest(t, b, 1456 query.NewAnd( 1457 &query.Substring{ 1458 Pattern: "needle", 1459 }, 1460 &query.Not{ 1461 Child: &query.Regexp{ 1462 Regexp: mustParseRE(".cs"), 1463 }, 1464 })) 1465 1466 if len(res.Files) != 1 { 1467 t.Fatalf("got %v, want 1 match", res.Files) 1468 } 1469 }) 1470 1471 t.Run("ChunkMatches", func(t *testing.T) { 1472 res := searchForTest(t, b, 1473 query.NewAnd( 1474 &query.Substring{ 1475 Pattern: "needle", 1476 }, 1477 &query.Not{ 1478 Child: &query.Regexp{ 1479 Regexp: mustParseRE(".cs"), 1480 }, 1481 }, 1482 ), 1483 chunkOpts) 1484 1485 if len(res.Files) != 1 { 1486 t.Fatalf("got %v, want 1 match", res.Files) 1487 } 1488 }) 1489} 1490 1491func TestSymbolRank(t *testing.T) { 1492 t.Skip() 1493 1494 content := []byte("func bla() blubxxxxx") 1495 // ----------------01234567890123456789 1496 b := testIndexBuilder(t, nil, 1497 Document{ 1498 Name: "f1", 1499 Content: content, 1500 }, Document{ 1501 Name: "f2", 1502 Content: content, 1503 Symbols: []DocumentSection{{5, 8}}, 1504 }, Document{ 1505 Name: "f3", 1506 Content: content, 1507 }) 1508 1509 t.Run("LineMatches", func(t *testing.T) { 1510 res := searchForTest(t, b, 1511 &query.Substring{ 1512 CaseSensitive: false, 1513 Pattern: "bla", 1514 }) 1515 1516 if len(res.Files) != 3 { 1517 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1518 } 1519 if res.Files[0].FileName != "f2" { 1520 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1521 } 1522 }) 1523 1524 t.Run("ChunkMatches", func(t *testing.T) { 1525 res := searchForTest(t, b, 1526 &query.Substring{ 1527 CaseSensitive: false, 1528 Pattern: "bla", 1529 }, chunkOpts) 1530 1531 if len(res.Files) != 3 { 1532 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1533 } 1534 if res.Files[0].FileName != "f2" { 1535 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1536 } 1537 }) 1538} 1539 1540func TestSymbolRankRegexpUTF8(t *testing.T) { 1541 t.Skip() 1542 1543 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1544 content := []byte(prefix + 1545 "func bla() blub") 1546 // ------012345678901234 1547 b := testIndexBuilder(t, nil, 1548 Document{ 1549 Name: "f1", 1550 Content: content, 1551 }, Document{ 1552 Name: "f2", 1553 Content: content, 1554 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1555 }, Document{ 1556 Name: "f3", 1557 Content: content, 1558 }) 1559 1560 t.Run("LineMatches", func(t *testing.T) { 1561 res := searchForTest(t, b, 1562 &query.Regexp{ 1563 Regexp: mustParseRE("b.a"), 1564 }) 1565 1566 if len(res.Files) != 3 { 1567 t.Fatalf("got %#v, want 3 files", res.Files) 1568 } 1569 if res.Files[0].FileName != "f2" { 1570 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1571 } 1572 }) 1573 1574 t.Run("ChunjkMatches", func(t *testing.T) { 1575 res := searchForTest(t, b, 1576 &query.Regexp{ 1577 Regexp: mustParseRE("b.a"), 1578 }, chunkOpts) 1579 1580 if len(res.Files) != 3 { 1581 t.Fatalf("got %#v, want 3 files", res.Files) 1582 } 1583 if res.Files[0].FileName != "f2" { 1584 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1585 } 1586 }) 1587} 1588 1589func TestPartialSymbolRank(t *testing.T) { 1590 t.Skip() 1591 1592 content := []byte("func bla() blub") 1593 // ----------------012345678901234 1594 1595 b := testIndexBuilder(t, nil, 1596 Document{ 1597 Name: "f1", 1598 Content: content, 1599 Symbols: []DocumentSection{{4, 9}}, 1600 }, Document{ 1601 Name: "f2", 1602 Content: content, 1603 Symbols: []DocumentSection{{4, 8}}, 1604 }, Document{ 1605 Name: "f3", 1606 Content: content, 1607 Symbols: []DocumentSection{{4, 9}}, 1608 }) 1609 1610 t.Run("LineMatches", func(t *testing.T) { 1611 res := searchForTest(t, b, 1612 &query.Substring{ 1613 Pattern: "bla", 1614 }) 1615 1616 if len(res.Files) != 3 { 1617 t.Fatalf("got %#v, want 3 files", res.Files) 1618 } 1619 if res.Files[0].FileName != "f2" { 1620 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1621 } 1622 }) 1623 1624 t.Run("ChunkMatches", func(t *testing.T) { 1625 res := searchForTest(t, b, 1626 &query.Substring{ 1627 Pattern: "bla", 1628 }, chunkOpts) 1629 1630 if len(res.Files) != 3 { 1631 t.Fatalf("got %#v, want 3 files", res.Files) 1632 } 1633 if res.Files[0].FileName != "f2" { 1634 t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1635 } 1636 }) 1637} 1638 1639func TestNegativeRepo(t *testing.T) { 1640 content := []byte("bla the needle") 1641 // ----------------01234567890123 1642 b := testIndexBuilder(t, &Repository{ 1643 Name: "bla", 1644 }, Document{Name: "f1", Content: content}) 1645 1646 t.Run("LineMatches", func(t *testing.T) { 1647 sres := searchForTest(t, b, 1648 query.NewAnd( 1649 &query.Substring{Pattern: "needle"}, 1650 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1651 )) 1652 1653 if len(sres.Files) != 0 { 1654 t.Fatalf("got %v, want 0 matches", sres.Files) 1655 } 1656 }) 1657 1658 t.Run("ChunkMatches", func(t *testing.T) { 1659 sres := searchForTest(t, b, 1660 query.NewAnd( 1661 &query.Substring{Pattern: "needle"}, 1662 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1663 ), chunkOpts) 1664 1665 if len(sres.Files) != 0 { 1666 t.Fatalf("got %v, want 0 matches", sres.Files) 1667 } 1668 }) 1669} 1670 1671func TestListRepos(t *testing.T) { 1672 content := []byte("bla the needle\n") 1673 // ----------------012345678901234- 1674 1675 t.Run("default and minimal fallback", func(t *testing.T) { 1676 repo := &Repository{ 1677 Name: "reponame", 1678 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1679 } 1680 b := testIndexBuilder(t, repo, 1681 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1682 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1683 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1684 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1685 1686 searcher := searcherForTest(t, b) 1687 1688 for _, opts := range []*ListOptions{ 1689 nil, 1690 {Minimal: false}, 1691 {Minimal: true}, 1692 } { 1693 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1694 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1695 1696 res, err := searcher.List(context.Background(), q, opts) 1697 if err != nil { 1698 t.Fatalf("List(%v): %v", q, err) 1699 } 1700 1701 want := &RepoList{ 1702 Repos: []*RepoListEntry{{ 1703 Repository: *repo, 1704 Stats: RepoStats{ 1705 Documents: 4, 1706 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1707 Shards: 1, 1708 1709 NewLinesCount: 4, 1710 DefaultBranchNewLinesCount: 2, 1711 OtherBranchesNewLinesCount: 3, 1712 }, 1713 }}, 1714 Stats: RepoStats{ 1715 Documents: 4, 1716 ContentBytes: 68, 1717 Shards: 1, 1718 1719 NewLinesCount: 4, 1720 DefaultBranchNewLinesCount: 2, 1721 OtherBranchesNewLinesCount: 3, 1722 }, 1723 } 1724 ignored := []cmp.Option{ 1725 cmpopts.EquateEmpty(), 1726 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 1727 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 1728 cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 1729 cmpopts.IgnoreFields(Repository{}, "priority"), 1730 } 1731 if diff := cmp.Diff(want, res, ignored...); diff != "" { 1732 t.Fatalf("mismatch (-want +got):\n%s", diff) 1733 } 1734 1735 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1736 res, err = searcher.List(context.Background(), q, nil) 1737 if err != nil { 1738 t.Fatalf("List(%v): %v", q, err) 1739 } 1740 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1741 t.Fatalf("got %v, want 0 matches", res) 1742 } 1743 }) 1744 } 1745 }) 1746 1747 t.Run("minimal", func(t *testing.T) { 1748 repo := &Repository{ 1749 ID: 1234, 1750 Name: "reponame", 1751 Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1752 RawConfig: map[string]string{"repoid": "1234"}, 1753 } 1754 b := testIndexBuilder(t, repo, 1755 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1756 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1757 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1758 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1759 1760 searcher := searcherForTest(t, b) 1761 1762 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1763 res, err := searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1764 if err != nil { 1765 t.Fatalf("List(%v): %v", q, err) 1766 } 1767 1768 want := &RepoList{ 1769 Minimal: map[uint32]*MinimalRepoListEntry{ 1770 repo.ID: { 1771 HasSymbols: repo.HasSymbols, 1772 Branches: repo.Branches, 1773 }, 1774 }, 1775 Stats: RepoStats{ 1776 Shards: 1, 1777 Documents: 4, 1778 IndexBytes: 412, 1779 ContentBytes: 68, 1780 NewLinesCount: 4, 1781 DefaultBranchNewLinesCount: 2, 1782 OtherBranchesNewLinesCount: 3, 1783 }, 1784 } 1785 1786 if os.Getenv("ZOEKT_ENABLE_NGRAM_BS") != "" { 1787 want.Stats.IndexBytes = 228 1788 } 1789 1790 if diff := cmp.Diff(want, res); diff != "" { 1791 t.Fatalf("mismatch (-want +got):\n%s", diff) 1792 } 1793 1794 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 1795 res, err = searcher.List(context.Background(), q, &ListOptions{Minimal: true}) 1796 if err != nil { 1797 t.Fatalf("List(%v): %v", q, err) 1798 } 1799 if len(res.Repos) != 0 || len(res.Minimal) != 0 { 1800 t.Fatalf("got %v, want 0 matches", res) 1801 } 1802 }) 1803} 1804 1805func TestListReposByContent(t *testing.T) { 1806 content := []byte("bla the needle") 1807 1808 b := testIndexBuilder(t, &Repository{ 1809 Name: "reponame", 1810 }, 1811 Document{Name: "f1", Content: content}, 1812 Document{Name: "f2", Content: content}) 1813 1814 searcher := searcherForTest(t, b) 1815 q := &query.Substring{Pattern: "needle"} 1816 res, err := searcher.List(context.Background(), q, nil) 1817 if err != nil { 1818 t.Fatalf("List(%v): %v", q, err) 1819 } 1820 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 1821 t.Fatalf("got %v, want 1 matches", res) 1822 } 1823 if got := res.Repos[0].Stats.Shards; got != 1 { 1824 t.Fatalf("got %d, want 1 shard", got) 1825 } 1826 q = &query.Substring{Pattern: "foo"} 1827 res, err = searcher.List(context.Background(), q, nil) 1828 if err != nil { 1829 t.Fatalf("List(%v): %v", q, err) 1830 } 1831 if len(res.Repos) != 0 { 1832 t.Fatalf("got %v, want 0 matches", res) 1833 } 1834} 1835 1836func TestMetadata(t *testing.T) { 1837 content := []byte("bla the needle") 1838 1839 b := testIndexBuilder(t, &Repository{ 1840 Name: "reponame", 1841 }, Document{Name: "f1", Content: content}, 1842 Document{Name: "f2", Content: content}) 1843 1844 var buf bytes.Buffer 1845 if err := b.Write(&buf); err != nil { 1846 t.Fatal(err) 1847 } 1848 f := &memSeeker{buf.Bytes()} 1849 1850 rd, _, err := ReadMetadata(f) 1851 if err != nil { 1852 t.Fatalf("ReadMetadata: %v", err) 1853 } 1854 1855 if got, want := rd[0].Name, "reponame"; got != want { 1856 t.Fatalf("got %q want %q", got, want) 1857 } 1858} 1859 1860func TestOr(t *testing.T) { 1861 b := testIndexBuilder(t, nil, 1862 Document{Name: "f1", Content: []byte("needle")}, 1863 Document{Name: "f2", Content: []byte("banana")}) 1864 t.Run("LineMatches", func(t *testing.T) { 1865 sres := searchForTest(t, b, query.NewOr( 1866 &query.Substring{Pattern: "needle"}, 1867 &query.Substring{Pattern: "banana"})) 1868 1869 if len(sres.Files) != 2 { 1870 t.Fatalf("got %v, want 2 files", sres.Files) 1871 } 1872 }) 1873 1874 t.Run("ChunkMatches", func(t *testing.T) { 1875 sres := searchForTest(t, b, query.NewOr( 1876 &query.Substring{Pattern: "needle"}, 1877 &query.Substring{Pattern: "banana"})) 1878 1879 if len(sres.Files) != 2 { 1880 t.Fatalf("got %v, want 2 files", sres.Files) 1881 } 1882 }) 1883} 1884 1885func TestFrequency(t *testing.T) { 1886 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 1887 1888 b := testIndexBuilder(t, nil, 1889 Document{ 1890 Name: "f1", 1891 Content: content, 1892 }) 1893 1894 t.Run("LineMatches", func(t *testing.T) { 1895 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 1896 if len(sres.Files) != 0 { 1897 t.Errorf("got %v, wanted 0 matches", sres.Files) 1898 } 1899 }) 1900 1901 t.Run("ChunkMatches", func(t *testing.T) { 1902 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 1903 if len(sres.Files) != 0 { 1904 t.Errorf("got %v, wanted 0 matches", sres.Files) 1905 } 1906 }) 1907} 1908 1909func TestMatchNewline(t *testing.T) { 1910 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 1911 if err != nil { 1912 t.Fatalf("syntax.Parse: %v", err) 1913 } 1914 1915 content := []byte("pqr\nalex") 1916 1917 b := testIndexBuilder(t, nil, 1918 Document{ 1919 Name: "f1", 1920 Content: content, 1921 }) 1922 1923 t.Run("LineMatches", func(t *testing.T) { 1924 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1925 if len(sres.Files) != 1 { 1926 t.Errorf("got %v, wanted 1 matches", sres.Files) 1927 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 1928 t.Errorf("got match line %q, want %q", l, content) 1929 } 1930 }) 1931 1932 t.Run("ChunkMatches", func(t *testing.T) { 1933 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 1934 if len(sres.Files) != 1 { 1935 t.Errorf("got %v, wanted 1 matches", sres.Files) 1936 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 1937 t.Errorf("got match line %q, want %q", c, content) 1938 } 1939 }) 1940} 1941 1942func TestSubRepo(t *testing.T) { 1943 subRepos := map[string]*Repository{ 1944 "sub": { 1945 Name: "sub-name", 1946 LineFragmentTemplate: "sub-line", 1947 }, 1948 } 1949 1950 content := []byte("pqr\nalex") 1951 1952 b := testIndexBuilder(t, &Repository{ 1953 SubRepoMap: subRepos, 1954 }, Document{ 1955 Name: "sub/f1", 1956 Content: content, 1957 SubRepositoryPath: "sub", 1958 }) 1959 1960 sres := searchForTest(t, b, &query.Substring{Pattern: "alex"}) 1961 if len(sres.Files) != 1 { 1962 t.Fatalf("got %v, wanted 1 matches", sres.Files) 1963 } 1964 1965 f := sres.Files[0] 1966 if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" { 1967 t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f) 1968 } 1969 1970 if sres.LineFragments["sub-name"] != "sub-line" { 1971 t.Errorf("got LineFragmentTemplate %v, want {'sub':'sub-line'}", sres.LineFragments) 1972 } 1973} 1974 1975func TestSearchEither(t *testing.T) { 1976 b := testIndexBuilder(t, nil, 1977 Document{Name: "f1", Content: []byte("bla needle bla")}, 1978 Document{Name: "needle-file-branch", Content: []byte("bla content")}) 1979 1980 t.Run("LineMatches", func(t *testing.T) { 1981 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}) 1982 if len(sres.Files) != 2 { 1983 t.Fatalf("got %v, wanted 2 matches", sres.Files) 1984 } 1985 1986 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 1987 if len(sres.Files) != 1 { 1988 t.Fatalf("got %v, wanted 1 match", sres.Files) 1989 } 1990 1991 if got, want := sres.Files[0].FileName, "f1"; got != want { 1992 t.Errorf("got %q, want %q", got, want) 1993 } 1994 }) 1995 1996 t.Run("ChunkMatches", func(t *testing.T) { 1997 sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts) 1998 if len(sres.Files) != 2 { 1999 t.Fatalf("got %v, wanted 2 matches", sres.Files) 2000 } 2001 2002 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2003 if len(sres.Files) != 1 { 2004 t.Fatalf("got %v, wanted 1 match", sres.Files) 2005 } 2006 2007 if got, want := sres.Files[0].FileName, "f1"; got != want { 2008 t.Errorf("got %q, want %q", got, want) 2009 } 2010 }) 2011} 2012 2013func TestUnicodeExactMatch(t *testing.T) { 2014 needle := "néédlÉ" 2015 content := []byte("blá blá " + needle + " blâ") 2016 2017 b := testIndexBuilder(t, nil, 2018 Document{Name: "f1", Content: content}) 2019 2020 t.Run("LineMatches", func(t *testing.T) { 2021 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2022 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2023 } 2024 }) 2025 2026 t.Run("ChunkMatches", func(t *testing.T) { 2027 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2028 if len(res.Files) != 1 { 2029 t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2030 } 2031 }) 2032} 2033 2034func TestUnicodeCoverContent(t *testing.T) { 2035 needle := "néédlÉ" 2036 content := []byte("blá blá " + needle + " blâ") 2037 2038 b := testIndexBuilder(t, nil, 2039 Document{Name: "f1", Content: content}) 2040 2041 t.Run("LineMatches", func(t *testing.T) { 2042 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2043 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2044 } 2045 2046 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2047 if len(res.Files) != 1 { 2048 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2049 } 2050 2051 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2052 t.Errorf("got %d want %d", got, want) 2053 } 2054 }) 2055 2056 t.Run("ChunkMatches", func(t *testing.T) { 2057 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2058 if len(res.Files) != 0 { 2059 t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2060 } 2061 2062 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2063 if len(res.Files) != 1 { 2064 t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2065 } 2066 2067 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2068 want := uint32(strings.Index(string(content), needle)) 2069 if got != want { 2070 t.Errorf("got %d want %d", got, want) 2071 } 2072 }) 2073} 2074 2075func TestUnicodeNonCoverContent(t *testing.T) { 2076 needle := "nééáádlÉ" 2077 content := []byte("blá blá " + needle + " blâ") 2078 2079 b := testIndexBuilder(t, nil, 2080 Document{Name: "f1", Content: content}) 2081 2082 t.Run("LineMatches", func(t *testing.T) { 2083 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2084 if len(res.Files) != 1 { 2085 t.Fatalf("got %v, wanted 1 match", res.Files) 2086 } 2087 2088 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2089 t.Errorf("got %d want %d", got, want) 2090 } 2091 }) 2092 2093 t.Run("ChunkMatches", func(t *testing.T) { 2094 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2095 if len(res.Files) != 1 { 2096 t.Fatalf("got %v, wanted 1 match", res.Files) 2097 } 2098 2099 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2100 want := uint32(strings.Index(string(content), needle)) 2101 if got != want { 2102 t.Errorf("got %d want %d", got, want) 2103 } 2104 }) 2105} 2106 2107const kelvinCodePoint = 8490 2108 2109func TestUnicodeVariableLength(t *testing.T) { 2110 lower := 'k' 2111 upper := rune(kelvinCodePoint) 2112 2113 needle := "nee" + string([]rune{lower}) + "eed" 2114 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2115 " ee" + string([]rune{lower}) + "ee" + 2116 " ee" + string([]rune{upper}) + "ee") 2117 2118 t.Run("LineMatches", func(t *testing.T) { 2119 b := testIndexBuilder(t, nil, 2120 Document{Name: "f1", Content: []byte(corpus)}) 2121 2122 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2123 if len(res.Files) != 1 { 2124 t.Fatalf("got %v, wanted 1 match", res.Files) 2125 } 2126 }) 2127 2128 t.Run("ChunkMatches", func(t *testing.T) { 2129 b := testIndexBuilder(t, nil, 2130 Document{Name: "f1", Content: []byte(corpus)}) 2131 2132 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2133 if len(res.Files) != 1 { 2134 t.Fatalf("got %v, wanted 1 match", res.Files) 2135 } 2136 }) 2137} 2138 2139func TestUnicodeFileStartOffsets(t *testing.T) { 2140 unicode := "世界" 2141 wat := "waaaaaat" 2142 b := testIndexBuilder(t, nil, 2143 Document{ 2144 Name: "f1", 2145 Content: []byte(unicode), 2146 }, 2147 Document{ 2148 Name: "f2", 2149 Content: []byte(wat), 2150 }, 2151 ) 2152 q := &query.Substring{Pattern: wat, Content: true} 2153 res := searchForTest(t, b, q) 2154 if len(res.Files) != 1 { 2155 t.Fatalf("got %v, wanted 1 match", res.Files) 2156 } 2157} 2158 2159func TestLongFileUTF8(t *testing.T) { 2160 needle := "neeedle" 2161 2162 // 6 bytes. 2163 unicode := "世界" 2164 content := []byte(strings.Repeat(unicode, 100) + needle) 2165 b := testIndexBuilder(t, nil, 2166 Document{ 2167 Name: "f1", 2168 Content: []byte(strings.Repeat("a", 50)), 2169 }, 2170 Document{ 2171 Name: "f2", 2172 Content: content, 2173 }) 2174 2175 t.Run("LineMatches", func(t *testing.T) { 2176 q := &query.Substring{Pattern: needle, Content: true} 2177 res := searchForTest(t, b, q) 2178 if len(res.Files) != 1 { 2179 t.Errorf("got %v, want 1 result", res) 2180 } 2181 }) 2182 2183 t.Run("ChunkMatches", func(t *testing.T) { 2184 q := &query.Substring{Pattern: needle, Content: true} 2185 res := searchForTest(t, b, q, chunkOpts) 2186 if len(res.Files) != 1 { 2187 t.Errorf("got %v, want 1 result", res) 2188 } 2189 }) 2190} 2191 2192func TestEstimateDocCount(t *testing.T) { 2193 content := []byte("bla needle bla") 2194 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2195 Document{Name: "f1", Content: content}, 2196 Document{Name: "f2", Content: content}, 2197 ) 2198 2199 t.Run("LineMatches", func(t *testing.T) { 2200 if sres := searchForTest(t, b, 2201 query.NewAnd( 2202 &query.Substring{Pattern: "needle"}, 2203 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2204 ), SearchOptions{ 2205 EstimateDocCount: true, 2206 }); sres.Stats.ShardFilesConsidered != 2 { 2207 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2208 } 2209 if sres := searchForTest(t, b, 2210 query.NewAnd( 2211 &query.Substring{Pattern: "needle"}, 2212 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2213 ), SearchOptions{ 2214 EstimateDocCount: true, 2215 }); sres.Stats.ShardFilesConsidered != 0 { 2216 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2217 } 2218 }) 2219 2220 t.Run("ChunkMatches", func(t *testing.T) { 2221 if sres := searchForTest(t, b, 2222 query.NewAnd( 2223 &query.Substring{Pattern: "needle"}, 2224 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2225 ), SearchOptions{ 2226 EstimateDocCount: true, 2227 ChunkMatches: true, 2228 }); sres.Stats.ShardFilesConsidered != 2 { 2229 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2230 } 2231 if sres := searchForTest(t, b, 2232 query.NewAnd( 2233 &query.Substring{Pattern: "needle"}, 2234 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2235 ), SearchOptions{ 2236 EstimateDocCount: true, 2237 ChunkMatches: true, 2238 }); sres.Stats.ShardFilesConsidered != 0 { 2239 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2240 } 2241 }) 2242} 2243 2244func TestUTF8CorrectCorpus(t *testing.T) { 2245 needle := "neeedle" 2246 2247 // 6 bytes. 2248 unicode := "世界" 2249 b := testIndexBuilder(t, nil, 2250 Document{ 2251 Name: "f1", 2252 Content: []byte(strings.Repeat(unicode, 100)), 2253 }, 2254 Document{ 2255 Name: "xxxxxneeedle", 2256 Content: []byte("hello"), 2257 }) 2258 2259 t.Run("LineMatches", func(t *testing.T) { 2260 q := &query.Substring{Pattern: needle, FileName: true} 2261 res := searchForTest(t, b, q) 2262 if len(res.Files) != 1 { 2263 t.Errorf("got %v, want 1 result", res) 2264 } 2265 }) 2266 2267 t.Run("ChunkMatches", func(t *testing.T) { 2268 q := &query.Substring{Pattern: needle, FileName: true} 2269 res := searchForTest(t, b, q, chunkOpts) 2270 if len(res.Files) != 1 { 2271 t.Errorf("got %v, want 1 result", res) 2272 } 2273 }) 2274} 2275 2276func TestBuilderStats(t *testing.T) { 2277 b := testIndexBuilder(t, nil, 2278 Document{ 2279 Name: "f1", 2280 Content: []byte(strings.Repeat("abcd", 1024)), 2281 }) 2282 var buf bytes.Buffer 2283 if err := b.Write(&buf); err != nil { 2284 t.Fatal(err) 2285 } 2286 2287 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2288 t.Errorf("got %d, want %d", got, want) 2289 } 2290} 2291 2292func TestIOStats(t *testing.T) { 2293 b := testIndexBuilder(t, nil, 2294 Document{ 2295 Name: "f1", 2296 Content: []byte(strings.Repeat("abcd", 1024)), 2297 }) 2298 2299 t.Run("LineMatches", func(t *testing.T) { 2300 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2301 res := searchForTest(t, b, q) 2302 2303 // 4096 (content) + 2 (overhead: newlines or doc sections) 2304 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2305 t.Errorf("got content I/O %d, want %d", got, want) 2306 } 2307 2308 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2309 // delta encoded. 2310 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2311 t.Errorf("got index I/O %d, want %d", got, want) 2312 } 2313 }) 2314 2315 t.Run("ChunkMatches", func(t *testing.T) { 2316 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2317 res := searchForTest(t, b, q, chunkOpts) 2318 2319 // 4096 (content) + 2 (overhead: newlines or doc sections) 2320 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2321 t.Errorf("got content I/O %d, want %d", got, want) 2322 } 2323 2324 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2325 // delta encoded. 2326 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2327 t.Errorf("got index I/O %d, want %d", got, want) 2328 } 2329 }) 2330} 2331 2332func TestStartLineAnchor(t *testing.T) { 2333 b := testIndexBuilder(t, nil, 2334 Document{ 2335 Name: "f1", 2336 Content: []byte( 2337 `hello 2338start of middle of line 2339`), 2340 }) 2341 2342 t.Run("LineMatches", func(t *testing.T) { 2343 q, err := query.Parse("^start") 2344 if err != nil { 2345 t.Errorf("parse: %v", err) 2346 } 2347 2348 res := searchForTest(t, b, q) 2349 if len(res.Files) != 1 { 2350 t.Errorf("got %v, want 1 file", res.Files) 2351 } 2352 2353 q, err = query.Parse("^middle") 2354 if err != nil { 2355 t.Errorf("parse: %v", err) 2356 } 2357 res = searchForTest(t, b, q) 2358 if len(res.Files) != 0 { 2359 t.Errorf("got %v, want 0 files", res.Files) 2360 } 2361 }) 2362 2363 t.Run("ChunkMatches", func(t *testing.T) { 2364 q, err := query.Parse("^start") 2365 if err != nil { 2366 t.Errorf("parse: %v", err) 2367 } 2368 2369 res := searchForTest(t, b, q, chunkOpts) 2370 if len(res.Files) != 1 { 2371 t.Errorf("got %v, want 1 file", res.Files) 2372 } 2373 2374 q, err = query.Parse("^middle") 2375 if err != nil { 2376 t.Errorf("parse: %v", err) 2377 } 2378 res = searchForTest(t, b, q, chunkOpts) 2379 if len(res.Files) != 0 { 2380 t.Errorf("got %v, want 0 files", res.Files) 2381 } 2382 }) 2383} 2384 2385func TestAndOrUnicode(t *testing.T) { 2386 q, err := query.Parse("orange.*apple") 2387 if err != nil { 2388 t.Errorf("parse: %v", err) 2389 } 2390 finalQ := query.NewAnd(q, 2391 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2392 query.NewOr(&query.Branch{Pattern: "master"})))) 2393 2394 b := testIndexBuilder(t, &Repository{ 2395 Name: "name", 2396 Branches: []RepositoryBranch{{"master", "master-version"}}, 2397 }, Document{ 2398 Name: "f2", 2399 Content: []byte("orange\u2318apple"), 2400 // --------------0123456 78901 2401 Branches: []string{"master"}, 2402 }) 2403 2404 t.Run("LineMatches", func(t *testing.T) { 2405 res := searchForTest(t, b, finalQ) 2406 if len(res.Files) != 1 { 2407 t.Errorf("got %v, want 1 result", res.Files) 2408 } 2409 }) 2410 2411 t.Run("ChunkMatches", func(t *testing.T) { 2412 res := searchForTest(t, b, finalQ, chunkOpts) 2413 if len(res.Files) != 1 { 2414 t.Errorf("got %v, want 1 result", res.Files) 2415 } 2416 }) 2417} 2418 2419func TestAndShort(t *testing.T) { 2420 content := []byte("bla needle at orange bla") 2421 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2422 Document{Name: "f1", Content: content}, 2423 Document{Name: "f2", Content: []byte("xx at xx")}, 2424 Document{Name: "f3", Content: []byte("yy orange xx")}, 2425 ) 2426 2427 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2428 &query.Substring{Pattern: "orange"}) 2429 2430 t.Run("LineMatches", func(t *testing.T) { 2431 res := searchForTest(t, b, q) 2432 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2433 t.Errorf("got %v, want 1 result", res.Files) 2434 } 2435 }) 2436 2437 t.Run("ChunkMatches", func(t *testing.T) { 2438 res := searchForTest(t, b, q, chunkOpts) 2439 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2440 t.Errorf("got %v, want 1 result", res.Files) 2441 } 2442 }) 2443} 2444 2445func TestNoCollectRegexpSubstring(t *testing.T) { 2446 content := []byte("bla final bla\nfoo final, foo") 2447 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2448 Document{Name: "f1", Content: content}, 2449 ) 2450 2451 q := &query.Regexp{ 2452 Regexp: mustParseRE("final[,.]"), 2453 } 2454 2455 t.Run("LineMatches", func(t *testing.T) { 2456 res := searchForTest(t, b, q) 2457 if len(res.Files) != 1 { 2458 t.Fatalf("got %v, want 1 result", res.Files) 2459 } 2460 if f := res.Files[0]; len(f.LineMatches) != 1 { 2461 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2462 } 2463 }) 2464 2465 t.Run("ChunkMatches", func(t *testing.T) { 2466 res := searchForTest(t, b, q, chunkOpts) 2467 if len(res.Files) != 1 { 2468 t.Fatalf("got %v, want 1 result", res.Files) 2469 } 2470 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2471 t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2472 } 2473 }) 2474} 2475 2476func printLineMatches(ms []LineMatch) string { 2477 var ss []string 2478 for _, m := range ms { 2479 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2480 } 2481 2482 return strings.Join(ss, ", ") 2483} 2484 2485func TestLang(t *testing.T) { 2486 content := []byte("bla needle bla") 2487 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2488 Document{Name: "f1", Content: content}, 2489 Document{Name: "f2", Language: "java", Content: content}, 2490 Document{Name: "f3", Language: "cpp", Content: content}, 2491 ) 2492 2493 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2494 &query.Language{Language: "cpp"}) 2495 2496 t.Run("LineMatches", func(t *testing.T) { 2497 res := searchForTest(t, b, q) 2498 if len(res.Files) != 1 { 2499 t.Fatalf("got %v, want 1 result in f3", res.Files) 2500 } 2501 f := res.Files[0] 2502 if f.FileName != "f3" || f.Language != "cpp" { 2503 t.Fatalf("got %v, want 1 match with language cpp", f) 2504 } 2505 }) 2506 2507 t.Run("ChunkMatches", func(t *testing.T) { 2508 res := searchForTest(t, b, q, chunkOpts) 2509 if len(res.Files) != 1 { 2510 t.Fatalf("got %v, want 1 result in f3", res.Files) 2511 } 2512 f := res.Files[0] 2513 if f.FileName != "f3" || f.Language != "cpp" { 2514 t.Fatalf("got %v, want 1 match with language cpp", f) 2515 } 2516 }) 2517} 2518 2519func TestLangShortcut(t *testing.T) { 2520 content := []byte("bla needle bla") 2521 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2522 Document{Name: "f2", Language: "java", Content: content}, 2523 Document{Name: "f3", Language: "cpp", Content: content}, 2524 ) 2525 2526 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2527 &query.Language{Language: "fortran"}) 2528 2529 t.Run("LineMatches", func(t *testing.T) { 2530 res := searchForTest(t, b, q) 2531 if len(res.Files) != 0 { 2532 t.Fatalf("got %v, want 0 results", res.Files) 2533 } 2534 if res.Stats.IndexBytesLoaded > 0 { 2535 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2536 } 2537 }) 2538 2539 t.Run("ChunkMatches", func(t *testing.T) { 2540 res := searchForTest(t, b, q, chunkOpts) 2541 if len(res.Files) != 0 { 2542 t.Fatalf("got %v, want 0 results", res.Files) 2543 } 2544 if res.Stats.IndexBytesLoaded > 0 { 2545 t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2546 } 2547 }) 2548} 2549 2550func TestNoTextMatchAtoms(t *testing.T) { 2551 content := []byte("bla needle bla") 2552 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2553 Document{Name: "f1", Content: content}, 2554 Document{Name: "f2", Language: "java", Content: content}, 2555 Document{Name: "f3", Language: "cpp", Content: content}, 2556 ) 2557 q := query.NewAnd(&query.Language{Language: "java"}) 2558 t.Run("LineMatches", func(t *testing.T) { 2559 res := searchForTest(t, b, q) 2560 if len(res.Files) != 1 { 2561 t.Fatalf("got %v, want 1 result in f3", res.Files) 2562 } 2563 }) 2564 2565 t.Run("ChunkMatches", func(t *testing.T) { 2566 res := searchForTest(t, b, q, chunkOpts) 2567 if len(res.Files) != 1 { 2568 t.Fatalf("got %v, want 1 result in f3", res.Files) 2569 } 2570 }) 2571} 2572 2573func TestNoPositiveAtoms(t *testing.T) { 2574 content := []byte("bla needle bla") 2575 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2576 Document{Name: "f1", Content: content}, 2577 Document{Name: "f2", Content: content}, 2578 ) 2579 2580 q := query.NewAnd( 2581 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2582 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2583 t.Run("LineMatches", func(t *testing.T) { 2584 res := searchForTest(t, b, q) 2585 if len(res.Files) != 2 { 2586 t.Fatalf("got %v, want 2 results in f3", res.Files) 2587 } 2588 }) 2589 t.Run("ChunkMatches", func(t *testing.T) { 2590 res := searchForTest(t, b, q, chunkOpts) 2591 if len(res.Files) != 2 { 2592 t.Fatalf("got %v, want 2 results in f3", res.Files) 2593 } 2594 }) 2595} 2596 2597func TestSymbolBoundaryStart(t *testing.T) { 2598 content := []byte("start\nbla bla\nend") 2599 // ----------------012345-67890123-456 2600 2601 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2602 Document{ 2603 Name: "f1", 2604 Content: content, 2605 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2606 }, 2607 ) 2608 q := &query.Symbol{ 2609 Expr: &query.Substring{Pattern: "start"}, 2610 } 2611 t.Run("LineMatches", func(t *testing.T) { 2612 res := searchForTest(t, b, q) 2613 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2614 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2615 } 2616 m := res.Files[0].LineMatches[0].LineFragments[0] 2617 if m.Offset != 0 { 2618 t.Fatalf("got offset %d want 0", m.Offset) 2619 } 2620 }) 2621 2622 t.Run("ChunkMatches", func(t *testing.T) { 2623 res := searchForTest(t, b, q, chunkOpts) 2624 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2625 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2626 } 2627 m := res.Files[0].ChunkMatches[0].Ranges[0] 2628 if m.Start.ByteOffset != 0 { 2629 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2630 } 2631 }) 2632} 2633 2634func TestSymbolBoundaryEnd(t *testing.T) { 2635 content := []byte("start\nbla bla\nend") 2636 // ----------------012345-67890123-456 2637 2638 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2639 Document{ 2640 Name: "f1", 2641 Content: content, 2642 Symbols: []DocumentSection{{14, 17}}, 2643 }, 2644 ) 2645 q := &query.Symbol{ 2646 Expr: &query.Substring{Pattern: "end"}, 2647 } 2648 t.Run("LineMatches", func(t *testing.T) { 2649 res := searchForTest(t, b, q) 2650 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2651 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2652 } 2653 m := res.Files[0].LineMatches[0].LineFragments[0] 2654 if m.Offset != 14 { 2655 t.Fatalf("got offset %d want 0", m.Offset) 2656 } 2657 }) 2658 2659 t.Run("ChunkMatches", func(t *testing.T) { 2660 res := searchForTest(t, b, q, chunkOpts) 2661 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2662 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2663 } 2664 m := res.Files[0].ChunkMatches[0].Ranges[0] 2665 if m.Start.ByteOffset != 14 { 2666 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2667 } 2668 }) 2669} 2670 2671func TestSymbolSubstring(t *testing.T) { 2672 content := []byte("bla\nsymblabla\nbla") 2673 // ----------------0123-4567890123-456 2674 2675 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2676 Document{ 2677 Name: "f1", 2678 Content: content, 2679 Symbols: []DocumentSection{{4, 12}}, 2680 }, 2681 ) 2682 q := &query.Symbol{ 2683 Expr: &query.Substring{Pattern: "bla"}, 2684 } 2685 t.Run("LineMatches", func(t *testing.T) { 2686 res := searchForTest(t, b, q) 2687 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2688 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2689 } 2690 m := res.Files[0].LineMatches[0].LineFragments[0] 2691 if m.Offset != 7 || m.MatchLength != 3 { 2692 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 2693 } 2694 }) 2695 2696 t.Run("ChunkMatches", func(t *testing.T) { 2697 res := searchForTest(t, b, q, chunkOpts) 2698 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2699 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2700 } 2701 m := res.Files[0].ChunkMatches[0].Ranges[0] 2702 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 2703 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 2704 } 2705 }) 2706} 2707 2708func TestSymbolSubstringExact(t *testing.T) { 2709 content := []byte("bla\nsym\nbla\nsym\nasymb") 2710 // ----------------0123-4567-890123456-78901 2711 2712 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2713 Document{ 2714 Name: "f1", 2715 Content: content, 2716 Symbols: []DocumentSection{{4, 7}}, 2717 }, 2718 ) 2719 q := &query.Symbol{ 2720 Expr: &query.Substring{Pattern: "sym"}, 2721 } 2722 t.Run("LineMatches", func(t *testing.T) { 2723 res := searchForTest(t, b, q) 2724 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2725 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2726 } 2727 m := res.Files[0].LineMatches[0].LineFragments[0] 2728 if m.Offset != 4 { 2729 t.Fatalf("got offset %d, want 7", m.Offset) 2730 } 2731 }) 2732 2733 t.Run("ChunkMatches", func(t *testing.T) { 2734 res := searchForTest(t, b, q, chunkOpts) 2735 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2736 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2737 } 2738 m := res.Files[0].ChunkMatches[0].Ranges[0] 2739 if m.Start.ByteOffset != 4 { 2740 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 2741 } 2742 }) 2743} 2744 2745func TestSymbolRegexpExact(t *testing.T) { 2746 content := []byte("blah\nbla\nbl") 2747 // ----------------01234-5678-90 2748 2749 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2750 Document{ 2751 Name: "f1", 2752 Content: content, 2753 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 2754 }, 2755 ) 2756 q := &query.Symbol{ 2757 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 2758 } 2759 t.Run("LineMatches", func(t *testing.T) { 2760 res := searchForTest(t, b, q) 2761 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2762 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2763 } 2764 m := res.Files[0].LineMatches[0].LineFragments[0] 2765 if m.Offset != 5 { 2766 t.Fatalf("got offset %d, want 5", m.Offset) 2767 } 2768 }) 2769 2770 t.Run("ChunkMatches", func(t *testing.T) { 2771 res := searchForTest(t, b, q, chunkOpts) 2772 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2773 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2774 } 2775 m := res.Files[0].ChunkMatches[0].Ranges[0] 2776 if m.Start.ByteOffset != 5 { 2777 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 2778 } 2779 }) 2780} 2781 2782func TestSymbolRegexpPartial(t *testing.T) { 2783 content := []byte("abcdef") 2784 // ----------------012345 2785 2786 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2787 Document{ 2788 Name: "f1", 2789 Content: content, 2790 Symbols: []DocumentSection{{0, 6}}, 2791 }, 2792 ) 2793 q := &query.Symbol{ 2794 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 2795 } 2796 t.Run("LineMatches", func(t *testing.T) { 2797 res := searchForTest(t, b, q) 2798 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2799 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2800 } 2801 m := res.Files[0].LineMatches[0].LineFragments[0] 2802 if m.Offset != 1 { 2803 t.Fatalf("got offset %d, want 1", m.Offset) 2804 } 2805 if m.MatchLength != 3 { 2806 t.Fatalf("got match length %d, want 3", m.MatchLength) 2807 } 2808 }) 2809 2810 t.Run("ChunkMatches", func(t *testing.T) { 2811 res := searchForTest(t, b, q, chunkOpts) 2812 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2813 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2814 } 2815 m := res.Files[0].ChunkMatches[0].Ranges[0] 2816 if m.Start.ByteOffset != 1 { 2817 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 2818 } 2819 if m.End.ByteOffset != 4 { 2820 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 2821 } 2822 }) 2823} 2824 2825func TestSymbolRegexpAll(t *testing.T) { 2826 docs := []Document{ 2827 { 2828 Name: "f1", 2829 Content: []byte("Hello Zoekt"), 2830 // --------------01234567890 2831 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 2832 }, 2833 { 2834 Name: "f2", 2835 Content: []byte("Second Zoekt Third"), 2836 // --------------012345678901234567 2837 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 2838 }, 2839 } 2840 2841 b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) 2842 q := &query.Symbol{ 2843 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 2844 } 2845 t.Run("LineMatches", func(t *testing.T) { 2846 res := searchForTest(t, b, q) 2847 if len(res.Files) != len(docs) { 2848 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2849 } 2850 for i, want := range docs { 2851 got := res.Files[i].LineMatches[0].LineFragments 2852 if len(got) != len(want.Symbols) { 2853 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2854 } 2855 2856 for j, sec := range want.Symbols { 2857 if sec.Start != got[j].Offset { 2858 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 2859 } 2860 } 2861 } 2862 }) 2863 2864 t.Run("ChunkMatches", func(t *testing.T) { 2865 res := searchForTest(t, b, q, chunkOpts) 2866 if len(res.Files) != len(docs) { 2867 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 2868 } 2869 for i, want := range docs { 2870 got := res.Files[i].ChunkMatches[0].Ranges 2871 if len(got) != len(want.Symbols) { 2872 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 2873 } 2874 2875 for j, sec := range want.Symbols { 2876 if sec.Start != uint32(got[j].Start.ByteOffset) { 2877 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 2878 } 2879 } 2880 } 2881 }) 2882} 2883 2884func TestHitIterTerminate(t *testing.T) { 2885 // contrived input: trigram frequencies forces selecting abc + 2886 // def for the distance iteration. There is no match, so this 2887 // will advance the compressedPostingIterator to beyond the 2888 // end. 2889 content := []byte("abc bcdbcd cdecde abcabc def efg") 2890 b := testIndexBuilder(t, nil, 2891 Document{ 2892 Name: "f1", 2893 Content: content, 2894 }, 2895 ) 2896 2897 t.Run("LineMatches", func(t *testing.T) { 2898 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 2899 }) 2900 2901 t.Run("ChunkMatches", func(t *testing.T) { 2902 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 2903 }) 2904} 2905 2906func TestDistanceHitIterBailLast(t *testing.T) { 2907 content := []byte("AST AST AST UASH") 2908 b := testIndexBuilder(t, nil, 2909 Document{ 2910 Name: "f1", 2911 Content: content, 2912 }, 2913 ) 2914 t.Run("LineMatches", func(t *testing.T) { 2915 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 2916 if len(res.Files) != 0 { 2917 t.Fatalf("got %v, want no results", res.Files) 2918 } 2919 }) 2920 2921 t.Run("LineMatches", func(t *testing.T) { 2922 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 2923 if len(res.Files) != 0 { 2924 t.Fatalf("got %v, want no results", res.Files) 2925 } 2926 }) 2927} 2928 2929func TestDocumentSectionRuneBoundary(t *testing.T) { 2930 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2931 b, err := NewIndexBuilder(nil) 2932 if err != nil { 2933 t.Fatalf("NewIndexBuilder: %v", err) 2934 } 2935 2936 for i, sec := range []DocumentSection{ 2937 {2, 6}, 2938 {3, 7}, 2939 } { 2940 if err := b.Add(Document{ 2941 Name: "f1", 2942 Content: []byte(content), 2943 Symbols: []DocumentSection{sec}, 2944 }); err == nil { 2945 t.Errorf("%d: Add succeeded", i) 2946 } 2947 } 2948} 2949 2950func TestUnicodeQuery(t *testing.T) { 2951 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 2952 b := testIndexBuilder(t, nil, 2953 Document{ 2954 Name: "f1", 2955 Content: []byte(content), 2956 }, 2957 ) 2958 2959 q := &query.Substring{Pattern: content} 2960 2961 t.Run("LineMatches", func(t *testing.T) { 2962 res := searchForTest(t, b, q) 2963 if len(res.Files) != 1 { 2964 t.Fatalf("want 1 match, got %v", res.Files) 2965 } 2966 2967 f := res.Files[0] 2968 if len(f.LineMatches) != 1 { 2969 t.Fatalf("want 1 line, got %v", f.LineMatches) 2970 } 2971 l := f.LineMatches[0] 2972 2973 if len(l.LineFragments) != 1 { 2974 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 2975 } 2976 fr := l.LineFragments[0] 2977 if fr.MatchLength != len(content) { 2978 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 2979 } 2980 }) 2981 2982 t.Run("ChunkMatches", func(t *testing.T) { 2983 res := searchForTest(t, b, q, chunkOpts) 2984 if len(res.Files) != 1 { 2985 t.Fatalf("want 1 match, got %v", res.Files) 2986 } 2987 2988 f := res.Files[0] 2989 if len(f.ChunkMatches) != 1 { 2990 t.Fatalf("want 1 line, got %v", f.LineMatches) 2991 } 2992 cm := f.ChunkMatches[0] 2993 2994 if len(cm.Ranges) != 1 { 2995 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 2996 } 2997 rr := cm.Ranges[0] 2998 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 2999 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 3000 } 3001 }) 3002} 3003 3004func TestSkipInvalidContent(t *testing.T) { 3005 for _, content := range []string{ 3006 // Binary 3007 "abc def \x00 abc", 3008 } { 3009 3010 b, err := NewIndexBuilder(nil) 3011 if err != nil { 3012 t.Fatalf("NewIndexBuilder: %v", err) 3013 } 3014 3015 if err := b.Add(Document{ 3016 Name: "f1", 3017 Content: []byte(content), 3018 }); err != nil { 3019 t.Fatal(err) 3020 } 3021 3022 t.Run("LineMatches", func(t *testing.T) { 3023 q := &query.Substring{Pattern: "abc def"} 3024 res := searchForTest(t, b, q) 3025 if len(res.Files) != 0 { 3026 t.Fatalf("got %v, want no results", res.Files) 3027 } 3028 3029 q = &query.Substring{Pattern: "NOT-INDEXED"} 3030 res = searchForTest(t, b, q) 3031 if len(res.Files) != 1 { 3032 t.Fatalf("got %v, want 1 result", res.Files) 3033 } 3034 }) 3035 3036 t.Run("ChunkMatches", func(t *testing.T) { 3037 q := &query.Substring{Pattern: "abc def"} 3038 res := searchForTest(t, b, q, chunkOpts) 3039 if len(res.Files) != 0 { 3040 t.Fatalf("got %v, want no results", res.Files) 3041 } 3042 3043 q = &query.Substring{Pattern: "NOT-INDEXED"} 3044 res = searchForTest(t, b, q, chunkOpts) 3045 if len(res.Files) != 1 { 3046 t.Fatalf("got %v, want 1 result", res.Files) 3047 } 3048 }) 3049 } 3050} 3051 3052func TestCheckText(t *testing.T) { 3053 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { 3054 if err := CheckText([]byte(text), 20000); err != nil { 3055 t.Errorf("CheckText(%q): %v", text, err) 3056 } 3057 } 3058 for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} { 3059 if err := CheckText([]byte(text), 15); err == nil { 3060 t.Errorf("CheckText(%q) succeeded", text) 3061 } 3062 } 3063} 3064 3065func TestLineAnd(t *testing.T) { 3066 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3067 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3068 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3069 Document{Name: "f3", Content: []byte("banana grape")}, 3070 ) 3071 pattern := "(apple)(?-s:.)*?(banana)" 3072 r, _ := syntax.Parse(pattern, syntax.Perl) 3073 3074 q := query.Regexp{ 3075 Regexp: r, 3076 Content: true, 3077 } 3078 t.Run("LineMatches", func(t *testing.T) { 3079 res := searchForTest(t, b, &q) 3080 wantRegexpCount := 1 3081 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3082 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3083 } 3084 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3085 t.Errorf("got %v, want 1 result", res.Files) 3086 } 3087 }) 3088 3089 t.Run("ChunkMatches", func(t *testing.T) { 3090 res := searchForTest(t, b, &q, chunkOpts) 3091 wantRegexpCount := 1 3092 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3093 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3094 } 3095 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3096 t.Errorf("got %v, want 1 result", res.Files) 3097 } 3098 }) 3099} 3100 3101func TestLineAndFileName(t *testing.T) { 3102 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3103 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3104 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3105 Document{Name: "apple banana", Content: []byte("banana grape")}, 3106 ) 3107 pattern := "(apple)(?-s:.)*?(banana)" 3108 r, _ := syntax.Parse(pattern, syntax.Perl) 3109 3110 q := query.Regexp{ 3111 Regexp: r, 3112 FileName: true, 3113 } 3114 t.Run("LineMatches", func(t *testing.T) { 3115 res := searchForTest(t, b, &q) 3116 wantRegexpCount := 1 3117 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3118 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3119 } 3120 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3121 t.Errorf("got %v, want 1 result", res.Files) 3122 } 3123 }) 3124 3125 t.Run("ChunkMatches", func(t *testing.T) { 3126 res := searchForTest(t, b, &q, chunkOpts) 3127 wantRegexpCount := 1 3128 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3129 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3130 } 3131 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3132 t.Errorf("got %v, want 1 result", res.Files) 3133 } 3134 }) 3135} 3136 3137func TestMultiLineRegex(t *testing.T) { 3138 b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3139 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3140 Document{Name: "f2", Content: []byte("apple orange")}, 3141 Document{Name: "f3", Content: []byte("grape apple")}, 3142 ) 3143 pattern := "(apple).*?[[:space:]].*?(grape)" 3144 r, _ := syntax.Parse(pattern, syntax.Perl) 3145 3146 q := query.Regexp{ 3147 Regexp: r, 3148 } 3149 t.Run("LineMatches", func(t *testing.T) { 3150 res := searchForTest(t, b, &q) 3151 wantRegexpCount := 2 3152 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3153 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3154 } 3155 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3156 t.Errorf("got %v, want 1 result", res.Files) 3157 } 3158 if l := len(res.Files[0].LineMatches); l != 2 { 3159 t.Errorf("got %v, want 2 line matches", l) 3160 } 3161 }) 3162 3163 t.Run("ChunkMatches", func(t *testing.T) { 3164 res := searchForTest(t, b, &q, chunkOpts) 3165 wantRegexpCount := 2 3166 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3167 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3168 } 3169 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3170 t.Errorf("got %v, want 1 result", res.Files) 3171 } 3172 if l := len(res.Files[0].ChunkMatches); l != 1 { 3173 t.Errorf("got %v, want 1 chunk matches", l) 3174 } 3175 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3176 t.Errorf("got %v, want 1 chunk ranges", l) 3177 } 3178 }) 3179} 3180 3181func TestSearchTypeFileName(t *testing.T) { 3182 b := testIndexBuilder(t, &Repository{ 3183 Name: "reponame", 3184 }, 3185 Document{Name: "f1", Content: []byte("bla the needle")}, 3186 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3187 // -----------------------------------012345678901234567890-123456 3188 ) 3189 3190 t.Run("LineMatches", func(t *testing.T) { 3191 wantSingleMatch := func(res *SearchResult, want string) { 3192 t.Helper() 3193 fmatches := res.Files 3194 if len(fmatches) != 1 { 3195 t.Errorf("got %v, want 1 matches", len(fmatches)) 3196 return 3197 } 3198 if len(fmatches[0].LineMatches) != 1 { 3199 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3200 return 3201 } 3202 var got string 3203 if fmatches[0].LineMatches[0].FileName { 3204 got = fmatches[0].FileName 3205 } else { 3206 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3207 } 3208 3209 if got != want { 3210 t.Errorf("got %s, want %s", got, want) 3211 } 3212 } 3213 3214 // Only return the later match in the second file 3215 res := searchForTest(t, b, query.NewAnd( 3216 &query.Type{ 3217 Type: query.TypeFileName, 3218 Child: &query.Substring{Pattern: "needle"}, 3219 }, 3220 &query.Substring{Pattern: "file"})) 3221 wantSingleMatch(res, "f2:8") 3222 3223 // Only return a filename result 3224 res = searchForTest(t, b, 3225 &query.Type{ 3226 Type: query.TypeFileName, 3227 Child: &query.Substring{Pattern: "file"}, 3228 }) 3229 wantSingleMatch(res, "f2") 3230 }) 3231 3232 t.Run("ChunkMatches", func(t *testing.T) { 3233 wantSingleMatch := func(res *SearchResult, want string) { 3234 t.Helper() 3235 fmatches := res.Files 3236 if len(fmatches) != 1 { 3237 t.Errorf("got %v, want 1 matches", len(fmatches)) 3238 return 3239 } 3240 if len(fmatches[0].ChunkMatches) != 1 { 3241 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3242 return 3243 } 3244 var got string 3245 if fmatches[0].ChunkMatches[0].FileName { 3246 got = fmatches[0].FileName 3247 } else { 3248 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3249 } 3250 3251 if got != want { 3252 t.Errorf("got %s, want %s", got, want) 3253 } 3254 } 3255 3256 // Only return the later match in the second file 3257 res := searchForTest(t, b, query.NewAnd( 3258 &query.Type{ 3259 Type: query.TypeFileName, 3260 Child: &query.Substring{Pattern: "needle"}, 3261 }, 3262 &query.Substring{Pattern: "file"}), 3263 chunkOpts, 3264 ) 3265 wantSingleMatch(res, "f2:8") 3266 3267 // Only return a filename result 3268 res = searchForTest(t, b, 3269 &query.Type{ 3270 Type: query.TypeFileName, 3271 Child: &query.Substring{Pattern: "file"}, 3272 }, 3273 chunkOpts, 3274 ) 3275 wantSingleMatch(res, "f2") 3276 }) 3277} 3278 3279func TestSearchTypeLanguage(t *testing.T) { 3280 b := testIndexBuilder(t, &Repository{ 3281 Name: "reponame", 3282 }, 3283 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3284 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3285 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3286 ) 3287 3288 t.Log(b.languageMap) 3289 3290 t.Run("LineMatches", func(t *testing.T) { 3291 wantSingleMatch := func(res *SearchResult, want string) { 3292 t.Helper() 3293 fmatches := res.Files 3294 if len(fmatches) != 1 { 3295 t.Errorf("got %v, want 1 matches", len(fmatches)) 3296 return 3297 } 3298 if len(fmatches[0].LineMatches) != 1 { 3299 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3300 return 3301 } 3302 var got string 3303 if fmatches[0].LineMatches[0].FileName { 3304 got = fmatches[0].FileName 3305 } else { 3306 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3307 } 3308 3309 if got != want { 3310 t.Errorf("got %s, want %s", got, want) 3311 } 3312 } 3313 3314 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3315 wantSingleMatch(res, "apex.cls") 3316 3317 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3318 wantSingleMatch(res, "tex.cls") 3319 3320 res = searchForTest(t, b, &query.Language{Language: "C"}) 3321 wantSingleMatch(res, "hello.h") 3322 3323 // test fallback language search by pretending it's an older index version 3324 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3325 if len(res.Files) != 0 { 3326 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3327 } 3328 3329 b.featureVersion = 11 // force fallback 3330 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3331 wantSingleMatch(res, "hello.h") 3332 }) 3333 3334 t.Run("ChunkMatches", func(t *testing.T) { 3335 wantSingleMatch := func(res *SearchResult, want string) { 3336 t.Helper() 3337 fmatches := res.Files 3338 if len(fmatches) != 1 { 3339 t.Errorf("got %v, want 1 matches", len(fmatches)) 3340 return 3341 } 3342 if len(fmatches[0].ChunkMatches) != 1 { 3343 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3344 return 3345 } 3346 var got string 3347 if fmatches[0].ChunkMatches[0].FileName { 3348 got = fmatches[0].FileName 3349 } else { 3350 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3351 } 3352 3353 if got != want { 3354 t.Errorf("got %s, want %s", got, want) 3355 } 3356 } 3357 3358 b.featureVersion = FeatureVersion // reset feature version 3359 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3360 wantSingleMatch(res, "apex.cls") 3361 3362 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3363 wantSingleMatch(res, "tex.cls") 3364 3365 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3366 wantSingleMatch(res, "hello.h") 3367 3368 // test fallback language search by pretending it's an older index version 3369 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3370 if len(res.Files) != 0 { 3371 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3372 } 3373 3374 b.featureVersion = 11 // force fallback 3375 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3376 wantSingleMatch(res, "hello.h") 3377 }) 3378} 3379 3380func TestStats(t *testing.T) { 3381 ignored := []cmp.Option{ 3382 cmpopts.EquateEmpty(), 3383 cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3384 cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3385 cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3386 } 3387 3388 repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3389 searcher := searcherForTest(t, b) 3390 indexdata := searcher.(*indexData) 3391 return indexdata.repoListEntry 3392 } 3393 3394 t.Run("one empty repo", func(t *testing.T) { 3395 b := testIndexBuilder(t, nil) 3396 got := repoListEntries(b) 3397 want := []RepoListEntry{ 3398 { 3399 Stats: RepoStats{ 3400 Repos: 0, 3401 Shards: 1, 3402 Documents: 0, 3403 IndexBytes: 20, 3404 ContentBytes: 0, 3405 NewLinesCount: 0, 3406 DefaultBranchNewLinesCount: 0, 3407 OtherBranchesNewLinesCount: 0, 3408 }, 3409 }, 3410 } 3411 3412 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3413 t.Fatalf("mismatch (-want +got):\n%s", diff) 3414 } 3415 3416 }) 3417 3418 t.Run("one simple shard", func(t *testing.T) { 3419 b := testIndexBuilder(t, nil, 3420 Document{Name: "doc 0", Content: []byte("content 0")}, 3421 Document{Name: "doc 1", Content: []byte("content 1")}, 3422 ) 3423 got := repoListEntries(b) 3424 want := []RepoListEntry{ 3425 { 3426 Stats: RepoStats{ 3427 Repos: 0, 3428 Shards: 1, 3429 Documents: 2, 3430 IndexBytes: 224, 3431 ContentBytes: 28, 3432 NewLinesCount: 0, 3433 DefaultBranchNewLinesCount: 0, 3434 OtherBranchesNewLinesCount: 0, 3435 }, 3436 }, 3437 } 3438 3439 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3440 t.Fatalf("mismatch (-want +got):\n%s", diff) 3441 } 3442 3443 }) 3444 3445 t.Run("one compound shard", func(t *testing.T) { 3446 b := testIndexBuilderCompound(t, 3447 []*Repository{ 3448 {Name: "repo 0"}, 3449 {Name: "repo 1"}, 3450 }, 3451 [][]Document{ 3452 { 3453 {Name: "doc 0", Content: []byte("content 0")}, 3454 {Name: "doc 1", Content: []byte("content 1")}, 3455 }, 3456 { 3457 {Name: "doc 2", Content: []byte("content 2")}, 3458 {Name: "doc 3", Content: []byte("content 3")}, 3459 }, 3460 }, 3461 ) 3462 got := repoListEntries(b) 3463 want := []RepoListEntry{ 3464 { 3465 Stats: RepoStats{ 3466 Repos: 0, 3467 Shards: 1, 3468 Documents: 2, 3469 IndexBytes: 180, 3470 ContentBytes: 28, 3471 NewLinesCount: 0, 3472 DefaultBranchNewLinesCount: 0, 3473 OtherBranchesNewLinesCount: 0, 3474 }, 3475 }, 3476 { 3477 Stats: RepoStats{ 3478 Repos: 0, 3479 Shards: 1, 3480 Documents: 2, 3481 IndexBytes: 180, 3482 ContentBytes: 28, 3483 NewLinesCount: 0, 3484 DefaultBranchNewLinesCount: 0, 3485 OtherBranchesNewLinesCount: 0, 3486 }, 3487 }, 3488 } 3489 3490 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3491 t.Fatalf("mismatch (-want +got):\n%s", diff) 3492 } 3493 }) 3494 3495 t.Run("compound shard with empty repos", func(t *testing.T) { 3496 b := testIndexBuilderCompound(t, 3497 []*Repository{ 3498 {Name: "repo 0"}, 3499 {Name: "repo 1"}, 3500 {Name: "repo 2"}, 3501 {Name: "repo 3"}, 3502 {Name: "repo 4"}, 3503 }, 3504 [][]Document{ 3505 {{Name: "doc 0", Content: []byte("content 0")}}, 3506 nil, 3507 {{Name: "doc 1", Content: []byte("content 1")}}, 3508 nil, 3509 nil, 3510 }, 3511 ) 3512 got := repoListEntries(b) 3513 3514 entryEmpty := RepoListEntry{Stats: RepoStats{ 3515 Shards: 1, 3516 Documents: 0, 3517 ContentBytes: 0, 3518 }} 3519 entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3520 Shards: 1, 3521 Documents: 1, 3522 ContentBytes: 14, 3523 }} 3524 3525 want := []RepoListEntry{ 3526 entryNonEmpty, 3527 entryEmpty, 3528 entryNonEmpty, 3529 entryEmpty, 3530 entryEmpty, 3531 } 3532 3533 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3534 t.Fatalf("mismatch (-want +got):\n%s", diff) 3535 } 3536 3537 }) 3538} 3539 3540// This tests the frequent pattern "\bLITERAL\b". 3541func TestWordSearch(t *testing.T) { 3542 content := []byte("needle the bla") 3543 // ----------------01234567890123 3544 3545 b := testIndexBuilder(t, nil, 3546 Document{ 3547 Name: "f1", 3548 Content: content, 3549 }) 3550 3551 t.Run("LineMatches", func(t *testing.T) { 3552 sres := searchForTest(t, b, 3553 &query.Regexp{ 3554 Regexp: mustParseRE("\\bthe\\b"), 3555 CaseSensitive: true, 3556 Content: true, 3557 }) 3558 3559 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3560 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3561 } 3562 3563 if sres.Stats.RegexpsConsidered != 0 { 3564 t.Fatal("expected regexp to be skipped") 3565 } 3566 3567 got := sres.Files[0].LineMatches[0] 3568 want := LineMatch{ 3569 LineFragments: []LineFragmentMatch{{ 3570 LineOffset: 7, 3571 Offset: 7, 3572 MatchLength: 3, 3573 }}, 3574 Line: content, 3575 FileName: false, 3576 LineNumber: 1, 3577 LineStart: 0, 3578 LineEnd: 14, 3579 } 3580 3581 if !reflect.DeepEqual(got, want) { 3582 t.Errorf("got %#v, want %#v", got, want) 3583 } 3584 }) 3585 3586 t.Run("ChunkMatches", func(t *testing.T) { 3587 sres := searchForTest(t, b, 3588 &query.Regexp{ 3589 Regexp: mustParseRE("\\bthe\\b"), 3590 CaseSensitive: true, 3591 }, chunkOpts) 3592 3593 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3594 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3595 } 3596 3597 if sres.Stats.RegexpsConsidered != 0 { 3598 t.Fatal("expected regexp to be skipped") 3599 } 3600 3601 got := sres.Files[0].ChunkMatches[0] 3602 want := ChunkMatch{ 3603 Content: content, 3604 ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3605 Ranges: []Range{{ 3606 Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3607 End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3608 }}, 3609 } 3610 3611 if diff := cmp.Diff(want, got); diff != "" { 3612 t.Fatal(diff) 3613 } 3614 }) 3615}