fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 "github.com/sourcegraph/zoekt" 30 "github.com/sourcegraph/zoekt/query" 31) 32 33func clearScores(r *zoekt.SearchResult) { 34 for i := range r.Files { 35 r.Files[i].Score = 0.0 36 for j := range r.Files[i].LineMatches { 37 r.Files[i].LineMatches[j].Score = 0.0 38 } 39 for j := range r.Files[i].ChunkMatches { 40 r.Files[i].ChunkMatches[j].Score = 0.0 41 r.Files[i].ChunkMatches[j].BestLineMatch = 0 42 } 43 r.Files[i].Checksum = nil 44 r.Files[i].Debug = "" 45 } 46} 47 48func testIndexBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *IndexBuilder { 49 tb.Helper() 50 51 b, err := NewIndexBuilder(repo) 52 if err != nil { 53 tb.Fatalf("NewIndexBuilder: %v", err) 54 } 55 56 for i, d := range docs { 57 if err := b.Add(d); err != nil { 58 tb.Fatalf("Add %d: %v", i, err) 59 } 60 } 61 62 return b 63} 64 65func testIndexBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *IndexBuilder { 66 t.Helper() 67 68 b := newIndexBuilder() 69 b.indexFormatVersion = NextIndexFormatVersion 70 71 if len(repos) != len(docs) { 72 t.Fatalf("testIndexBuilderCompound: repos must be the same length as docs, got: 
len(repos)=%d len(docs)=%d", len(repos), len(docs)) 73 } 74 75 for i, repo := range repos { 76 if err := b.setRepository(repo); err != nil { 77 t.Fatal(err) 78 } 79 for j, d := range docs[i] { 80 if err := b.Add(d); err != nil { 81 t.Fatalf("Add %d %d: %v", i, j, err) 82 } 83 } 84 } 85 86 return b 87} 88 89func TestBoundary(t *testing.T) { 90 b := testIndexBuilder(t, nil, 91 Document{Name: "f1", Content: []byte("x the")}, 92 Document{Name: "f1", Content: []byte("reader")}) 93 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 94 if len(res.Files) > 0 { 95 t.Fatalf("got %v, want no matches", res.Files) 96 } 97} 98 99func TestDocSectionInvalid(t *testing.T) { 100 b, err := NewIndexBuilder(nil) 101 if err != nil { 102 t.Fatalf("NewIndexBuilder: %v", err) 103 } 104 doc := Document{ 105 Name: "f1", 106 Content: []byte("01234567890123"), 107 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 108 } 109 110 if err := b.Add(doc); err == nil { 111 t.Errorf("overlapping doc sections should fail") 112 } 113 114 doc = Document{ 115 Name: "f1", 116 Content: []byte("01234567890123"), 117 Symbols: []DocumentSection{{0, 20}}, 118 } 119 120 if err := b.Add(doc); err == nil { 121 t.Errorf("doc sections beyond EOF should fail") 122 } 123} 124 125func TestBasic(t *testing.T) { 126 b := testIndexBuilder(t, nil, 127 Document{ 128 Name: "f2", 129 Content: []byte("to carry water in the no later bla"), 130 // --------------0123456789012345678901234567890123 131 }) 132 133 t.Run("LineMatch", func(t *testing.T) { 134 res := searchForTest(t, b, &query.Substring{ 135 Pattern: "water", 136 CaseSensitive: true, 137 }) 138 fmatches := res.Files 139 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 140 t.Fatalf("got %v, want 1 matches", fmatches) 141 } 142 143 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 144 want := "f2:9" 145 if got != want { 146 t.Errorf("1: got %s, want %s", got, want) 147 } 148 }) 149 150 
t.Run("ChunkMatch", func(t *testing.T) { 151 res := searchForTest(t, b, &query.Substring{ 152 Pattern: "water", 153 CaseSensitive: true, 154 }, chunkOpts) 155 fmatches := res.Files 156 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 157 t.Fatalf("got %v, want 1 matches", fmatches) 158 } 159 160 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 161 want := "f2:9" 162 if got != want { 163 t.Errorf("1: got %s, want %s", got, want) 164 } 165 }) 166} 167 168func TestEmptyIndex(t *testing.T) { 169 b := testIndexBuilder(t, nil) 170 searcher := searcherForTest(t, b) 171 172 var opts zoekt.SearchOptions 173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 174 t.Fatalf("Search: %v", err) 175 } 176 177 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 178 t.Fatalf("List: %v", err) 179 } 180 181 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 182 t.Fatalf("Search: %v", err) 183 } 184} 185 186type memSeeker struct { 187 data []byte 188} 189 190func (s *memSeeker) Name() string { 191 return "memseeker" 192} 193 194func (s *memSeeker) Close() {} 195func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 196 return s.data[off : off+sz], nil 197} 198 199func (s *memSeeker) Size() (uint32, error) { 200 return uint32(len(s.data)), nil 201} 202 203func TestNewlines(t *testing.T) { 204 b := testIndexBuilder(t, nil, 205 // -----------------------------------------012345-678901-234 206 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 207 208 t.Run("LineMatches", func(t *testing.T) { 209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 210 211 matches := sres.Files 212 want := []zoekt.FileMatch{{ 213 FileName: "filename", 214 LineMatches: []zoekt.LineMatch{{ 215 LineFragments: []zoekt.LineFragmentMatch{{ 216 
Offset: 8, 217 LineOffset: 2, 218 MatchLength: 3, 219 }}, 220 Line: []byte("line2\n"), 221 LineStart: 6, 222 LineEnd: 12, 223 LineNumber: 2, 224 }}, 225 }} 226 227 if diff := cmp.Diff(matches, want); diff != "" { 228 t.Fatal(diff) 229 } 230 }) 231 232 t.Run("ChunkMatches", func(t *testing.T) { 233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 234 235 matches := sres.Files 236 want := []zoekt.FileMatch{{ 237 FileName: "filename", 238 ChunkMatches: []zoekt.ChunkMatch{{ 239 Content: []byte("line2\n"), 240 ContentStart: zoekt.Location{ 241 ByteOffset: 6, 242 LineNumber: 2, 243 Column: 1, 244 }, 245 Ranges: []zoekt.Range{{ 246 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 248 }}, 249 }}, 250 }} 251 252 if diff := cmp.Diff(want, matches); diff != "" { 253 t.Fatal(diff) 254 } 255 }) 256} 257 258// A result spanning multiple lines should have LineMatches that only cover 259// single lines. 
260func TestQueryNewlines(t *testing.T) { 261 text := "line1\nline2\nbla" 262 b := testIndexBuilder(t, nil, 263 Document{Name: "filename", Content: []byte(text)}) 264 265 t.Run("LineMatches", func(t *testing.T) { 266 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 267 matches := sres.Files 268 if len(matches) != 1 { 269 t.Fatalf("got %d file matches, want exactly one", len(matches)) 270 } 271 m := matches[0] 272 if len(m.LineMatches) != 2 { 273 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches) 274 } 275 }) 276 277 t.Run("ChunkMatches", func(t *testing.T) { 278 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 279 matches := sres.Files 280 if len(matches) != 1 { 281 t.Fatalf("got %d file matches, want exactly one", len(matches)) 282 } 283 m := matches[0] 284 if len(m.ChunkMatches) != 1 { 285 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 286 } 287 }) 288} 289 290var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} 291 292func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { 293 searcher := searcherForTest(t, b) 294 var opts zoekt.SearchOptions 295 if len(o) > 0 { 296 opts = o[0] 297 } 298 res, err := searcher.Search(context.Background(), q, &opts) 299 if err != nil { 300 t.Fatalf("Search(%s): %v", q, err) 301 } 302 clearScores(res) 303 return res 304} 305 306func searcherForTest(t testing.TB, b *IndexBuilder) zoekt.Searcher { 307 var buf bytes.Buffer 308 if err := b.Write(&buf); err != nil { 309 t.Fatal(err) 310 } 311 f := &memSeeker{buf.Bytes()} 312 313 searcher, err := NewSearcher(f) 314 if err != nil { 315 t.Fatalf("NewSearcher: %v", err) 316 } 317 318 return searcher 319} 320 321func TestCaseFold(t *testing.T) { 322 b := testIndexBuilder(t, nil, 323 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 324 // -----------------------------------012345678901234 325 ) 326 t.Run("LineMatches", 
func(t *testing.T) { 327 sres := searchForTest(t, b, &query.Substring{ 328 Pattern: "bananas", 329 CaseSensitive: true, 330 }) 331 matches := sres.Files 332 if len(matches) != 0 { 333 t.Errorf("foldcase: got %#v, want 0 matches", matches) 334 } 335 336 sres = searchForTest(t, b, 337 &query.Substring{ 338 Pattern: "BaNaNAS", 339 CaseSensitive: true, 340 }) 341 matches = sres.Files 342 if len(matches) != 1 { 343 t.Errorf("no foldcase: got %v, want 1 matches", matches) 344 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 345 t.Errorf("foldcase: got %v, want offsets 7", matches) 346 } 347 }) 348 349 t.Run("ChunkMatches", func(t *testing.T) { 350 sres := searchForTest(t, b, &query.Substring{ 351 Pattern: "bananas", 352 CaseSensitive: true, 353 }, chunkOpts) 354 matches := sres.Files 355 if len(matches) != 0 { 356 t.Errorf("foldcase: got %#v, want 0 matches", matches) 357 } 358 359 sres = searchForTest(t, b, 360 &query.Substring{ 361 Pattern: "BaNaNAS", 362 CaseSensitive: true, 363 }) 364 matches = sres.Files 365 if len(matches) != 1 { 366 t.Errorf("no foldcase: got %v, want 1 matches", matches) 367 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 368 t.Errorf("foldcase: got %v, want offsets 7", matches) 369 } 370 }) 371} 372 373// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 374// chars. Those are then set as symbols. 
375func wordsAsSymbols(doc Document) Document { 376 re := regexp.MustCompile(`\b\w{2,}\b`) 377 var symbols []DocumentSection 378 var symbolsMetadata []*zoekt.Symbol 379 for _, match := range re.FindAllIndex(doc.Content, -1) { 380 symbols = append(symbols, DocumentSection{ 381 Start: uint32(match[0]), 382 End: uint32(match[1]), 383 }) 384 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) 385 } 386 doc.Symbols = symbols 387 doc.SymbolsMetaData = symbolsMetadata 388 return doc 389} 390 391func TestSearchStats(t *testing.T) { 392 ctx := context.Background() 393 searcher := searcherForTest(t, testIndexBuilder(t, nil, 394 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 395 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 396 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 397 // --------------------------------------------------0123456789012345 398 )) 399 400 andQuery := query.NewAnd( 401 &query.Substring{ 402 Pattern: "banana", 403 }, 404 &query.Substring{ 405 Pattern: "apple", 406 }, 407 ) 408 409 t.Run("LineMatches", func(t *testing.T) { 410 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) 411 if err != nil { 412 t.Fatal(err) 413 } 414 matches := sres.Files 415 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 416 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 417 } 418 419 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 420 t.Fatalf("got %#v, want offsets 2,9", matches) 421 } 422 }) 423 t.Run("ChunkMatches", func(t *testing.T) { 424 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 425 if err != nil { 426 t.Fatal(err) 427 } 428 matches := sres.Files 429 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 430 t.Fatalf("got %#v, want 1 chunk match with 2 
ranges", matches) 431 } 432 433 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 434 t.Fatalf("got %#v, want offsets 2,9", matches) 435 } 436 }) 437 t.Run("Stats", func(t *testing.T) { 438 cases := []struct { 439 Name string 440 Q query.Q 441 Want zoekt.Stats 442 }{{ 443 Name: "and-query", 444 Q: andQuery, 445 Want: zoekt.Stats{ 446 FilesLoaded: 1, 447 ContentBytesLoaded: 22, 448 IndexBytesLoaded: 10, 449 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 450 NgramLookups: 104, 451 MatchCount: 2, 452 FileCount: 1, 453 FilesConsidered: 2, 454 ShardsScanned: 1, 455 }, 456 }, { 457 Name: "one-trigram", 458 Q: &query.Substring{ 459 Pattern: "a y", 460 Content: true, 461 CaseSensitive: true, 462 }, 463 Want: zoekt.Stats{ 464 ContentBytesLoaded: 14, 465 IndexBytesLoaded: 1, 466 FileCount: 1, 467 FilesConsidered: 1, 468 FilesLoaded: 1, 469 ShardsScanned: 1, 470 MatchCount: 1, 471 NgramMatches: 1, 472 NgramLookups: 2, // once to lookup frequency then again to access posting list. 473 }, 474 }, { 475 Name: "one-trigram-case-insensitive", 476 Q: &query.Substring{ 477 Pattern: "a y", 478 Content: true, 479 }, 480 Want: zoekt.Stats{ 481 ContentBytesLoaded: 14, 482 IndexBytesLoaded: 1, 483 FileCount: 1, 484 FilesConsidered: 1, 485 FilesLoaded: 1, 486 ShardsScanned: 1, 487 MatchCount: 1, 488 NgramMatches: 1, 489 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
490 }, 491 }, { 492 Name: "one-trigram-pruned", 493 Q: &query.Substring{ 494 Pattern: "foo", 495 Content: true, 496 CaseSensitive: true, 497 }, 498 Want: zoekt.Stats{ 499 ShardsSkippedFilter: 1, 500 NgramLookups: 1, // only had to lookup once 501 }, 502 }, { 503 Name: "one-trigram-branch-pruned", 504 Q: query.NewAnd( 505 &query.Substring{ 506 Pattern: "foo", 507 Content: true, 508 CaseSensitive: true, 509 }, 510 &query.Substring{ 511 Pattern: "a y", 512 Content: true, 513 CaseSensitive: true, 514 }, 515 ), 516 Want: zoekt.Stats{ 517 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 518 ShardsSkippedFilter: 1, 519 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 520 }, 521 }, { 522 Name: "symbol-substr-nomatch", 523 Q: &query.Symbol{Expr: &query.Substring{ 524 Pattern: "banana apple", 525 Content: true, 526 CaseSensitive: true, 527 }}, 528 Want: zoekt.Stats{ 529 IndexBytesLoaded: 3, 530 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 531 MatchCount: 0, // even though there is a match it doesn't align with a symbol 532 ShardsScanned: 1, 533 NgramMatches: 1, 534 NgramLookups: 12, 535 }, 536 }, { 537 Name: "symbol-substr", 538 Q: &query.Symbol{Expr: &query.Substring{ 539 Pattern: "apple", 540 Content: true, 541 CaseSensitive: true, 542 }}, 543 Want: zoekt.Stats{ 544 ContentBytesLoaded: 35, 545 IndexBytesLoaded: 4, 546 FileCount: 2, 547 FilesConsidered: 2, // must be 2 to ensure we used the index 548 FilesLoaded: 2, 549 MatchCount: 2, // apple symbols is in two files 550 ShardsScanned: 1, 551 NgramMatches: 2, 552 NgramLookups: 5, 553 }, 554 }, { 555 Name: "symbol-regexp-nomatch", 556 Q: &query.Symbol{Expr: &query.Regexp{ 557 Regexp: mustParseRE("^apple.banana$"), 558 Content: true, 559 CaseSensitive: true, 560 }}, 561 Want: zoekt.Stats{ 562 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 563 IndexBytesLoaded: 10, 564 
FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index 565 FilesLoaded: 2, 566 MatchCount: 0, // even though there is a match it doesn't align with a symbol 567 ShardsScanned: 1, 568 NgramMatches: 3, 569 NgramLookups: 11, 570 }, 571 }, { 572 Name: "symbol-regexp", 573 Q: &query.Symbol{Expr: &query.Regexp{ 574 Regexp: mustParseRE("^app.e$"), 575 Content: true, 576 CaseSensitive: true, 577 }}, 578 Want: zoekt.Stats{ 579 ContentBytesLoaded: 35, 580 IndexBytesLoaded: 2, 581 FileCount: 2, 582 FilesConsidered: 2, // must be 2 to ensure we used the index 583 FilesLoaded: 2, 584 MatchCount: 2, // apple symbols is in two files 585 ShardsScanned: 1, 586 NgramMatches: 2, 587 NgramLookups: 2, 588 }, 589 }} 590 591 for _, tc := range cases { 592 t.Run(tc.Name, func(t *testing.T) { 593 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 594 if err != nil { 595 t.Fatal(err) 596 } 597 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 598 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 599 } 600 }) 601 } 602 }) 603} 604 605func TestAndNegateSearch(t *testing.T) { 606 b := testIndexBuilder(t, nil, 607 Document{Name: "f1", Content: []byte("x banana y")}, 608 // -----------------------------------0123456789 609 Document{Name: "f4", Content: []byte("x banana apple y")}) 610 611 t.Run("LineMatches", func(t *testing.T) { 612 sres := searchForTest(t, b, query.NewAnd( 613 &query.Substring{ 614 Pattern: "banana", 615 }, 616 &query.Not{Child: &query.Substring{ 617 Pattern: "apple", 618 }})) 619 620 matches := sres.Files 621 622 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 623 t.Fatalf("got %v, want 1 match", matches) 624 } 625 if matches[0].FileName != "f1" { 626 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 627 } 628 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 629 t.Fatalf("got %v, want offset 2", matches) 630 } 631 }) 632 633 
t.Run("ChunkMatches", func(t *testing.T) { 634 sres := searchForTest(t, b, 635 query.NewAnd( 636 &query.Substring{ 637 Pattern: "banana", 638 }, 639 &query.Not{Child: &query.Substring{ 640 Pattern: "apple", 641 }}, 642 ), 643 chunkOpts, 644 ) 645 646 matches := sres.Files 647 648 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 649 t.Fatalf("got %v, want 1 match", matches) 650 } 651 if matches[0].FileName != "f1" { 652 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 653 } 654 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 655 t.Fatalf("got %v, want offset 2", matches) 656 } 657 }) 658} 659 660func TestNegativeMatchesOnlyShortcut(t *testing.T) { 661 b := testIndexBuilder(t, nil, 662 Document{Name: "f1", Content: []byte("x banana y")}, 663 Document{Name: "f2", Content: []byte("x appelmoes y")}, 664 Document{Name: "f3", Content: []byte("x appelmoes y")}, 665 Document{Name: "f3", Content: []byte("x appelmoes y")}) 666 667 t.Run("LineMatches", func(t *testing.T) { 668 sres := searchForTest(t, b, query.NewAnd( 669 &query.Substring{ 670 Pattern: "banana", 671 }, 672 &query.Not{Child: &query.Substring{ 673 Pattern: "appel", 674 }})) 675 676 if sres.Stats.FilesConsidered != 1 { 677 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 678 } 679 }) 680 681 t.Run("ChunkMatches", func(t *testing.T) { 682 sres := searchForTest(t, b, query.NewAnd( 683 &query.Substring{ 684 Pattern: "banana", 685 }, 686 &query.Not{Child: &query.Substring{ 687 Pattern: "appel", 688 }}), chunkOpts) 689 690 if sres.Stats.FilesConsidered != 1 { 691 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 692 } 693 }) 694} 695 696func TestFileSearch(t *testing.T) { 697 b := testIndexBuilder(t, nil, 698 Document{Name: "banzana", Content: []byte("x orange y")}, 699 // -------------0123456 700 Document{Name: "banana", Content: []byte("x apple y")}, 701 // -------------012345 702 ) 703 704 t.Run("LineMatches", func(t *testing.T) { 705 sres := searchForTest(t, b, 
&query.Substring{ 706 Pattern: "anan", 707 FileName: true, 708 }) 709 710 matches := sres.Files 711 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 712 t.Fatalf("got %v, want 1 match", matches) 713 } 714 715 got := matches[0].LineMatches[0] 716 want := zoekt.LineMatch{ 717 Line: []byte("banana"), 718 LineFragments: []zoekt.LineFragmentMatch{{ 719 Offset: 1, 720 LineOffset: 1, 721 MatchLength: 4, 722 }}, 723 FileName: true, 724 } 725 726 if !reflect.DeepEqual(got, want) { 727 t.Errorf("got %#v, want %#v", got, want) 728 } 729 }) 730 731 t.Run("ChunkMatches", func(t *testing.T) { 732 sres := searchForTest(t, b, &query.Substring{ 733 Pattern: "anan", 734 FileName: true, 735 }, chunkOpts) 736 737 matches := sres.Files 738 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 739 t.Fatalf("got %v, want 1 match", matches) 740 } 741 742 got := matches[0].ChunkMatches[0] 743 want := zoekt.ChunkMatch{ 744 Content: []byte("banana"), 745 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 746 Ranges: []zoekt.Range{{ 747 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 748 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 749 }}, 750 FileName: true, 751 } 752 753 if diff := cmp.Diff(want, got); diff != "" { 754 t.Fatal(diff) 755 } 756 }) 757 758 t.Run("FileNameSet", func(t *testing.T) { 759 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 760 761 matches := sres.Files 762 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 763 t.Fatalf("got %v, want 1 match", matches) 764 } 765 766 got := matches[0].ChunkMatches[0] 767 want := zoekt.ChunkMatch{ 768 Content: []byte("banana"), 769 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 770 Ranges: []zoekt.Range{{ 771 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 772 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 773 }}, 774 FileName: true, 775 } 776 777 if diff := cmp.Diff(want, got); diff 
!= "" { 778 t.Fatal(diff) 779 } 780 }) 781} 782 783func TestFileCase(t *testing.T) { 784 b := testIndexBuilder(t, nil, 785 Document{Name: "BANANA", Content: []byte("x orange y")}) 786 787 t.Run("LineMatches", func(t *testing.T) { 788 sres := searchForTest(t, b, &query.Substring{ 789 Pattern: "banana", 790 FileName: true, 791 }) 792 793 matches := sres.Files 794 if len(matches) != 1 || matches[0].FileName != "BANANA" { 795 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 796 } 797 }) 798 799 t.Run("ChunkMatches", func(t *testing.T) { 800 sres := searchForTest(t, b, &query.Substring{ 801 Pattern: "banana", 802 FileName: true, 803 }, chunkOpts) 804 805 matches := sres.Files 806 if len(matches) != 1 || matches[0].FileName != "BANANA" { 807 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 808 } 809 }) 810} 811 812func TestFileRegexpSearchBruteForce(t *testing.T) { 813 b := testIndexBuilder(t, nil, 814 Document{Name: "banzana", Content: []byte("x orange y")}, 815 Document{Name: "banana", Content: []byte("x apple y")}, 816 ) 817 t.Run("LineMatches", func(t *testing.T) { 818 sres := searchForTest(t, b, &query.Regexp{ 819 Regexp: mustParseRE("[qn][zx]"), 820 FileName: true, 821 }) 822 823 matches := sres.Files 824 if len(matches) != 1 || matches[0].FileName != "banzana" { 825 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 826 } 827 }) 828 t.Run("LineMatches", func(t *testing.T) { 829 sres := searchForTest(t, b, &query.Regexp{ 830 Regexp: mustParseRE("[qn][zx]"), 831 FileName: true, 832 }, chunkOpts) 833 834 matches := sres.Files 835 if len(matches) != 1 || matches[0].FileName != "banzana" { 836 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 837 } 838 }) 839} 840 841func TestFileRegexpSearchShortString(t *testing.T) { 842 b := testIndexBuilder(t, nil, 843 Document{Name: "banana.py", Content: []byte("x orange y")}) 844 845 t.Run("LineMatches", func(t *testing.T) { 846 sres := searchForTest(t, b, &query.Regexp{ 847 Regexp: mustParseRE("ana.py"), 848 
FileName: true, 849 }) 850 851 matches := sres.Files 852 if len(matches) != 1 || matches[0].FileName != "banana.py" { 853 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 854 } 855 }) 856 857 t.Run("ChunkMatches", func(t *testing.T) { 858 sres := searchForTest(t, b, &query.Regexp{ 859 Regexp: mustParseRE("ana.py"), 860 FileName: true, 861 }, chunkOpts) 862 863 matches := sres.Files 864 if len(matches) != 1 || matches[0].FileName != "banana.py" { 865 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 866 } 867 }) 868} 869 870func TestFileSubstringSearchBruteForce(t *testing.T) { 871 b := testIndexBuilder(t, nil, 872 Document{Name: "BANZANA", Content: []byte("x orange y")}, 873 Document{Name: "banana", Content: []byte("x apple y")}) 874 875 q := &query.Substring{ 876 Pattern: "z", 877 FileName: true, 878 } 879 880 t.Run("LineMatches", func(t *testing.T) { 881 res := searchForTest(t, b, q) 882 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 883 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 884 } 885 }) 886 887 t.Run("ChunkMatches", func(t *testing.T) { 888 res := searchForTest(t, b, q, chunkOpts) 889 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 890 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 891 } 892 }) 893} 894 895func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 896 b := testIndexBuilder(t, nil, 897 Document{Name: "BANZANA", Content: []byte("x orange y")}, 898 Document{Name: "bananaq", Content: []byte("x apple y")}) 899 900 q := &query.Substring{ 901 Pattern: "q", 902 FileName: true, 903 } 904 t.Run("LineMatches", func(t *testing.T) { 905 res := searchForTest(t, b, q) 906 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 907 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 908 } 909 }) 910 911 t.Run("LineMatches", func(t *testing.T) { 912 res := searchForTest(t, b, q, chunkOpts) 913 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != 
want { 914 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 915 } 916 }) 917} 918 919func TestSearchMatchAll(t *testing.T) { 920 b := testIndexBuilder(t, nil, 921 Document{Name: "banzana", Content: []byte("x orange y")}, 922 Document{Name: "banana", Content: []byte("x apple y")}) 923 924 t.Run("LineMatches", func(t *testing.T) { 925 sres := searchForTest(t, b, &query.Const{Value: true}) 926 matches := sres.Files 927 if len(matches) != 2 { 928 t.Fatalf("got %v, want 2 matches", matches) 929 } 930 }) 931 932 t.Run("ChunkMatches", func(t *testing.T) { 933 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 934 matches := sres.Files 935 if len(matches) != 2 { 936 t.Fatalf("got %v, want 2 matches", matches) 937 } 938 }) 939} 940 941func TestSearchNewline(t *testing.T) { 942 b := testIndexBuilder(t, nil, 943 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 944 945 t.Run("LineMatches", func(t *testing.T) { 946 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 947 948 // Just check that we don't crash. 949 950 matches := sres.Files 951 if len(matches) != 1 { 952 t.Fatalf("got %v, want 1 matches", matches) 953 } 954 }) 955 956 t.Run("ChunkMatches", func(t *testing.T) { 957 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 958 959 // Just check that we don't crash. 
960 961 matches := sres.Files 962 if len(matches) != 1 { 963 t.Fatalf("got %v, want 1 matches", matches) 964 } 965 }) 966} 967 968func TestSearchMatchAllRegexp(t *testing.T) { 969 b := testIndexBuilder(t, nil, 970 Document{Name: "banzana", Content: []byte("abcd")}, 971 Document{Name: "banana", Content: []byte("pqrs")}) 972 973 t.Run("LineMatches", func(t *testing.T) { 974 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 975 976 matches := sres.Files 977 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 978 t.Fatalf("got %v, want 2 matches", matches) 979 } 980 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 981 t.Fatalf("want 4 chars in every file, got %#v", matches) 982 } 983 }) 984 985 t.Run("ChunkMatches", func(t *testing.T) { 986 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 987 988 matches := sres.Files 989 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 990 t.Fatalf("got %v, want 2 matches", matches) 991 } 992 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 993 t.Fatalf("want 4 chars in every file, got %#v", matches) 994 } 995 }) 996} 997 998func TestSearchBM25MatchScores(t *testing.T) { 999 ctx := context.Background() 1000 searcher := searcherForTest(t, testIndexBuilder(t, nil, 1001 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")}, 1002 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")}, 1003 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}), 1004 )) 1005 1006 t.Run("LineMatches", func(t *testing.T) { 1007 q := &query.Substring{Pattern: "two"} 1008 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) 1009 if err != nil { 1010 t.Fatal(err) 1011 } 1012 matches := sres.Files 1013 if len(matches) != 1 { 1014 t.Fatalf("want 1 file index, got %d", len(matches)) 1015 
} 1016 1017 if len(matches[0].LineMatches) != 2 { 1018 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1019 } 1020 1021 if matches[0].LineMatches[0].LineNumber != 4 { 1022 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber) 1023 } 1024 }) 1025 1026 t.Run("ChunkMatches", func(t *testing.T) { 1027 q := &query.Substring{Pattern: "five"} 1028 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1029 if err != nil { 1030 t.Fatal(err) 1031 } 1032 1033 matches := sres.Files 1034 if len(matches) != 1 { 1035 t.Fatalf("want 1 file index, got %d", len(matches)) 1036 } 1037 1038 if len(matches[0].ChunkMatches) != 2 { 1039 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1040 } 1041 1042 if matches[0].ChunkMatches[0].BestLineMatch != 4 { 1043 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch) 1044 } 1045 }) 1046 1047 t.Run("ChunkMatches with symbols", func(t *testing.T) { 1048 q := &query.Or{ 1049 Children: []query.Q{ 1050 &query.Symbol{Expr: &query.Substring{Pattern: "main"}}, 1051 &query.Substring{Pattern: "five"}, 1052 }, 1053 } 1054 1055 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1056 if err != nil { 1057 t.Fatal(err) 1058 } 1059 1060 matches := sres.Files 1061 if len(matches) != 2 { 1062 t.Fatalf("want 2 file index, got %d", len(matches)) 1063 } 1064 1065 foundSymbolInfo := false 1066 for _, m := range matches { 1067 for _, cm := range m.ChunkMatches { 1068 if len(cm.SymbolInfo) > 0 { 1069 foundSymbolInfo = true 1070 } 1071 } 1072 } 1073 1074 if !foundSymbolInfo { 1075 t.Fatalf("want symbol info, got none") 1076 } 1077 }) 1078} 1079 1080func TestFileRestriction(t *testing.T) { 1081 b := testIndexBuilder(t, nil, 1082 Document{Name: "banana1", Content: []byte("x orange y")}, 1083 Document{Name: 
"banana2", Content: []byte("x apple y")}, 1084 Document{Name: "orange", Content: []byte("x apple z")}) 1085 1086 t.Run("LineMatches", func(t *testing.T) { 1087 sres := searchForTest(t, b, query.NewAnd( 1088 &query.Substring{ 1089 Pattern: "banana", 1090 FileName: true, 1091 }, 1092 &query.Substring{ 1093 Pattern: "apple", 1094 })) 1095 1096 matches := sres.Files 1097 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1098 t.Fatalf("got %v, want 1 match", matches) 1099 } 1100 1101 match := matches[0].LineMatches[0] 1102 got := string(match.Line) 1103 want := "x apple y" 1104 if got != want { 1105 t.Errorf("got match %#v, want line %q", match, want) 1106 } 1107 }) 1108 1109 t.Run("ChunkMatches", func(t *testing.T) { 1110 sres := searchForTest(t, b, query.NewAnd( 1111 &query.Substring{ 1112 Pattern: "banana", 1113 FileName: true, 1114 }, 1115 &query.Substring{ 1116 Pattern: "apple", 1117 }), chunkOpts) 1118 1119 matches := sres.Files 1120 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1121 t.Fatalf("got %v, want 1 match", matches) 1122 } 1123 1124 match := matches[0].ChunkMatches[0] 1125 got := string(match.Content) 1126 want := "x apple y" 1127 if got != want { 1128 t.Errorf("got match %#v, want line %q", match, want) 1129 } 1130 }) 1131} 1132 1133func TestFileNameBoundary(t *testing.T) { 1134 b := testIndexBuilder(t, nil, 1135 Document{Name: "banana2", Content: []byte("x apple y")}, 1136 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1137 Document{Name: "foo", Content: []byte("x apple y")}) 1138 1139 t.Run("LineMatches", func(t *testing.T) { 1140 sres := searchForTest(t, b, &query.Substring{ 1141 Pattern: "helpers.go", 1142 FileName: true, 1143 }) 1144 1145 matches := sres.Files 1146 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1147 t.Fatalf("got %v, want 1 match", matches) 1148 } 1149 }) 1150 1151 t.Run("ChunkMatches", func(t *testing.T) { 1152 sres := searchForTest(t, b, &query.Substring{ 1153 Pattern: "helpers.go", 
1154 FileName: true, 1155 }, chunkOpts) 1156 1157 matches := sres.Files 1158 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1159 t.Fatalf("got %v, want 1 match", matches) 1160 } 1161 }) 1162} 1163 1164func TestDocumentOrder(t *testing.T) { 1165 var docs []Document 1166 for i := 0; i < 3; i++ { 1167 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1168 } 1169 1170 b := testIndexBuilder(t, nil, docs...) 1171 1172 t.Run("LineMatches", func(t *testing.T) { 1173 sres := searchForTest(t, b, query.NewAnd( 1174 &query.Substring{ 1175 Pattern: "needle", 1176 })) 1177 1178 want := []string{"f0", "f1", "f2"} 1179 var got []string 1180 for _, f := range sres.Files { 1181 got = append(got, f.FileName) 1182 } 1183 if !reflect.DeepEqual(got, want) { 1184 t.Fatalf("got %v, want %v", got, want) 1185 } 1186 }) 1187 1188 t.Run("ChunkMatches", func(t *testing.T) { 1189 sres := searchForTest(t, b, 1190 query.NewAnd(&query.Substring{ 1191 Pattern: "needle", 1192 }), 1193 chunkOpts, 1194 ) 1195 1196 want := []string{"f0", "f1", "f2"} 1197 var got []string 1198 for _, f := range sres.Files { 1199 got = append(got, f.FileName) 1200 } 1201 if !reflect.DeepEqual(got, want) { 1202 t.Fatalf("got %v, want %v", got, want) 1203 } 1204 }) 1205} 1206 1207func TestBranchMask(t *testing.T) { 1208 b := testIndexBuilder(t, &zoekt.Repository{ 1209 Branches: []zoekt.RepositoryBranch{ 1210 {"master", "v-master"}, 1211 {"stable", "v-stable"}, 1212 {"bonzai", "v-bonzai"}, 1213 }, 1214 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1215 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1216 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1217 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1218 ) 1219 1220 t.Run("LineMatches", func(t *testing.T) { 1221 sres := searchForTest(t, b, query.NewAnd( 1222 &query.Substring{ 
1223 Pattern: "needle", 1224 }, 1225 &query.Branch{ 1226 Pattern: "table", 1227 })) 1228 1229 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1230 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1231 } 1232 1233 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1234 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1235 } 1236 }) 1237 1238 t.Run("ChunkMatches", func(t *testing.T) { 1239 sres := searchForTest(t, b, query.NewAnd( 1240 &query.Substring{ 1241 Pattern: "needle", 1242 }, 1243 &query.Branch{ 1244 Pattern: "table", 1245 }), 1246 chunkOpts, 1247 ) 1248 1249 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1250 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1251 } 1252 1253 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1254 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1255 } 1256 }) 1257} 1258 1259func TestBranchLimit(t *testing.T) { 1260 for limit := 64; limit <= 65; limit++ { 1261 r := &zoekt.Repository{} 1262 for i := 0; i < limit; i++ { 1263 s := fmt.Sprintf("b%d", i) 1264 r.Branches = append(r.Branches, zoekt.RepositoryBranch{ 1265 s, "v-" + s, 1266 }) 1267 } 1268 _, err := NewIndexBuilder(r) 1269 if limit == 64 && err != nil { 1270 t.Fatalf("NewIndexBuilder: %v", err) 1271 } else if limit == 65 && err == nil { 1272 t.Fatalf("NewIndexBuilder succeeded") 1273 } 1274 } 1275} 1276 1277func TestBranchReport(t *testing.T) { 1278 branches := []string{"stable", "master"} 1279 b := testIndexBuilder(t, &zoekt.Repository{ 1280 Branches: []zoekt.RepositoryBranch{ 1281 {"stable", "vs"}, 1282 {"master", "vm"}, 1283 }, 1284 }, 1285 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1286 1287 t.Run("LineMatches", func(t *testing.T) { 1288 sres := searchForTest(t, b, &query.Substring{ 1289 Pattern: "needle", 1290 }) 1291 if len(sres.Files) 
!= 1 { 1292 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1293 } 1294 1295 f := sres.Files[0] 1296 if !reflect.DeepEqual(f.Branches, branches) { 1297 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1298 } 1299 }) 1300 1301 t.Run("ChunkMatches", func(t *testing.T) { 1302 sres := searchForTest(t, b, &query.Substring{ 1303 Pattern: "needle", 1304 }, chunkOpts) 1305 if len(sres.Files) != 1 { 1306 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1307 } 1308 1309 f := sres.Files[0] 1310 if !reflect.DeepEqual(f.Branches, branches) { 1311 t.Fatalf("got branches %q, want %q", f.Branches, branches) 1312 } 1313 }) 1314} 1315 1316func TestBranchVersions(t *testing.T) { 1317 b := testIndexBuilder(t, &zoekt.Repository{ 1318 Branches: []zoekt.RepositoryBranch{ 1319 {"stable", "v-stable"}, 1320 {"master", "v-master"}, 1321 }, 1322 }, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}}) 1323 1324 t.Run("LineMatches", func(t *testing.T) { 1325 sres := searchForTest(t, b, &query.Substring{ 1326 Pattern: "needle", 1327 }) 1328 if len(sres.Files) != 1 { 1329 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1330 } 1331 1332 f := sres.Files[0] 1333 if f.Version != "v-master" { 1334 t.Fatalf("got file %#v, want version 'v-master'", f) 1335 } 1336 }) 1337 1338 t.Run("ChunkMatches", func(t *testing.T) { 1339 sres := searchForTest(t, b, &query.Substring{ 1340 Pattern: "needle", 1341 }, chunkOpts) 1342 if len(sres.Files) != 1 { 1343 t.Fatalf("got %v, want 1 result from f2", sres.Files) 1344 } 1345 1346 f := sres.Files[0] 1347 if f.Version != "v-master" { 1348 t.Fatalf("got file %#v, want version 'v-master'", f) 1349 } 1350 }) 1351} 1352 1353func mustParseRE(s string) *syntax.Regexp { 1354 r, err := syntax.Parse(s, syntax.Perl) 1355 if err != nil { 1356 panic(err) 1357 } 1358 1359 return r 1360} 1361 1362func TestRegexp(t *testing.T) { 1363 content := []byte("needle the bla") 1364 // ----------------01234567890123 1365 1366 b := 
testIndexBuilder(t, nil, 1367 Document{ 1368 Name: "f1", 1369 Content: content, 1370 }) 1371 1372 t.Run("LineMatches", func(t *testing.T) { 1373 sres := searchForTest(t, b, 1374 &query.Regexp{ 1375 Regexp: mustParseRE("dle.*bla"), 1376 }) 1377 1378 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1379 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1380 } 1381 1382 got := sres.Files[0].LineMatches[0] 1383 want := zoekt.LineMatch{ 1384 LineFragments: []zoekt.LineFragmentMatch{{ 1385 LineOffset: 3, 1386 Offset: 3, 1387 MatchLength: 11, 1388 }}, 1389 Line: content, 1390 FileName: false, 1391 LineNumber: 1, 1392 LineStart: 0, 1393 LineEnd: 14, 1394 } 1395 1396 if !reflect.DeepEqual(got, want) { 1397 t.Errorf("got %#v, want %#v", got, want) 1398 } 1399 }) 1400 1401 t.Run("ChunkMatches", func(t *testing.T) { 1402 sres := searchForTest(t, b, 1403 &query.Regexp{ 1404 Regexp: mustParseRE("dle.*bla"), 1405 }, chunkOpts) 1406 1407 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1408 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1409 } 1410 1411 got := sres.Files[0].ChunkMatches[0] 1412 want := zoekt.ChunkMatch{ 1413 Content: content, 1414 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1415 Ranges: []zoekt.Range{{ 1416 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1417 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1418 }}, 1419 } 1420 1421 if diff := cmp.Diff(want, got); diff != "" { 1422 t.Fatal(diff) 1423 } 1424 }) 1425} 1426 1427func TestRegexpFile(t *testing.T) { 1428 content := []byte("needle the bla") 1429 1430 name := "let's play: find the mussel" 1431 b := testIndexBuilder(t, nil, 1432 Document{Name: name, Content: content}, 1433 Document{Name: "play.txt", Content: content}) 1434 1435 t.Run("LineMatches", func(t *testing.T) { 1436 sres := searchForTest(t, b, 1437 &query.Regexp{ 1438 Regexp: mustParseRE("play.*mussel"), 1439 FileName: true, 1440 }) 1441 1442 if 
len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1443 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1444 } 1445 1446 if sres.Files[0].FileName != name { 1447 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1448 } 1449 }) 1450 1451 t.Run("ChunkMatches", func(t *testing.T) { 1452 sres := searchForTest(t, b, 1453 &query.Regexp{ 1454 Regexp: mustParseRE("play.*mussel"), 1455 FileName: true, 1456 }, chunkOpts) 1457 1458 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1459 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1460 } 1461 1462 if sres.Files[0].FileName != name { 1463 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1464 } 1465 }) 1466} 1467 1468func TestRegexpOrder(t *testing.T) { 1469 content := []byte("bla the needle") 1470 // ----------------01234567890123 1471 1472 b := testIndexBuilder(t, nil, 1473 Document{Name: "f1", Content: content}) 1474 1475 t.Run("LineMatches", func(t *testing.T) { 1476 sres := searchForTest(t, b, 1477 &query.Regexp{ 1478 Regexp: mustParseRE("dle.*bla"), 1479 }) 1480 1481 if len(sres.Files) != 0 { 1482 t.Fatalf("got %v, want 0 matches", sres.Files) 1483 } 1484 }) 1485 1486 t.Run("ChunkMatches", func(t *testing.T) { 1487 sres := searchForTest(t, b, 1488 &query.Regexp{ 1489 Regexp: mustParseRE("dle.*bla"), 1490 }) 1491 1492 if len(sres.Files) != 0 { 1493 t.Fatalf("got %v, want 0 matches", sres.Files) 1494 } 1495 }) 1496} 1497 1498func TestRepoName(t *testing.T) { 1499 content := []byte("bla the needle") 1500 // ----------------01234567890123 1501 1502 b := testIndexBuilder(t, &zoekt.Repository{Name: "bla"}, 1503 Document{Name: "f1", Content: content}) 1504 1505 t.Run("LineMatches", func(t *testing.T) { 1506 sres := searchForTest(t, b, 1507 query.NewAnd( 1508 &query.Substring{Pattern: "needle"}, 1509 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1510 )) 1511 1512 if len(sres.Files) != 0 { 1513 t.Fatalf("got %v, want 0 matches", sres.Files) 1514 } 1515 1516 if 
sres.Stats.FilesConsidered > 0 { 1517 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1518 } 1519 1520 sres = searchForTest(t, b, 1521 query.NewAnd( 1522 &query.Substring{Pattern: "needle"}, 1523 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1524 )) 1525 if len(sres.Files) != 1 { 1526 t.Fatalf("got %v, want 1 match", sres.Files) 1527 } 1528 }) 1529 1530 t.Run("ChunkMatches", func(t *testing.T) { 1531 sres := searchForTest(t, b, 1532 query.NewAnd( 1533 &query.Substring{Pattern: "needle"}, 1534 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1535 ), 1536 chunkOpts, 1537 ) 1538 1539 if len(sres.Files) != 0 { 1540 t.Fatalf("got %v, want 0 matches", sres.Files) 1541 } 1542 1543 if sres.Stats.FilesConsidered > 0 { 1544 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1545 } 1546 1547 sres = searchForTest(t, b, 1548 query.NewAnd( 1549 &query.Substring{Pattern: "needle"}, 1550 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1551 )) 1552 if len(sres.Files) != 1 { 1553 t.Fatalf("got %v, want 1 match", sres.Files) 1554 } 1555 }) 1556} 1557 1558func TestMergeMatches(t *testing.T) { 1559 t.Run("LineMatches, adjacent matches", func(t *testing.T) { 1560 b := testIndexBuilder(t, nil, 1561 Document{Name: "f1", Content: []byte("blablabla")}) 1562 sres := searchForTest(t, b, 1563 &query.Substring{Pattern: "bla"}) 1564 1565 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1566 t.Fatalf("got %v, want 1 match", sres.Files) 1567 } 1568 1569 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 { 1570 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments) 1571 } 1572 }) 1573 1574 t.Run("LineMatches, overlapping matches", func(t *testing.T) { 1575 b := testIndexBuilder(t, nil, 1576 Document{Name: "f1", Content: []byte("hellogoodbye")}) 1577 sres := searchForTest(t, b, 1578 &query.And{Children: []query.Q{ 1579 &query.Substring{Pattern: "hello"}, 1580 
&query.Substring{Pattern: "logood"}, 1581 }}) 1582 1583 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1584 t.Fatalf("got %v, want 1 match", sres.Files) 1585 } 1586 1587 lineFragments := sres.Files[0].LineMatches[0].LineFragments 1588 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") { 1589 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments) 1590 } 1591 }) 1592 1593 t.Run("ChunkMatches, no overlap", func(t *testing.T) { 1594 b := testIndexBuilder(t, nil, 1595 Document{Name: "f1", Content: []byte("blablabla")}) 1596 1597 sres := searchForTest(t, b, 1598 &query.Substring{Pattern: "bla"}, 1599 chunkOpts, 1600 ) 1601 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1602 t.Fatalf("got %v, want 1 match", sres.Files) 1603 } 1604 1605 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 { 1606 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges) 1607 } 1608 }) 1609 1610 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) { 1611 b := testIndexBuilder(t, nil, 1612 Document{Name: "f1", Content: []byte("hellogoodbye")}) 1613 sres := searchForTest(t, b, 1614 &query.And{Children: []query.Q{ 1615 &query.Substring{Pattern: "hello"}, 1616 &query.Substring{Pattern: "logood"}, 1617 }}, chunkOpts) 1618 1619 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1620 t.Fatalf("got %v, want 1 chunk match", sres.Files) 1621 } 1622 1623 ranges := sres.Files[0].ChunkMatches[0].Ranges 1624 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 { 1625 t.Fatalf("got %v, want single chunk range 'hello'", ranges) 1626 } 1627 }) 1628} 1629 1630func TestRepoURL(t *testing.T) { 1631 content := []byte("blablabla") 1632 b := testIndexBuilder(t, &zoekt.Repository{ 1633 Name: "name", 1634 URL: "URL", 1635 CommitURLTemplate: "commit", 1636 FileURLTemplate: "file-url", 1637 LineFragmentTemplate: "fragment", 1638 }, Document{Name: "f1", Content: content}) 1639 
1640 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1641 1642 if sres.RepoURLs["name"] != "file-url" { 1643 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1644 } 1645 if sres.LineFragments["name"] != "fragment" { 1646 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1647 } 1648} 1649 1650func TestRegexpCaseSensitive(t *testing.T) { 1651 content := []byte("bla\nfunc unmarshalGitiles\n") 1652 b := testIndexBuilder(t, nil, Document{ 1653 Name: "f1", 1654 Content: content, 1655 }) 1656 1657 t.Run("LineMatches", func(t *testing.T) { 1658 res := searchForTest(t, b, 1659 &query.Regexp{ 1660 Regexp: mustParseRE("func.*Gitiles"), 1661 CaseSensitive: true, 1662 }) 1663 1664 if len(res.Files) != 1 { 1665 t.Fatalf("got %v, want one index", res.Files) 1666 } 1667 }) 1668 1669 t.Run("ChunkMatches", func(t *testing.T) { 1670 res := searchForTest(t, b, 1671 &query.Regexp{ 1672 Regexp: mustParseRE("func.*Gitiles"), 1673 CaseSensitive: true, 1674 }, 1675 chunkOpts, 1676 ) 1677 1678 if len(res.Files) != 1 { 1679 t.Fatalf("got %v, want one index", res.Files) 1680 } 1681 }) 1682} 1683 1684func TestRegexpCaseFolding(t *testing.T) { 1685 content := []byte("bla\nfunc unmarshalGitiles\n") 1686 1687 b := testIndexBuilder(t, nil, 1688 Document{Name: "f1", Content: content}) 1689 res := searchForTest(t, b, 1690 &query.Regexp{ 1691 Regexp: mustParseRE("func.*GITILES"), 1692 CaseSensitive: false, 1693 }) 1694 1695 if len(res.Files) != 1 { 1696 t.Fatalf("got %v, want one index", res.Files) 1697 } 1698} 1699 1700func TestCaseRegexp(t *testing.T) { 1701 content := []byte("BLABLABLA") 1702 b := testIndexBuilder(t, nil, 1703 Document{Name: "f1", Content: content}) 1704 1705 t.Run("LineMatches", func(t *testing.T) { 1706 res := searchForTest(t, b, 1707 &query.Regexp{ 1708 Regexp: mustParseRE("[xb][xl][xa]"), 1709 CaseSensitive: true, 1710 }) 1711 1712 if len(res.Files) > 0 { 1713 t.Fatalf("got %v, want no matches", res.Files) 1714 } 1715 }) 1716 1717 
t.Run("ChunkMatches", func(t *testing.T) { 1718 res := searchForTest(t, b, 1719 &query.Regexp{ 1720 Regexp: mustParseRE("[xb][xl][xa]"), 1721 CaseSensitive: true, 1722 }, 1723 chunkOpts, 1724 ) 1725 1726 if len(res.Files) > 0 { 1727 t.Fatalf("got %v, want no matches", res.Files) 1728 } 1729 }) 1730} 1731 1732func TestNegativeRegexp(t *testing.T) { 1733 content := []byte("BLABLABLA needle bla") 1734 b := testIndexBuilder(t, nil, 1735 Document{Name: "f1", Content: content}) 1736 1737 t.Run("LineMatches", func(t *testing.T) { 1738 res := searchForTest(t, b, 1739 query.NewAnd( 1740 &query.Substring{ 1741 Pattern: "needle", 1742 }, 1743 &query.Not{ 1744 Child: &query.Regexp{ 1745 Regexp: mustParseRE(".cs"), 1746 }, 1747 })) 1748 1749 if len(res.Files) != 1 { 1750 t.Fatalf("got %v, want 1 match", res.Files) 1751 } 1752 }) 1753 1754 t.Run("ChunkMatches", func(t *testing.T) { 1755 res := searchForTest(t, b, 1756 query.NewAnd( 1757 &query.Substring{ 1758 Pattern: "needle", 1759 }, 1760 &query.Not{ 1761 Child: &query.Regexp{ 1762 Regexp: mustParseRE(".cs"), 1763 }, 1764 }, 1765 ), 1766 chunkOpts) 1767 1768 if len(res.Files) != 1 { 1769 t.Fatalf("got %v, want 1 match", res.Files) 1770 } 1771 }) 1772} 1773 1774func TestSymbolRank(t *testing.T) { 1775 t.Skip() 1776 1777 content := []byte("func bla() blubxxxxx") 1778 // ----------------01234567890123456789 1779 b := testIndexBuilder(t, nil, 1780 Document{ 1781 Name: "f1", 1782 Content: content, 1783 }, Document{ 1784 Name: "f2", 1785 Content: content, 1786 Symbols: []DocumentSection{{5, 8}}, 1787 }, Document{ 1788 Name: "f3", 1789 Content: content, 1790 }) 1791 1792 t.Run("LineMatches", func(t *testing.T) { 1793 res := searchForTest(t, b, 1794 &query.Substring{ 1795 CaseSensitive: false, 1796 Pattern: "bla", 1797 }) 1798 1799 if len(res.Files) != 3 { 1800 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1801 } 1802 if res.Files[0].FileName != "f2" { 1803 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1804 } 1805 }) 1806 1807 t.Run("ChunkMatches", func(t *testing.T) { 1808 res := searchForTest(t, b, 1809 &query.Substring{ 1810 CaseSensitive: false, 1811 Pattern: "bla", 1812 }, chunkOpts) 1813 1814 if len(res.Files) != 3 { 1815 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1816 } 1817 if res.Files[0].FileName != "f2" { 1818 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1819 } 1820 }) 1821} 1822 1823func TestSymbolRankRegexpUTF8(t *testing.T) { 1824 t.Skip() 1825 1826 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1827 content := []byte(prefix + 1828 "func bla() blub") 1829 // ------012345678901234 1830 b := testIndexBuilder(t, nil, 1831 Document{ 1832 Name: "f1", 1833 Content: content, 1834 }, Document{ 1835 Name: "f2", 1836 Content: content, 1837 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1838 }, Document{ 1839 Name: "f3", 1840 Content: content, 1841 }) 1842 1843 t.Run("LineMatches", func(t *testing.T) { 1844 res := searchForTest(t, b, 1845 &query.Regexp{ 1846 Regexp: mustParseRE("b.a"), 1847 }) 1848 1849 if len(res.Files) != 3 { 1850 t.Fatalf("got %#v, want 3 files", res.Files) 1851 } 1852 if res.Files[0].FileName != "f2" { 1853 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1854 } 1855 }) 1856 1857 t.Run("ChunjkMatches", func(t *testing.T) { 1858 res := searchForTest(t, b, 1859 &query.Regexp{ 1860 Regexp: mustParseRE("b.a"), 1861 }, chunkOpts) 1862 1863 if len(res.Files) != 3 { 1864 t.Fatalf("got %#v, want 3 files", res.Files) 1865 } 1866 if res.Files[0].FileName != "f2" { 1867 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1868 } 1869 }) 1870} 1871 1872func TestPartialSymbolRank(t *testing.T) { 1873 t.Skip() 1874 1875 content := []byte("func bla() blub") 1876 // 
----------------012345678901234 1877 1878 b := testIndexBuilder(t, nil, 1879 Document{ 1880 Name: "f1", 1881 Content: content, 1882 Symbols: []DocumentSection{{4, 9}}, 1883 }, Document{ 1884 Name: "f2", 1885 Content: content, 1886 Symbols: []DocumentSection{{4, 8}}, 1887 }, Document{ 1888 Name: "f3", 1889 Content: content, 1890 Symbols: []DocumentSection{{4, 9}}, 1891 }) 1892 1893 t.Run("LineMatches", func(t *testing.T) { 1894 res := searchForTest(t, b, 1895 &query.Substring{ 1896 Pattern: "bla", 1897 }) 1898 1899 if len(res.Files) != 3 { 1900 t.Fatalf("got %#v, want 3 files", res.Files) 1901 } 1902 if res.Files[0].FileName != "f2" { 1903 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1904 } 1905 }) 1906 1907 t.Run("ChunkMatches", func(t *testing.T) { 1908 res := searchForTest(t, b, 1909 &query.Substring{ 1910 Pattern: "bla", 1911 }, chunkOpts) 1912 1913 if len(res.Files) != 3 { 1914 t.Fatalf("got %#v, want 3 files", res.Files) 1915 } 1916 if res.Files[0].FileName != "f2" { 1917 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1918 } 1919 }) 1920} 1921 1922func TestNegativeRepo(t *testing.T) { 1923 content := []byte("bla the needle") 1924 // ----------------01234567890123 1925 b := testIndexBuilder(t, &zoekt.Repository{ 1926 Name: "bla", 1927 }, Document{Name: "f1", Content: content}) 1928 1929 t.Run("LineMatches", func(t *testing.T) { 1930 sres := searchForTest(t, b, 1931 query.NewAnd( 1932 &query.Substring{Pattern: "needle"}, 1933 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1934 )) 1935 1936 if len(sres.Files) != 0 { 1937 t.Fatalf("got %v, want 0 matches", sres.Files) 1938 } 1939 }) 1940 1941 t.Run("ChunkMatches", func(t *testing.T) { 1942 sres := searchForTest(t, b, 1943 query.NewAnd( 1944 &query.Substring{Pattern: "needle"}, 1945 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1946 ), chunkOpts) 1947 1948 if len(sres.Files) != 0 { 1949 t.Fatalf("got %v, want 0 matches", sres.Files) 1950 } 1951 }) 
1952} 1953 1954func TestListRepos(t *testing.T) { 1955 content := []byte("bla the needle\n") 1956 // ----------------012345678901234- 1957 1958 t.Run("default and minimal fallback", func(t *testing.T) { 1959 repo := &zoekt.Repository{ 1960 Name: "reponame", 1961 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1962 } 1963 b := testIndexBuilder(t, repo, 1964 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1965 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1966 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1967 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1968 1969 searcher := searcherForTest(t, b) 1970 1971 for _, opts := range []*zoekt.ListOptions{ 1972 nil, 1973 {}, 1974 {Field: zoekt.RepoListFieldRepos}, 1975 {Field: zoekt.RepoListFieldReposMap}, 1976 } { 1977 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1978 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1979 1980 res, err := searcher.List(context.Background(), q, opts) 1981 if err != nil { 1982 t.Fatalf("List(%v): %v", q, err) 1983 } 1984 1985 want := &zoekt.RepoList{ 1986 Repos: []*zoekt.RepoListEntry{{ 1987 Repository: *repo, 1988 Stats: zoekt.RepoStats{ 1989 Documents: 4, 1990 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1991 Shards: 1, 1992 1993 NewLinesCount: 4, 1994 DefaultBranchNewLinesCount: 2, 1995 OtherBranchesNewLinesCount: 3, 1996 }, 1997 }}, 1998 Stats: zoekt.RepoStats{ 1999 Repos: 1, 2000 Documents: 4, 2001 ContentBytes: 68, 2002 Shards: 1, 2003 2004 NewLinesCount: 4, 2005 DefaultBranchNewLinesCount: 2, 2006 OtherBranchesNewLinesCount: 3, 2007 }, 2008 } 2009 ignored := []cmp.Option{ 2010 cmpopts.EquateEmpty(), 2011 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 2012 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 2013 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 2014 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 2015 } 
2016 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2017 t.Fatalf("mismatch (-want +got):\n%s", diff) 2018 } 2019 2020 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 2021 res, err = searcher.List(context.Background(), q, nil) 2022 if err != nil { 2023 t.Fatalf("List(%v): %v", q, err) 2024 } 2025 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2026 t.Fatalf("got %v, want 0 matches", res) 2027 } 2028 }) 2029 } 2030 }) 2031 2032 t.Run("minimal", func(t *testing.T) { 2033 repo := &zoekt.Repository{ 2034 ID: 1234, 2035 Name: "reponame", 2036 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2037 RawConfig: map[string]string{"repoid": "1234"}, 2038 } 2039 b := testIndexBuilder(t, repo, 2040 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 2041 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 2042 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 2043 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 2044 2045 searcher := searcherForTest(t, b) 2046 2047 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 2048 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2049 if err != nil { 2050 t.Fatalf("List(%v): %v", q, err) 2051 } 2052 2053 want := &zoekt.RepoList{ 2054 ReposMap: zoekt.ReposMap{ 2055 repo.ID: { 2056 HasSymbols: repo.HasSymbols, 2057 Branches: repo.Branches, 2058 }, 2059 }, 2060 Stats: zoekt.RepoStats{ 2061 Repos: 1, 2062 Shards: 1, 2063 Documents: 4, 2064 IndexBytes: 412, 2065 ContentBytes: 68, 2066 NewLinesCount: 4, 2067 DefaultBranchNewLinesCount: 2, 2068 OtherBranchesNewLinesCount: 3, 2069 }, 2070 } 2071 2072 ignored := []cmp.Option{ 2073 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 2074 } 2075 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2076 t.Fatalf("mismatch (-want +got):\n%s", diff) 2077 } 2078 2079 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 
2080 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2081 if err != nil { 2082 t.Fatalf("List(%v): %v", q, err) 2083 } 2084 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2085 t.Fatalf("got %v, want 0 matches", res) 2086 } 2087 }) 2088} 2089 2090func TestListReposByContent(t *testing.T) { 2091 content := []byte("bla the needle") 2092 2093 b := testIndexBuilder(t, &zoekt.Repository{ 2094 Name: "reponame", 2095 }, 2096 Document{Name: "f1", Content: content}, 2097 Document{Name: "f2", Content: content}) 2098 2099 searcher := searcherForTest(t, b) 2100 q := &query.Substring{Pattern: "needle"} 2101 res, err := searcher.List(context.Background(), q, nil) 2102 if err != nil { 2103 t.Fatalf("List(%v): %v", q, err) 2104 } 2105 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 2106 t.Fatalf("got %v, want 1 matches", res) 2107 } 2108 if got := res.Repos[0].Stats.Shards; got != 1 { 2109 t.Fatalf("got %d, want 1 shard", got) 2110 } 2111 q = &query.Substring{Pattern: "foo"} 2112 res, err = searcher.List(context.Background(), q, nil) 2113 if err != nil { 2114 t.Fatalf("List(%v): %v", q, err) 2115 } 2116 if len(res.Repos) != 0 { 2117 t.Fatalf("got %v, want 0 matches", res) 2118 } 2119} 2120 2121func TestMetadata(t *testing.T) { 2122 content := []byte("bla the needle") 2123 2124 b := testIndexBuilder(t, &zoekt.Repository{ 2125 Name: "reponame", 2126 }, Document{Name: "f1", Content: content}, 2127 Document{Name: "f2", Content: content}) 2128 2129 var buf bytes.Buffer 2130 if err := b.Write(&buf); err != nil { 2131 t.Fatal(err) 2132 } 2133 f := &memSeeker{buf.Bytes()} 2134 2135 rd, _, err := ReadMetadata(f) 2136 if err != nil { 2137 t.Fatalf("ReadMetadata: %v", err) 2138 } 2139 2140 if got, want := rd[0].Name, "reponame"; got != want { 2141 t.Fatalf("got %q want %q", got, want) 2142 } 2143} 2144 2145func TestOr(t *testing.T) { 2146 b := testIndexBuilder(t, nil, 2147 Document{Name: "f1", Content: 
[]byte("needle")}, 2148 Document{Name: "f2", Content: []byte("banana")}) 2149 t.Run("LineMatches", func(t *testing.T) { 2150 sres := searchForTest(t, b, query.NewOr( 2151 &query.Substring{Pattern: "needle"}, 2152 &query.Substring{Pattern: "banana"})) 2153 2154 if len(sres.Files) != 2 { 2155 t.Fatalf("got %v, want 2 files", sres.Files) 2156 } 2157 }) 2158 2159 t.Run("ChunkMatches", func(t *testing.T) { 2160 sres := searchForTest(t, b, query.NewOr( 2161 &query.Substring{Pattern: "needle"}, 2162 &query.Substring{Pattern: "banana"})) 2163 2164 if len(sres.Files) != 2 { 2165 t.Fatalf("got %v, want 2 files", sres.Files) 2166 } 2167 }) 2168} 2169 2170func TestFrequency(t *testing.T) { 2171 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2172 2173 b := testIndexBuilder(t, nil, 2174 Document{ 2175 Name: "f1", 2176 Content: content, 2177 }) 2178 2179 t.Run("LineMatches", func(t *testing.T) { 2180 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 2181 if len(sres.Files) != 0 { 2182 t.Errorf("got %v, wanted 0 matches", sres.Files) 2183 } 2184 }) 2185 2186 t.Run("ChunkMatches", func(t *testing.T) { 2187 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 2188 if len(sres.Files) != 0 { 2189 t.Errorf("got %v, wanted 0 matches", sres.Files) 2190 } 2191 }) 2192} 2193 2194func TestMatchNewline(t *testing.T) { 2195 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 2196 if err != nil { 2197 t.Fatalf("syntax.Parse: %v", err) 2198 } 2199 2200 content := []byte("pqr\nalex") 2201 2202 b := testIndexBuilder(t, nil, 2203 Document{ 2204 Name: "f1", 2205 Content: content, 2206 }) 2207 2208 t.Run("LineMatches", func(t *testing.T) { 2209 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 2210 if len(sres.Files) != 1 { 2211 t.Errorf("got %v, wanted 1 matches", sres.Files) 2212 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 2213 t.Errorf("got match line %q, want 
%q", l, content) 2214 } 2215 }) 2216 2217 t.Run("ChunkMatches", func(t *testing.T) { 2218 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts) 2219 if len(sres.Files) != 1 { 2220 t.Errorf("got %v, wanted 1 matches", sres.Files) 2221 } else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) { 2222 t.Errorf("got match line %q, want %q", c, content) 2223 } 2224 }) 2225} 2226

// TestSubRepo indexes a document that lives under the sub-repository path
// "sub" and checks that the search result reports the sub-repository path and
// name, and that the result's LineFragments map carries the sub-repository's
// template under its name.
func TestSubRepo(t *testing.T) {
	subRepos := map[string]*zoekt.Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}

	content := []byte("pqr\nalex")

	b := testIndexBuilder(t, &zoekt.Repository{
		SubRepoMap: subRepos,
	}, Document{
		Name:              "sub/f1",
		Content:           content,
		SubRepositoryPath: "sub",
	})

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, wanted 1 match", sres.Files)
	}

	f := sres.Files[0]
	if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
	}

	// The map is looked up by the sub-repository name, so the failure text
	// names that key rather than the sub-repository path.
	if sres.LineFragments["sub-name"] != "sub-line" {
		t.Errorf("got LineFragmentTemplate %v, want {'sub-name':'sub-line'}", sres.LineFragments)
	}
}

// TestSearchEither checks that a Substring query with neither Content nor
// FileName set matches a needle in file content as well as in a file name
// (two files), while Content: true restricts the result to the file whose
// content holds the needle.
func TestSearchEither(t *testing.T) {
	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})
}

// TestUnicodeExactMatch checks that a case-sensitive search for a needle made
// of non-ASCII letters finds the single document containing it verbatim.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true})
		if len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files)
		}
	})
}

// TestUnicodeCoverContent searches for the upper-cased form of a non-ASCII
// needle: case-sensitively it must find nothing, case-insensitively it must
// find the document and report the byte offset at which the needle starts.
func TestUnicodeCoverContent(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	// Byte offset of the needle inside content; both result formats report
	// match positions in bytes.
	wantOffset := uint32(bytes.Index(content, []byte(needle)))

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true})
		if len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 matches", res.Files)
		}

		res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
		}

		if got := res.Files[0].LineMatches[0].LineFragments[0].Offset; got != wantOffset {
			t.Errorf("got %d want %d", got, wantOffset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 matches", res.Files)
		}

		res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files)
		}

		if got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset; got != wantOffset {
			t.Errorf("got %d want %d", got, wantOffset)
		}
	})
}

// TestUnicodeNonCoverContent runs a content-only, case-insensitive search for
// the upper-cased form of a longer non-ASCII needle and checks that the match
// is reported at the byte offset where the needle starts.
func TestUnicodeNonCoverContent(t *testing.T) {
	needle := "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")

	// Byte offset of the needle inside content.
	wantOffset := uint32(bytes.Index(content, []byte(needle)))

	b := testIndexBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", res.Files)
		}

		if got := res.Files[0].LineMatches[0].LineFragments[0].Offset; got != wantOffset {
			t.Errorf("got %d want %d", got, wantOffset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 match", res.Files)
		}

		if got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset; got != wantOffset {
			t.Errorf("got %d want %d", got, wantOffset)
		}
	})
}
2392const kelvinCodePoint = 8490 2393 2394func TestUnicodeVariableLength(t *testing.T) { 2395 lower := 'k' 2396 upper := rune(kelvinCodePoint) 2397 2398 needle := "nee" + string([]rune{lower}) + "eed" 2399 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2400 " ee" + string([]rune{lower}) + "ee" + 2401 " ee" + string([]rune{upper}) + "ee") 2402 2403 t.Run("LineMatches", func(t *testing.T) { 2404 b := testIndexBuilder(t, nil, 2405 Document{Name: "f1", Content: []byte(corpus)}) 2406 2407 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2408 if len(res.Files) != 1 { 2409 t.Fatalf("got %v, wanted 1 index", res.Files) 2410 } 2411 }) 2412 2413 t.Run("ChunkMatches", func(t *testing.T) { 2414 b := testIndexBuilder(t, nil, 2415 Document{Name: "f1", Content: []byte(corpus)}) 2416 2417 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2418 if len(res.Files) != 1 { 2419 t.Fatalf("got %v, wanted 1 index", res.Files) 2420 } 2421 }) 2422} 2423 2424func TestUnicodeFileStartOffsets(t *testing.T) { 2425 unicode := "世界" 2426 wat := "waaaaaat" 2427 b := testIndexBuilder(t, nil, 2428 Document{ 2429 Name: "f1", 2430 Content: []byte(unicode), 2431 }, 2432 Document{ 2433 Name: "f2", 2434 Content: []byte(wat), 2435 }, 2436 ) 2437 q := &query.Substring{Pattern: wat, Content: true} 2438 res := searchForTest(t, b, q) 2439 if len(res.Files) != 1 { 2440 t.Fatalf("got %v, wanted 1 index", res.Files) 2441 } 2442} 2443 2444func TestLongFileUTF8(t *testing.T) { 2445 needle := "neeedle" 2446 2447 // 6 bytes. 
2448 unicode := "世界" 2449 content := []byte(strings.Repeat(unicode, 100) + needle) 2450 b := testIndexBuilder(t, nil, 2451 Document{ 2452 Name: "f1", 2453 Content: []byte(strings.Repeat("a", 50)), 2454 }, 2455 Document{ 2456 Name: "f2", 2457 Content: content, 2458 }) 2459 2460 t.Run("LineMatches", func(t *testing.T) { 2461 q := &query.Substring{Pattern: needle, Content: true} 2462 res := searchForTest(t, b, q) 2463 if len(res.Files) != 1 { 2464 t.Errorf("got %v, want 1 result", res) 2465 } 2466 }) 2467 2468 t.Run("ChunkMatches", func(t *testing.T) { 2469 q := &query.Substring{Pattern: needle, Content: true} 2470 res := searchForTest(t, b, q, chunkOpts) 2471 if len(res.Files) != 1 { 2472 t.Errorf("got %v, want 1 result", res) 2473 } 2474 }) 2475} 2476 2477func TestEstimateDocCount(t *testing.T) { 2478 content := []byte("bla needle bla") 2479 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2480 Document{Name: "f1", Content: content}, 2481 Document{Name: "f2", Content: content}, 2482 ) 2483 2484 t.Run("LineMatches", func(t *testing.T) { 2485 if sres := searchForTest(t, b, 2486 query.NewAnd( 2487 &query.Substring{Pattern: "needle"}, 2488 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2489 ), zoekt.SearchOptions{ 2490 EstimateDocCount: true, 2491 }); sres.Stats.ShardFilesConsidered != 2 { 2492 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2493 } 2494 if sres := searchForTest(t, b, 2495 query.NewAnd( 2496 &query.Substring{Pattern: "needle"}, 2497 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2498 ), zoekt.SearchOptions{ 2499 EstimateDocCount: true, 2500 }); sres.Stats.ShardFilesConsidered != 0 { 2501 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2502 } 2503 }) 2504 2505 t.Run("ChunkMatches", func(t *testing.T) { 2506 if sres := searchForTest(t, b, 2507 query.NewAnd( 2508 &query.Substring{Pattern: "needle"}, 2509 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2510 ), zoekt.SearchOptions{ 
2511 EstimateDocCount: true, 2512 ChunkMatches: true, 2513 }); sres.Stats.ShardFilesConsidered != 2 { 2514 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2515 } 2516 if sres := searchForTest(t, b, 2517 query.NewAnd( 2518 &query.Substring{Pattern: "needle"}, 2519 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2520 ), zoekt.SearchOptions{ 2521 EstimateDocCount: true, 2522 ChunkMatches: true, 2523 }); sres.Stats.ShardFilesConsidered != 0 { 2524 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2525 } 2526 }) 2527} 2528 2529func TestUTF8CorrectCorpus(t *testing.T) { 2530 needle := "neeedle" 2531 2532 // 6 bytes. 2533 unicode := "世界" 2534 b := testIndexBuilder(t, nil, 2535 Document{ 2536 Name: "f1", 2537 Content: []byte(strings.Repeat(unicode, 100)), 2538 }, 2539 Document{ 2540 Name: "xxxxxneeedle", 2541 Content: []byte("hello"), 2542 }) 2543 2544 t.Run("LineMatches", func(t *testing.T) { 2545 q := &query.Substring{Pattern: needle, FileName: true} 2546 res := searchForTest(t, b, q) 2547 if len(res.Files) != 1 { 2548 t.Errorf("got %v, want 1 result", res) 2549 } 2550 }) 2551 2552 t.Run("ChunkMatches", func(t *testing.T) { 2553 q := &query.Substring{Pattern: needle, FileName: true} 2554 res := searchForTest(t, b, q, chunkOpts) 2555 if len(res.Files) != 1 { 2556 t.Errorf("got %v, want 1 result", res) 2557 } 2558 }) 2559} 2560 2561func TestBuilderStats(t *testing.T) { 2562 b := testIndexBuilder(t, nil, 2563 Document{ 2564 Name: "f1", 2565 Content: []byte(strings.Repeat("abcd", 1024)), 2566 }) 2567 var buf bytes.Buffer 2568 if err := b.Write(&buf); err != nil { 2569 t.Fatal(err) 2570 } 2571 2572 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2573 t.Errorf("got %d, want %d", got, want) 2574 } 2575} 2576 2577func TestIOStats(t *testing.T) { 2578 b := testIndexBuilder(t, nil, 2579 Document{ 2580 Name: "f1", 2581 Content: []byte(strings.Repeat("abcd", 1024)), 2582 }) 2583 2584 t.Run("LineMatches", func(t 
*testing.T) { 2585 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2586 res := searchForTest(t, b, q) 2587 2588 // 4096 (content) + 2 (overhead: newlines or doc sections) 2589 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2590 t.Errorf("got content I/O %d, want %d", got, want) 2591 } 2592 2593 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2594 // delta encoded. 2595 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2596 t.Errorf("got index I/O %d, want %d", got, want) 2597 } 2598 }) 2599 2600 t.Run("ChunkMatches", func(t *testing.T) { 2601 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2602 res := searchForTest(t, b, q, chunkOpts) 2603 2604 // 4096 (content) + 2 (overhead: newlines or doc sections) 2605 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2606 t.Errorf("got content I/O %d, want %d", got, want) 2607 } 2608 2609 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2610 // delta encoded. 2611 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2612 t.Errorf("got index I/O %d, want %d", got, want) 2613 } 2614 }) 2615 2616 t.Run("LineMatches with BM25", func(t *testing.T) { 2617 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2618 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) 2619 2620 // 4096 (content) + 2 (overhead: newlines or doc sections) 2621 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2622 t.Errorf("got content I/O %d, want %d", got, want) 2623 } 2624 2625 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2626 // delta encoded. 
2627 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2628 t.Errorf("got index I/O %d, want %d", got, want) 2629 } 2630 }) 2631 2632 t.Run("ChunkMatches with BM25", func(t *testing.T) { 2633 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2634 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) 2635 2636 // 4096 (content) + 2 (overhead: newlines or doc sections) 2637 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2638 t.Errorf("got content I/O %d, want %d", got, want) 2639 } 2640 2641 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2642 // delta encoded. 2643 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2644 t.Errorf("got index I/O %d, want %d", got, want) 2645 } 2646 }) 2647} 2648 2649func TestStartLineAnchor(t *testing.T) { 2650 b := testIndexBuilder(t, nil, 2651 Document{ 2652 Name: "f1", 2653 Content: []byte( 2654 `hello 2655start of middle of line 2656`), 2657 }) 2658 2659 t.Run("LineMatches", func(t *testing.T) { 2660 q, err := query.Parse("^start") 2661 if err != nil { 2662 t.Errorf("parse: %v", err) 2663 } 2664 2665 res := searchForTest(t, b, q) 2666 if len(res.Files) != 1 { 2667 t.Errorf("got %v, want 1 file", res.Files) 2668 } 2669 2670 q, err = query.Parse("^middle") 2671 if err != nil { 2672 t.Errorf("parse: %v", err) 2673 } 2674 res = searchForTest(t, b, q) 2675 if len(res.Files) != 0 { 2676 t.Errorf("got %v, want 0 files", res.Files) 2677 } 2678 }) 2679 2680 t.Run("ChunkMatches", func(t *testing.T) { 2681 q, err := query.Parse("^start") 2682 if err != nil { 2683 t.Errorf("parse: %v", err) 2684 } 2685 2686 res := searchForTest(t, b, q, chunkOpts) 2687 if len(res.Files) != 1 { 2688 t.Errorf("got %v, want 1 file", res.Files) 2689 } 2690 2691 q, err = query.Parse("^middle") 2692 if err != nil { 2693 t.Errorf("parse: %v", err) 2694 } 2695 res = searchForTest(t, b, q, chunkOpts) 2696 if len(res.Files) != 0 
{ 2697 t.Errorf("got %v, want 0 files", res.Files) 2698 } 2699 }) 2700} 2701 2702func TestAndOrUnicode(t *testing.T) { 2703 q, err := query.Parse("orange.*apple") 2704 if err != nil { 2705 t.Errorf("parse: %v", err) 2706 } 2707 finalQ := query.NewAnd(q, 2708 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2709 query.NewOr(&query.Branch{Pattern: "master"})))) 2710 2711 b := testIndexBuilder(t, &zoekt.Repository{ 2712 Name: "name", 2713 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, 2714 }, Document{ 2715 Name: "f2", 2716 Content: []byte("orange\u2318apple"), 2717 // --------------0123456 78901 2718 Branches: []string{"master"}, 2719 }) 2720 2721 t.Run("LineMatches", func(t *testing.T) { 2722 res := searchForTest(t, b, finalQ) 2723 if len(res.Files) != 1 { 2724 t.Errorf("got %v, want 1 result", res.Files) 2725 } 2726 }) 2727 2728 t.Run("ChunkMatches", func(t *testing.T) { 2729 res := searchForTest(t, b, finalQ, chunkOpts) 2730 if len(res.Files) != 1 { 2731 t.Errorf("got %v, want 1 result", res.Files) 2732 } 2733 }) 2734} 2735 2736func TestAndShort(t *testing.T) { 2737 content := []byte("bla needle at orange bla") 2738 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2739 Document{Name: "f1", Content: content}, 2740 Document{Name: "f2", Content: []byte("xx at xx")}, 2741 Document{Name: "f3", Content: []byte("yy orange xx")}, 2742 ) 2743 2744 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2745 &query.Substring{Pattern: "orange"}) 2746 2747 t.Run("LineMatches", func(t *testing.T) { 2748 res := searchForTest(t, b, q) 2749 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2750 t.Errorf("got %v, want 1 result", res.Files) 2751 } 2752 }) 2753 2754 t.Run("ChunkMatches", func(t *testing.T) { 2755 res := searchForTest(t, b, q, chunkOpts) 2756 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2757 t.Errorf("got %v, want 1 result", res.Files) 2758 } 2759 }) 2760} 2761 2762func 
TestNoCollectRegexpSubstring(t *testing.T) { 2763 content := []byte("bla final bla\nfoo final, foo") 2764 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2765 Document{Name: "f1", Content: content}, 2766 ) 2767 2768 q := &query.Regexp{ 2769 Regexp: mustParseRE("final[,.]"), 2770 } 2771 2772 t.Run("LineMatches", func(t *testing.T) { 2773 res := searchForTest(t, b, q) 2774 if len(res.Files) != 1 { 2775 t.Fatalf("got %v, want 1 result", res.Files) 2776 } 2777 if f := res.Files[0]; len(f.LineMatches) != 1 { 2778 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2779 } 2780 }) 2781 2782 t.Run("ChunkMatches", func(t *testing.T) { 2783 res := searchForTest(t, b, q, chunkOpts) 2784 if len(res.Files) != 1 { 2785 t.Fatalf("got %v, want 1 result", res.Files) 2786 } 2787 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2788 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2789 } 2790 }) 2791} 2792 2793func printLineMatches(ms []zoekt.LineMatch) string { 2794 var ss []string 2795 for _, m := range ms { 2796 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2797 } 2798 2799 return strings.Join(ss, ", ") 2800} 2801 2802func TestLang(t *testing.T) { 2803 content := []byte("bla needle bla") 2804 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2805 Document{Name: "f1", Content: content}, 2806 Document{Name: "f2", Language: "java", Content: content}, 2807 Document{Name: "f3", Language: "cpp", Content: content}, 2808 ) 2809 2810 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2811 &query.Language{Language: "cpp"}) 2812 2813 t.Run("LineMatches", func(t *testing.T) { 2814 res := searchForTest(t, b, q) 2815 if len(res.Files) != 1 { 2816 t.Fatalf("got %v, want 1 result in f3", res.Files) 2817 } 2818 f := res.Files[0] 2819 if f.FileName != "f3" || f.Language != "cpp" { 2820 t.Fatalf("got %v, want 1 match with language cpp", f) 2821 } 2822 }) 2823 2824 
t.Run("ChunkMatches", func(t *testing.T) { 2825 res := searchForTest(t, b, q, chunkOpts) 2826 if len(res.Files) != 1 { 2827 t.Fatalf("got %v, want 1 result in f3", res.Files) 2828 } 2829 f := res.Files[0] 2830 if f.FileName != "f3" || f.Language != "cpp" { 2831 t.Fatalf("got %v, want 1 match with language cpp", f) 2832 } 2833 }) 2834} 2835 2836func TestLangShortcut(t *testing.T) { 2837 content := []byte("bla needle bla") 2838 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2839 Document{Name: "f2", Language: "java", Content: content}, 2840 Document{Name: "f3", Language: "cpp", Content: content}, 2841 ) 2842 2843 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2844 &query.Language{Language: "fortran"}) 2845 2846 t.Run("LineMatches", func(t *testing.T) { 2847 res := searchForTest(t, b, q) 2848 if len(res.Files) != 0 { 2849 t.Fatalf("got %v, want 0 results", res.Files) 2850 } 2851 if res.Stats.IndexBytesLoaded > 0 { 2852 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2853 } 2854 }) 2855 2856 t.Run("ChunkMatches", func(t *testing.T) { 2857 res := searchForTest(t, b, q, chunkOpts) 2858 if len(res.Files) != 0 { 2859 t.Fatalf("got %v, want 0 results", res.Files) 2860 } 2861 if res.Stats.IndexBytesLoaded > 0 { 2862 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2863 } 2864 }) 2865} 2866 2867func TestNoTextMatchAtoms(t *testing.T) { 2868 content := []byte("bla needle bla") 2869 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2870 Document{Name: "f1", Content: content}, 2871 Document{Name: "f2", Language: "java", Content: content}, 2872 Document{Name: "f3", Language: "cpp", Content: content}, 2873 ) 2874 q := query.NewAnd(&query.Language{Language: "java"}) 2875 t.Run("LineMatches", func(t *testing.T) { 2876 res := searchForTest(t, b, q) 2877 if len(res.Files) != 1 { 2878 t.Fatalf("got %v, want 1 result in f3", res.Files) 2879 } 2880 }) 2881 2882 t.Run("ChunkMatches", func(t *testing.T) { 
2883 res := searchForTest(t, b, q, chunkOpts) 2884 if len(res.Files) != 1 { 2885 t.Fatalf("got %v, want 1 result in f3", res.Files) 2886 } 2887 }) 2888} 2889 2890func TestNoPositiveAtoms(t *testing.T) { 2891 content := []byte("bla needle bla") 2892 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2893 Document{Name: "f1", Content: content}, 2894 Document{Name: "f2", Content: content}, 2895 ) 2896 2897 q := query.NewAnd( 2898 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2899 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2900 t.Run("LineMatches", func(t *testing.T) { 2901 res := searchForTest(t, b, q) 2902 if len(res.Files) != 2 { 2903 t.Fatalf("got %v, want 2 results in f3", res.Files) 2904 } 2905 }) 2906 t.Run("ChunkMatches", func(t *testing.T) { 2907 res := searchForTest(t, b, q, chunkOpts) 2908 if len(res.Files) != 2 { 2909 t.Fatalf("got %v, want 2 results in f3", res.Files) 2910 } 2911 }) 2912} 2913 2914func TestSymbolBoundaryStart(t *testing.T) { 2915 content := []byte("start\nbla bla\nend") 2916 // ----------------012345-67890123-456 2917 2918 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2919 Document{ 2920 Name: "f1", 2921 Content: content, 2922 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2923 }, 2924 ) 2925 q := &query.Symbol{ 2926 Expr: &query.Substring{Pattern: "start"}, 2927 } 2928 t.Run("LineMatches", func(t *testing.T) { 2929 res := searchForTest(t, b, q) 2930 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2931 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2932 } 2933 m := res.Files[0].LineMatches[0].LineFragments[0] 2934 if m.Offset != 0 { 2935 t.Fatalf("got offset %d want 0", m.Offset) 2936 } 2937 }) 2938 2939 t.Run("ChunkMatches", func(t *testing.T) { 2940 res := searchForTest(t, b, q, chunkOpts) 2941 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2942 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2943 } 2944 m := res.Files[0].ChunkMatches[0].Ranges[0] 2945 
if m.Start.ByteOffset != 0 { 2946 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2947 } 2948 }) 2949} 2950 2951func TestSymbolBoundaryEnd(t *testing.T) { 2952 content := []byte("start\nbla bla\nend") 2953 // ----------------012345-67890123-456 2954 2955 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2956 Document{ 2957 Name: "f1", 2958 Content: content, 2959 Symbols: []DocumentSection{{14, 17}}, 2960 }, 2961 ) 2962 q := &query.Symbol{ 2963 Expr: &query.Substring{Pattern: "end"}, 2964 } 2965 t.Run("LineMatches", func(t *testing.T) { 2966 res := searchForTest(t, b, q) 2967 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2968 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2969 } 2970 m := res.Files[0].LineMatches[0].LineFragments[0] 2971 if m.Offset != 14 { 2972 t.Fatalf("got offset %d want 0", m.Offset) 2973 } 2974 }) 2975 2976 t.Run("ChunkMatches", func(t *testing.T) { 2977 res := searchForTest(t, b, q, chunkOpts) 2978 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2979 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2980 } 2981 m := res.Files[0].ChunkMatches[0].Ranges[0] 2982 if m.Start.ByteOffset != 14 { 2983 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2984 } 2985 }) 2986} 2987 2988func TestSymbolSubstring(t *testing.T) { 2989 content := []byte("bla\nsymblabla\nbla") 2990 // ----------------0123-4567890123-456 2991 2992 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2993 Document{ 2994 Name: "f1", 2995 Content: content, 2996 Symbols: []DocumentSection{{4, 12}}, 2997 }, 2998 ) 2999 q := &query.Symbol{ 3000 Expr: &query.Substring{Pattern: "bla"}, 3001 } 3002 t.Run("LineMatches", func(t *testing.T) { 3003 res := searchForTest(t, b, q) 3004 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3005 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3006 } 3007 m := res.Files[0].LineMatches[0].LineFragments[0] 3008 if m.Offset != 7 || m.MatchLength != 3 { 3009 
t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 3010 } 3011 }) 3012 3013 t.Run("ChunkMatches", func(t *testing.T) { 3014 res := searchForTest(t, b, q, chunkOpts) 3015 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3016 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3017 } 3018 m := res.Files[0].ChunkMatches[0].Ranges[0] 3019 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 3020 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 3021 } 3022 }) 3023} 3024 3025func TestSymbolSubstringExact(t *testing.T) { 3026 content := []byte("bla\nsym\nbla\nsym\nasymb") 3027 // ----------------0123-4567-890123456-78901 3028 3029 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3030 Document{ 3031 Name: "f1", 3032 Content: content, 3033 Symbols: []DocumentSection{{4, 7}}, 3034 }, 3035 ) 3036 q := &query.Symbol{ 3037 Expr: &query.Substring{Pattern: "sym"}, 3038 } 3039 t.Run("LineMatches", func(t *testing.T) { 3040 res := searchForTest(t, b, q) 3041 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3042 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3043 } 3044 m := res.Files[0].LineMatches[0].LineFragments[0] 3045 if m.Offset != 4 { 3046 t.Fatalf("got offset %d, want 7", m.Offset) 3047 } 3048 }) 3049 3050 t.Run("ChunkMatches", func(t *testing.T) { 3051 res := searchForTest(t, b, q, chunkOpts) 3052 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3053 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3054 } 3055 m := res.Files[0].ChunkMatches[0].Ranges[0] 3056 if m.Start.ByteOffset != 4 { 3057 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 3058 } 3059 }) 3060} 3061 3062func TestSymbolRegexpExact(t *testing.T) { 3063 content := []byte("blah\nbla\nbl") 3064 // ----------------01234-5678-90 3065 3066 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3067 Document{ 3068 Name: "f1", 3069 Content: content, 3070 Symbols: 
[]DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 3071 }, 3072 ) 3073 q := &query.Symbol{ 3074 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 3075 } 3076 t.Run("LineMatches", func(t *testing.T) { 3077 res := searchForTest(t, b, q) 3078 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3079 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3080 } 3081 m := res.Files[0].LineMatches[0].LineFragments[0] 3082 if m.Offset != 5 { 3083 t.Fatalf("got offset %d, want 5", m.Offset) 3084 } 3085 }) 3086 3087 t.Run("ChunkMatches", func(t *testing.T) { 3088 res := searchForTest(t, b, q, chunkOpts) 3089 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3090 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3091 } 3092 m := res.Files[0].ChunkMatches[0].Ranges[0] 3093 if m.Start.ByteOffset != 5 { 3094 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 3095 } 3096 }) 3097} 3098 3099func TestSymbolRegexpPartial(t *testing.T) { 3100 content := []byte("abcdef") 3101 // ----------------012345 3102 3103 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3104 Document{ 3105 Name: "f1", 3106 Content: content, 3107 Symbols: []DocumentSection{{0, 6}}, 3108 }, 3109 ) 3110 q := &query.Symbol{ 3111 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 3112 } 3113 t.Run("LineMatches", func(t *testing.T) { 3114 res := searchForTest(t, b, q) 3115 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3116 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3117 } 3118 m := res.Files[0].LineMatches[0].LineFragments[0] 3119 if m.Offset != 1 { 3120 t.Fatalf("got offset %d, want 1", m.Offset) 3121 } 3122 if m.MatchLength != 3 { 3123 t.Fatalf("got match length %d, want 3", m.MatchLength) 3124 } 3125 }) 3126 3127 t.Run("ChunkMatches", func(t *testing.T) { 3128 res := searchForTest(t, b, q, chunkOpts) 3129 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3130 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3131 } 3132 m := 
res.Files[0].ChunkMatches[0].Ranges[0] 3133 if m.Start.ByteOffset != 1 { 3134 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 3135 } 3136 if m.End.ByteOffset != 4 { 3137 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 3138 } 3139 }) 3140} 3141 3142func TestSymbolRegexpAll(t *testing.T) { 3143 docs := []Document{ 3144 { 3145 Name: "f1", 3146 Content: []byte("Hello Zoekt"), 3147 // --------------01234567890 3148 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 3149 }, 3150 { 3151 Name: "f2", 3152 Content: []byte("Second Zoekt Third"), 3153 // --------------012345678901234567 3154 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 3155 }, 3156 } 3157 3158 b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...) 3159 q := &query.Symbol{ 3160 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 3161 } 3162 t.Run("LineMatches", func(t *testing.T) { 3163 res := searchForTest(t, b, q) 3164 if len(res.Files) != len(docs) { 3165 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3166 } 3167 for i, want := range docs { 3168 got := res.Files[i].LineMatches[0].LineFragments 3169 if len(got) != len(want.Symbols) { 3170 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3171 } 3172 3173 for j, sec := range want.Symbols { 3174 if sec.Start != got[j].Offset { 3175 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 3176 } 3177 } 3178 } 3179 }) 3180 3181 t.Run("ChunkMatches", func(t *testing.T) { 3182 res := searchForTest(t, b, q, chunkOpts) 3183 if len(res.Files) != len(docs) { 3184 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3185 } 3186 for i, want := range docs { 3187 got := res.Files[i].ChunkMatches[0].Ranges 3188 if len(got) != len(want.Symbols) { 3189 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3190 } 3191 3192 for j, sec := range want.Symbols { 3193 if sec.Start != uint32(got[j].Start.ByteOffset) { 3194 t.Fatalf("got 
offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 3195 } 3196 } 3197 } 3198 }) 3199} 3200 3201func TestHitIterTerminate(t *testing.T) { 3202 // contrived input: trigram frequencies forces selecting abc + 3203 // def for the distance iteration. There is no index, so this 3204 // will advance the compressedPostingIterator to beyond the 3205 // end. 3206 content := []byte("abc bcdbcd cdecde abcabc def efg") 3207 b := testIndexBuilder(t, nil, 3208 Document{ 3209 Name: "f1", 3210 Content: content, 3211 }, 3212 ) 3213 3214 t.Run("LineMatches", func(t *testing.T) { 3215 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 3216 }) 3217 3218 t.Run("ChunkMatches", func(t *testing.T) { 3219 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 3220 }) 3221} 3222 3223func TestDistanceHitIterBailLast(t *testing.T) { 3224 content := []byte("AST AST AST UASH") 3225 b := testIndexBuilder(t, nil, 3226 Document{ 3227 Name: "f1", 3228 Content: content, 3229 }, 3230 ) 3231 t.Run("LineMatches", func(t *testing.T) { 3232 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 3233 if len(res.Files) != 0 { 3234 t.Fatalf("got %v, want no results", res.Files) 3235 } 3236 }) 3237 3238 t.Run("LineMatches", func(t *testing.T) { 3239 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 3240 if len(res.Files) != 0 { 3241 t.Fatalf("got %v, want no results", res.Files) 3242 } 3243 }) 3244} 3245 3246func TestDocumentSectionRuneBoundary(t *testing.T) { 3247 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3248 b, err := NewIndexBuilder(nil) 3249 if err != nil { 3250 t.Fatalf("NewIndexBuilder: %v", err) 3251 } 3252 3253 for i, sec := range []DocumentSection{ 3254 {2, 6}, 3255 {3, 7}, 3256 } { 3257 if err := b.Add(Document{ 3258 Name: "f1", 3259 Content: []byte(content), 3260 Symbols: []DocumentSection{sec}, 3261 }); err == nil { 3262 t.Errorf("%d: Add succeeded", i) 3263 } 3264 } 3265} 3266 3267func 
TestUnicodeQuery(t *testing.T) { 3268 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3269 b := testIndexBuilder(t, nil, 3270 Document{ 3271 Name: "f1", 3272 Content: []byte(content), 3273 }, 3274 ) 3275 3276 q := &query.Substring{Pattern: content} 3277 3278 t.Run("LineMatches", func(t *testing.T) { 3279 res := searchForTest(t, b, q) 3280 if len(res.Files) != 1 { 3281 t.Fatalf("want 1 match, got %v", res.Files) 3282 } 3283 3284 f := res.Files[0] 3285 if len(f.LineMatches) != 1 { 3286 t.Fatalf("want 1 line, got %v", f.LineMatches) 3287 } 3288 l := f.LineMatches[0] 3289 3290 if len(l.LineFragments) != 1 { 3291 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 3292 } 3293 fr := l.LineFragments[0] 3294 if fr.MatchLength != len(content) { 3295 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 3296 } 3297 }) 3298 3299 t.Run("ChunkMatches", func(t *testing.T) { 3300 res := searchForTest(t, b, q, chunkOpts) 3301 if len(res.Files) != 1 { 3302 t.Fatalf("want 1 match, got %v", res.Files) 3303 } 3304 3305 f := res.Files[0] 3306 if len(f.ChunkMatches) != 1 { 3307 t.Fatalf("want 1 line, got %v", f.LineMatches) 3308 } 3309 cm := f.ChunkMatches[0] 3310 3311 if len(cm.Ranges) != 1 { 3312 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 3313 } 3314 rr := cm.Ranges[0] 3315 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 3316 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 3317 } 3318 }) 3319} 3320 3321func TestSkipInvalidContent(t *testing.T) { 3322 for _, content := range []string{ 3323 // Binary 3324 "abc def \x00 abc", 3325 } { 3326 3327 b, err := NewIndexBuilder(nil) 3328 if err != nil { 3329 t.Fatalf("NewIndexBuilder: %v", err) 3330 } 3331 3332 if err := b.Add(Document{ 3333 Name: "f1", 3334 Content: []byte(content), 3335 }); err != nil { 3336 t.Fatal(err) 3337 } 3338 3339 t.Run("LineMatches", func(t *testing.T) { 3340 q := &query.Substring{Pattern: "abc def"} 3341 
res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with inputs that must be accepted
// and inputs that must be rejected, first with the large-file allowance
// disabled and then with it enabled.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if err := docChecker.Check([]byte(text), 20000, false); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if err := docChecker.Check([]byte(text), 15, false); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if err := docChecker.Check([]byte(text), 15, true); err != nil {
			t.Errorf("Check(%q): %v", text, err)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx"} {
		if err := docChecker.Check([]byte(text), 15, true); err == nil {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd searches file contents with a regexp that requires "apple" and
// "banana" on the same line. Only f1 has such a line; f2 has both words but on
// different lines, and f3 lacks "apple".
func TestLineAnd(t *testing.T) {
	b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would only surface later as a confusing failure.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName runs the same "apple ... banana" regexp restricted to
// file names. f1 and f2 contain the phrase in their content only, so the sole
// expected hit is the document that is literally named "apple banana".
func TestLineAndFileName(t *testing.T) {
	b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestMultiLineRegex checks a regexp whose match spans a newline: "apple" and
// "grape" separated by at least one whitespace character. Only f1 has them in
// that order. The match is expected to surface as two line matches, or as a
// single chunk holding a single range.
func TestMultiLineRegex(t *testing.T) {
	b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp: r,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: res.Files[0] is indexed below and must exist.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].LineMatches); l != 2 {
			t.Errorf("got %v, want 2 line matches", l)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		// Fatal: both res.Files[0] and ChunkMatches[0] are indexed below.
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].ChunkMatches); l != 1 {
			t.Fatalf("got %v, want 1 chunk matches", l)
		}
		if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 {
			t.Errorf("got %v, want 1 chunk ranges", l)
		}
	})
}

// TestSearchTypeFileName checks query.Type with TypeFileName: the child query
// only selects files, and the reported match comes from the remaining query
// (or is a file-name match when nothing else is left).
func TestSearchTypeFileName(t *testing.T) {
	b := testIndexBuilder(t, &zoekt.Repository{
		Name: "reponame",
	},
		Document{Name: "f1", Content: []byte("bla the needle")},
		Document{Name: "f2", Content: []byte("another file another\nneedle")},
		// -----------------------------------012345678901234567890-123456
	)

	t.Run("LineMatches", func(t *testing.T) {
		// wantSingleMatch asserts exactly one file with exactly one line
		// match, rendered as "name" for file-name matches and "name:offset"
		// for content matches.
		wantSingleMatch := func(res *zoekt.SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}))
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			})
		wantSingleMatch(res, "f2")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// Chunk-match counterpart of the helper in the LineMatches subtest.
		wantSingleMatch := func(res *zoekt.SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		// Only return the later match in the second file
		res := searchForTest(t, b, query.NewAnd(
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "needle"},
			},
			&query.Substring{Pattern: "file"}),
			chunkOpts,
		)
		wantSingleMatch(res, "f2:8")

		// Only return a filename result
		res = searchForTest(t, b,
			&query.Type{
				Type:  query.TypeFileName,
				Child: &query.Substring{Pattern: "file"},
			},
			chunkOpts,
		)
		wantSingleMatch(res, "f2")
	})
}

// TestSearchTypeLanguage checks query.Language against documents whose
// language depends on file name and content, including the fallback behaviour
// that is exercised by lowering the builder's featureVersion to 11.
func TestSearchTypeLanguage(t *testing.T) {
	b := testIndexBuilder(t, &zoekt.Repository{
		Name: "reponame",
	},
		Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")},
		Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)},
		Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)},
		Document{Name: "be.magik", Content: []byte(`_package unicorn`)},
	)

	t.Log(b.languageMap)

	t.Run("LineMatches", func(t *testing.T) {
		// This subtest mutates the shared builder; put the feature version
		// back afterwards so later subtests do not depend on execution order.
		prevVersion := b.featureVersion
		t.Cleanup(func() { b.featureVersion = prevVersion })

		// wantSingleMatch asserts exactly one file with exactly one line
		// match, rendered as "name" for file-name matches and "name:offset"
		// for content matches.
		wantSingleMatch := func(res *zoekt.SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].LineMatches) != 1 {
				t.Errorf("got %d line matches", len(fmatches[0].LineMatches))
				return
			}
			var got string
			if fmatches[0].LineMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		res := searchForTest(t, b, &query.Language{Language: "Apex"})
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"})
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"})
		wantSingleMatch(res, "hello.h")

		res = searchForTest(t, b, &query.Language{Language: "Magik"})
		wantSingleMatch(res, "be.magik")

		// With the current feature version a C++ query must match nothing.
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		// test fallback language search by pretending it's an older index version
		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"})
		wantSingleMatch(res, "hello.h")
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		prevVersion := b.featureVersion
		t.Cleanup(func() { b.featureVersion = prevVersion })

		// Chunk-match counterpart of the helper in the LineMatches subtest.
		wantSingleMatch := func(res *zoekt.SearchResult, want string) {
			t.Helper()
			fmatches := res.Files
			if len(fmatches) != 1 {
				t.Errorf("got %v, want 1 matches", len(fmatches))
				return
			}
			if len(fmatches[0].ChunkMatches) != 1 {
				t.Errorf("got %d chunk matches", len(fmatches[0].ChunkMatches))
				return
			}
			var got string
			if fmatches[0].ChunkMatches[0].FileName {
				got = fmatches[0].FileName
			} else {
				got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset)
			}

			if got != want {
				t.Errorf("got %s, want %s", got, want)
			}
		}

		b.featureVersion = FeatureVersion // reset feature version
		res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts)
		wantSingleMatch(res, "apex.cls")

		res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts)
		wantSingleMatch(res, "tex.cls")

		res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts)
		wantSingleMatch(res, "hello.h")

		// With the current feature version a C++ query must match nothing.
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %d results for C++, want 0", len(res.Files))
		}

		// test fallback language search by pretending it's an older index version
		b.featureVersion = 11 // force fallback
		res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts)
		wantSingleMatch(res, "hello.h")
	})
}

// TestStats compares the per-repository stats recorded in the index data with
// expected values for empty, simple and compound shards. The Repository,
// IndexMetadata and IndexBytes fields are excluded from the comparison, so the
// IndexBytes values in the expectations below are informational only.
func TestStats(t *testing.T) {
	ignored := []cmp.Option{
		cmpopts.EquateEmpty(),
		cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"),
		cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
		cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
	}

	// repoListEntries builds a searcher from b and returns the repo list
	// entries stored in its index data. It takes the calling subtest's t so
	// that a fatal failure is reported on, and stops, the right test.
	repoListEntries := func(t *testing.T, b *IndexBuilder) []zoekt.RepoListEntry {
		t.Helper()
		searcher := searcherForTest(t, b)
		indexdata, ok := searcher.(*indexData)
		if !ok {
			t.Fatalf("searcherForTest returned %T, want *indexData", searcher)
		}
		return indexdata.repoListEntry
	}

	t.Run("one empty repo", func(t *testing.T) {
		b := testIndexBuilder(t, nil)
		got := repoListEntries(t, b)
		want := []zoekt.RepoListEntry{
			{
				Stats: zoekt.RepoStats{
					Repos:                      0,
					Shards:                     1,
					Documents:                  0,
					IndexBytes:                 20,
					ContentBytes:               0,
					NewLinesCount:              0,
					DefaultBranchNewLinesCount: 0,
					OtherBranchesNewLinesCount: 0,
				},
			},
		}

		if diff := cmp.Diff(want, got, ignored...); diff != "" {
			t.Fatalf("mismatch (-want +got):\n%s", diff)
		}
	})

	t.Run("one simple shard", func(t *testing.T) {
		b := testIndexBuilder(t, nil,
			Document{Name: "doc 0", Content: []byte("content 0")},
			Document{Name: "doc 1", Content: []byte("content 1")},
		)
		got := repoListEntries(t, b)
		want := []zoekt.RepoListEntry{
			{
				Stats: zoekt.RepoStats{
					Repos:                      0,
					Shards:                     1,
					Documents:                  2,
					IndexBytes:                 224,
					ContentBytes:               28,
					NewLinesCount:              0,
					DefaultBranchNewLinesCount: 0,
					OtherBranchesNewLinesCount: 0,
				},
			},
		}

		if diff := cmp.Diff(want, got, ignored...); diff != "" {
			t.Fatalf("mismatch (-want +got):\n%s", diff)
		}
	})

	t.Run("one compound shard", func(t *testing.T) {
		b := testIndexBuilderCompound(t,
			[]*zoekt.Repository{
				{Name: "repo 0"},
				{Name: "repo 1"},
			},
			[][]Document{
				{
					{Name: "doc 0", Content: []byte("content 0")},
					{Name: "doc 1", Content: []byte("content 1")},
				},
				{
					{Name: "doc 2", Content: []byte("content 2")},
					{Name: "doc 3", Content: []byte("content 3")},
				},
			},
		)
		got := repoListEntries(t, b)
		want := []zoekt.RepoListEntry{
			{
				Stats: zoekt.RepoStats{
					Repos:                      0,
					Shards:                     1,
					Documents:                  2,
					IndexBytes:                 180,
					ContentBytes:               28,
					NewLinesCount:              0,
					DefaultBranchNewLinesCount: 0,
					OtherBranchesNewLinesCount: 0,
				},
			},
			{
				Stats: zoekt.RepoStats{
					Repos:                      0,
					Shards:                     1,
					Documents:                  2,
					IndexBytes:                 180,
					ContentBytes:               28,
					NewLinesCount:              0,
					DefaultBranchNewLinesCount: 0,
					OtherBranchesNewLinesCount: 0,
				},
			},
		}

		if diff := cmp.Diff(want, got, ignored...); diff != "" {
			t.Fatalf("mismatch (-want +got):\n%s", diff)
		}
	})

	t.Run("compound shard with empty repos", func(t *testing.T) {
		b := testIndexBuilderCompound(t,
			[]*zoekt.Repository{
				{Name: "repo 0"},
				{Name: "repo 1"},
				{Name: "repo 2"},
				{Name: "repo 3"},
				{Name: "repo 4"},
			},
			[][]Document{
				{{Name: "doc 0", Content: []byte("content 0")}},
				nil,
				{{Name: "doc 1", Content: []byte("content 1")}},
				nil,
				nil,
			},
		)
		got := repoListEntries(t, b)

		entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
			Shards:       1,
			Documents:    0,
			ContentBytes: 0,
		}}
		entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{
			Shards:       1,
			Documents:    1,
			ContentBytes: 14,
		}}

		want := []zoekt.RepoListEntry{
			entryNonEmpty,
			entryEmpty,
			entryNonEmpty,
			entryEmpty,
			entryEmpty,
		}

		if diff := cmp.Diff(want, got, ignored...); diff != "" {
			t.Fatalf("mismatch (-want +got):\n%s", diff)
		}
	})
}

// This tests the frequent pattern "\bLITERAL\b".
//
// Both subtests also assert that no regexp was considered for the query
// (Stats.RegexpsConsidered stays 0).
func TestWordSearch(t *testing.T) {
	content := []byte("needle the bla")
	// ----------------01234567890123

	b := testIndexBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:        mustParseRE("\\bthe\\b"),
				CaseSensitive: true,
				Content:       true,
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Stats.RegexpsConsidered != 0 {
			t.Fatal("expected regexp to be skipped")
		}

		got := sres.Files[0].LineMatches[0]
		want := zoekt.LineMatch{
			LineFragments: []zoekt.LineFragmentMatch{{
				LineOffset:  7,
				Offset:      7,
				MatchLength: 3,
			}},
			Line:       content,
			FileName:   false,
			LineNumber: 1,
			LineStart:  0,
			LineEnd:    14,
		}

		if !reflect.DeepEqual(got, want) {
			t.Errorf("got %#v, want %#v", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:        mustParseRE("\\bthe\\b"),
				CaseSensitive: true,
			}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Stats.RegexpsConsidered != 0 {
			t.Fatal("expected regexp to be skipped")
		}

		got := sres.Files[0].ChunkMatches[0]
		want := zoekt.ChunkMatch{
			Content:      content,
			ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
			Ranges: []zoekt.Range{{
				Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8},
				End:   zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11},
			}},
		}

		if diff := cmp.Diff(want, got); diff != "" {
			t.Fatal(diff)
		}
	})
}

// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match.
// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk.
func BenchmarkScoreChunkMatches(b *testing.B) {
	ctx := context.Background()
	var builder strings.Builder
	for i := 0; i < 1000; i++ {
		// Format straight into the builder; no intermediate string per line.
		fmt.Fprintf(&builder, "line-%d one one one two two two three three three four four four five five\n", i)
	}

	searcher := searcherForTest(b, testIndexBuilder(b, nil,
		Document{Name: "f1", Content: []byte(builder.String())},
	))

	q := &query.Or{
		Children: []query.Q{
			&query.Substring{Pattern: "f"},
			&query.Substring{Pattern: "t"},
		}}

	b.Run("score large ChunkMatch", func(b *testing.B) {
		b.ReportAllocs()
		b.ResetTimer()

		for i := 0; i < b.N; i++ {
			sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1})
			if err != nil {
				b.Fatal(err)
			}

			matches := sres.Files
			if len(matches) == 0 {
				b.Fatalf("want file index, got none")
			}
		}
	})
}