fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt" 31 "github.com/sourcegraph/zoekt/query" 32) 33 34func clearScores(r *zoekt.SearchResult) { 35 for i := range r.Files { 36 r.Files[i].Score = 0.0 37 for j := range r.Files[i].LineMatches { 38 r.Files[i].LineMatches[j].Score = 0.0 39 } 40 for j := range r.Files[i].ChunkMatches { 41 r.Files[i].ChunkMatches[j].Score = 0.0 42 r.Files[i].ChunkMatches[j].BestLineMatch = 0 43 } 44 r.Files[i].Checksum = nil 45 r.Files[i].Debug = "" 46 } 47} 48 49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder { 50 tb.Helper() 51 52 b, err := NewShardBuilder(repo) 53 if err != nil { 54 tb.Fatalf("NewShardBuilder: %v", err) 55 } 56 57 for i, d := range docs { 58 if err := b.Add(d); err != nil { 59 tb.Fatalf("Add %d: %v", i, err) 60 } 61 } 62 63 return b 64} 65 66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder { 67 t.Helper() 68 69 b := newShardBuilder() 70 b.indexFormatVersion = NextIndexFormatVersion 71 72 if len(repos) != len(docs) { 73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: 
len(repos)=%d len(docs)=%d", len(repos), len(docs)) 74 } 75 76 for i, repo := range repos { 77 if err := b.setRepository(repo); err != nil { 78 t.Fatal(err) 79 } 80 for j, d := range docs[i] { 81 if err := b.Add(d); err != nil { 82 t.Fatalf("Add %d %d: %v", i, j, err) 83 } 84 } 85 } 86 87 return b 88} 89 90func TestBoundary(t *testing.T) { 91 b := testShardBuilder(t, nil, 92 Document{Name: "f1", Content: []byte("x the")}, 93 Document{Name: "f1", Content: []byte("reader")}) 94 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 95 if len(res.Files) > 0 { 96 t.Fatalf("got %v, want no matches", res.Files) 97 } 98} 99 100func TestDocSectionInvalid(t *testing.T) { 101 b, err := NewShardBuilder(nil) 102 if err != nil { 103 t.Fatalf("NewShardBuilder: %v", err) 104 } 105 doc := Document{ 106 Name: "f1", 107 Content: []byte("01234567890123"), 108 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 109 } 110 111 if err := b.Add(doc); err == nil { 112 t.Errorf("overlapping doc sections should fail") 113 } 114 115 doc = Document{ 116 Name: "f1", 117 Content: []byte("01234567890123"), 118 Symbols: []DocumentSection{{0, 20}}, 119 } 120 121 if err := b.Add(doc); err == nil { 122 t.Errorf("doc sections beyond EOF should fail") 123 } 124} 125 126func TestBasic(t *testing.T) { 127 b := testShardBuilder(t, nil, 128 Document{ 129 Name: "f2", 130 Content: []byte("to carry water in the no later bla"), 131 // --------------0123456789012345678901234567890123 132 }) 133 134 t.Run("LineMatch", func(t *testing.T) { 135 res := searchForTest(t, b, &query.Substring{ 136 Pattern: "water", 137 CaseSensitive: true, 138 }) 139 fmatches := res.Files 140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 141 t.Fatalf("got %v, want 1 matches", fmatches) 142 } 143 144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 145 want := "f2:9" 146 if got != want { 147 t.Errorf("1: got %s, want %s", got, want) 148 } 149 }) 150 151 
t.Run("ChunkMatch", func(t *testing.T) { 152 res := searchForTest(t, b, &query.Substring{ 153 Pattern: "water", 154 CaseSensitive: true, 155 }, chunkOpts) 156 fmatches := res.Files 157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 158 t.Fatalf("got %v, want 1 matches", fmatches) 159 } 160 161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 162 want := "f2:9" 163 if got != want { 164 t.Errorf("1: got %s, want %s", got, want) 165 } 166 }) 167} 168 169func TestEmptyIndex(t *testing.T) { 170 b := testShardBuilder(t, nil) 171 searcher := searcherForTest(t, b) 172 173 var opts zoekt.SearchOptions 174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 175 t.Fatalf("Search: %v", err) 176 } 177 178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 179 t.Fatalf("List: %v", err) 180 } 181 182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 183 t.Fatalf("Search: %v", err) 184 } 185} 186 187type memSeeker struct { 188 data []byte 189} 190 191func (s *memSeeker) Name() string { 192 return "memseeker" 193} 194 195func (s *memSeeker) Close() {} 196func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 197 return s.data[off : off+sz], nil 198} 199 200func (s *memSeeker) Size() (uint32, error) { 201 return uint32(len(s.data)), nil 202} 203 204func TestNewlines(t *testing.T) { 205 b := testShardBuilder(t, nil, 206 // -----------------------------------------012345-678901-234 207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 208 209 t.Run("LineMatches", func(t *testing.T) { 210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 211 212 matches := sres.Files 213 want := []zoekt.FileMatch{{ 214 FileName: "filename", 215 LineMatches: []zoekt.LineMatch{{ 216 LineFragments: []zoekt.LineFragmentMatch{{ 217 
Offset: 8, 218 LineOffset: 2, 219 MatchLength: 3, 220 }}, 221 Line: []byte("line2\n"), 222 LineStart: 6, 223 LineEnd: 12, 224 LineNumber: 2, 225 }}, 226 }} 227 228 if diff := cmp.Diff(matches, want); diff != "" { 229 t.Fatal(diff) 230 } 231 }) 232 233 t.Run("ChunkMatches", func(t *testing.T) { 234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 235 236 matches := sres.Files 237 want := []zoekt.FileMatch{{ 238 FileName: "filename", 239 ChunkMatches: []zoekt.ChunkMatch{{ 240 Content: []byte("line2\n"), 241 ContentStart: zoekt.Location{ 242 ByteOffset: 6, 243 LineNumber: 2, 244 Column: 1, 245 }, 246 Ranges: []zoekt.Range{{ 247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 249 }}, 250 }}, 251 }} 252 253 if diff := cmp.Diff(want, matches); diff != "" { 254 t.Fatal(diff) 255 } 256 }) 257} 258 259// A result spanning multiple lines should have LineMatches that only cover 260// single lines. 
261func TestQueryNewlines(t *testing.T) { 262 text := "line1\nline2\nbla" 263 b := testShardBuilder(t, nil, 264 Document{Name: "filename", Content: []byte(text)}) 265 266 t.Run("LineMatches", func(t *testing.T) { 267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 268 matches := sres.Files 269 if len(matches) != 1 { 270 t.Fatalf("got %d file matches, want exactly one", len(matches)) 271 } 272 m := matches[0] 273 if len(m.LineMatches) != 2 { 274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches) 275 } 276 }) 277 278 t.Run("ChunkMatches", func(t *testing.T) { 279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 280 matches := sres.Files 281 if len(matches) != 1 { 282 t.Fatalf("got %d file matches, want exactly one", len(matches)) 283 } 284 m := matches[0] 285 if len(m.ChunkMatches) != 1 { 286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 287 } 288 }) 289} 290 291var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} 292 293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { 294 searcher := searcherForTest(t, b) 295 var opts zoekt.SearchOptions 296 if len(o) > 0 { 297 opts = o[0] 298 } 299 res, err := searcher.Search(context.Background(), q, &opts) 300 if err != nil { 301 t.Fatalf("Search(%s): %v", q, err) 302 } 303 clearScores(res) 304 return res 305} 306 307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher { 308 var buf bytes.Buffer 309 if err := b.Write(&buf); err != nil { 310 t.Fatal(err) 311 } 312 f := &memSeeker{buf.Bytes()} 313 314 searcher, err := NewSearcher(f) 315 if err != nil { 316 t.Fatalf("NewSearcher: %v", err) 317 } 318 319 return searcher 320} 321 322func TestCaseFold(t *testing.T) { 323 b := testShardBuilder(t, nil, 324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 325 // -----------------------------------012345678901234 326 ) 327 t.Run("LineMatches", 
func(t *testing.T) { 328 sres := searchForTest(t, b, &query.Substring{ 329 Pattern: "bananas", 330 CaseSensitive: true, 331 }) 332 matches := sres.Files 333 if len(matches) != 0 { 334 t.Errorf("foldcase: got %#v, want 0 matches", matches) 335 } 336 337 sres = searchForTest(t, b, 338 &query.Substring{ 339 Pattern: "BaNaNAS", 340 CaseSensitive: true, 341 }) 342 matches = sres.Files 343 if len(matches) != 1 { 344 t.Errorf("no foldcase: got %v, want 1 matches", matches) 345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 346 t.Errorf("foldcase: got %v, want offsets 7", matches) 347 } 348 }) 349 350 t.Run("ChunkMatches", func(t *testing.T) { 351 sres := searchForTest(t, b, &query.Substring{ 352 Pattern: "bananas", 353 CaseSensitive: true, 354 }, chunkOpts) 355 matches := sres.Files 356 if len(matches) != 0 { 357 t.Errorf("foldcase: got %#v, want 0 matches", matches) 358 } 359 360 sres = searchForTest(t, b, 361 &query.Substring{ 362 Pattern: "BaNaNAS", 363 CaseSensitive: true, 364 }) 365 matches = sres.Files 366 if len(matches) != 1 { 367 t.Errorf("no foldcase: got %v, want 1 matches", matches) 368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 369 t.Errorf("foldcase: got %v, want offsets 7", matches) 370 } 371 }) 372} 373 374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 375// chars. Those are then set as symbols. 
376func wordsAsSymbols(doc Document) Document { 377 re := regexp.MustCompile(`\b\w{2,}\b`) 378 var symbols []DocumentSection 379 var symbolsMetadata []*zoekt.Symbol 380 for _, match := range re.FindAllIndex(doc.Content, -1) { 381 symbols = append(symbols, DocumentSection{ 382 Start: uint32(match[0]), 383 End: uint32(match[1]), 384 }) 385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) 386 } 387 doc.Symbols = symbols 388 doc.SymbolsMetaData = symbolsMetadata 389 return doc 390} 391 392func TestSearchStats(t *testing.T) { 393 ctx := context.Background() 394 searcher := searcherForTest(t, testShardBuilder(t, nil, 395 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 396 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 397 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 398 // --------------------------------------------------0123456789012345 399 )) 400 401 andQuery := query.NewAnd( 402 &query.Substring{ 403 Pattern: "banana", 404 }, 405 &query.Substring{ 406 Pattern: "apple", 407 }, 408 ) 409 410 t.Run("LineMatches", func(t *testing.T) { 411 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) 412 if err != nil { 413 t.Fatal(err) 414 } 415 matches := sres.Files 416 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 417 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 418 } 419 420 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 421 t.Fatalf("got %#v, want offsets 2,9", matches) 422 } 423 }) 424 t.Run("ChunkMatches", func(t *testing.T) { 425 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 426 if err != nil { 427 t.Fatal(err) 428 } 429 matches := sres.Files 430 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 431 t.Fatalf("got %#v, want 1 chunk match with 2 
ranges", matches) 432 } 433 434 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 435 t.Fatalf("got %#v, want offsets 2,9", matches) 436 } 437 }) 438 t.Run("Stats", func(t *testing.T) { 439 cases := []struct { 440 Name string 441 Q query.Q 442 Want zoekt.Stats 443 }{{ 444 Name: "and-query", 445 Q: andQuery, 446 Want: zoekt.Stats{ 447 FilesLoaded: 1, 448 ContentBytesLoaded: 22, 449 IndexBytesLoaded: 10, 450 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 451 NgramLookups: 104, 452 MatchCount: 2, 453 FileCount: 1, 454 FilesConsidered: 2, 455 ShardsScanned: 1, 456 }, 457 }, { 458 Name: "one-trigram", 459 Q: &query.Substring{ 460 Pattern: "a y", 461 Content: true, 462 CaseSensitive: true, 463 }, 464 Want: zoekt.Stats{ 465 ContentBytesLoaded: 14, 466 IndexBytesLoaded: 1, 467 FileCount: 1, 468 FilesConsidered: 1, 469 FilesLoaded: 1, 470 ShardsScanned: 1, 471 MatchCount: 1, 472 NgramMatches: 1, 473 NgramLookups: 2, // once to lookup frequency then again to access posting list. 474 }, 475 }, { 476 Name: "one-trigram-case-insensitive", 477 Q: &query.Substring{ 478 Pattern: "a y", 479 Content: true, 480 }, 481 Want: zoekt.Stats{ 482 ContentBytesLoaded: 14, 483 IndexBytesLoaded: 1, 484 FileCount: 1, 485 FilesConsidered: 1, 486 FilesLoaded: 1, 487 ShardsScanned: 1, 488 MatchCount: 1, 489 NgramMatches: 1, 490 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
491 }, 492 }, { 493 Name: "one-trigram-pruned", 494 Q: &query.Substring{ 495 Pattern: "foo", 496 Content: true, 497 CaseSensitive: true, 498 }, 499 Want: zoekt.Stats{ 500 ShardsSkippedFilter: 1, 501 NgramLookups: 1, // only had to lookup once 502 }, 503 }, { 504 Name: "one-trigram-branch-pruned", 505 Q: query.NewAnd( 506 &query.Substring{ 507 Pattern: "foo", 508 Content: true, 509 CaseSensitive: true, 510 }, 511 &query.Substring{ 512 Pattern: "a y", 513 Content: true, 514 CaseSensitive: true, 515 }, 516 ), 517 Want: zoekt.Stats{ 518 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 519 ShardsSkippedFilter: 1, 520 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 521 }, 522 }, { 523 Name: "symbol-substr-nomatch", 524 Q: &query.Symbol{Expr: &query.Substring{ 525 Pattern: "banana apple", 526 Content: true, 527 CaseSensitive: true, 528 }}, 529 Want: zoekt.Stats{ 530 IndexBytesLoaded: 3, 531 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 532 MatchCount: 0, // even though there is a match it doesn't align with a symbol 533 ShardsScanned: 1, 534 NgramMatches: 1, 535 NgramLookups: 12, 536 }, 537 }, { 538 Name: "symbol-substr", 539 Q: &query.Symbol{Expr: &query.Substring{ 540 Pattern: "apple", 541 Content: true, 542 CaseSensitive: true, 543 }}, 544 Want: zoekt.Stats{ 545 ContentBytesLoaded: 35, 546 IndexBytesLoaded: 4, 547 FileCount: 2, 548 FilesConsidered: 2, // must be 2 to ensure we used the index 549 FilesLoaded: 2, 550 MatchCount: 2, // apple symbols is in two files 551 ShardsScanned: 1, 552 NgramMatches: 2, 553 NgramLookups: 5, 554 }, 555 }, { 556 Name: "symbol-regexp-nomatch", 557 Q: &query.Symbol{Expr: &query.Regexp{ 558 Regexp: mustParseRE("^apple.banana$"), 559 Content: true, 560 CaseSensitive: true, 561 }}, 562 Want: zoekt.Stats{ 563 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 564 IndexBytesLoaded: 10, 565 
FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index 566 FilesLoaded: 2, 567 MatchCount: 0, // even though there is a match it doesn't align with a symbol 568 ShardsScanned: 1, 569 NgramMatches: 3, 570 NgramLookups: 11, 571 }, 572 }, { 573 Name: "symbol-regexp", 574 Q: &query.Symbol{Expr: &query.Regexp{ 575 Regexp: mustParseRE("^app.e$"), 576 Content: true, 577 CaseSensitive: true, 578 }}, 579 Want: zoekt.Stats{ 580 ContentBytesLoaded: 35, 581 IndexBytesLoaded: 2, 582 FileCount: 2, 583 FilesConsidered: 2, // must be 2 to ensure we used the index 584 FilesLoaded: 2, 585 MatchCount: 2, // apple symbols is in two files 586 ShardsScanned: 1, 587 NgramMatches: 2, 588 NgramLookups: 2, 589 }, 590 }} 591 592 for _, tc := range cases { 593 t.Run(tc.Name, func(t *testing.T) { 594 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 595 if err != nil { 596 t.Fatal(err) 597 } 598 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 599 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 600 } 601 }) 602 } 603 }) 604} 605 606func TestAndNegateSearch(t *testing.T) { 607 b := testShardBuilder(t, nil, 608 Document{Name: "f1", Content: []byte("x banana y")}, 609 // -----------------------------------0123456789 610 Document{Name: "f4", Content: []byte("x banana apple y")}) 611 612 t.Run("LineMatches", func(t *testing.T) { 613 sres := searchForTest(t, b, query.NewAnd( 614 &query.Substring{ 615 Pattern: "banana", 616 }, 617 &query.Not{Child: &query.Substring{ 618 Pattern: "apple", 619 }})) 620 621 matches := sres.Files 622 623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 624 t.Fatalf("got %v, want 1 match", matches) 625 } 626 if matches[0].FileName != "f1" { 627 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 628 } 629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 630 t.Fatalf("got %v, want offset 2", matches) 631 } 632 }) 633 634 
t.Run("ChunkMatches", func(t *testing.T) { 635 sres := searchForTest(t, b, 636 query.NewAnd( 637 &query.Substring{ 638 Pattern: "banana", 639 }, 640 &query.Not{Child: &query.Substring{ 641 Pattern: "apple", 642 }}, 643 ), 644 chunkOpts, 645 ) 646 647 matches := sres.Files 648 649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 650 t.Fatalf("got %v, want 1 match", matches) 651 } 652 if matches[0].FileName != "f1" { 653 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 654 } 655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 656 t.Fatalf("got %v, want offset 2", matches) 657 } 658 }) 659} 660 661func TestNegativeMatchesOnlyShortcut(t *testing.T) { 662 b := testShardBuilder(t, nil, 663 Document{Name: "f1", Content: []byte("x banana y")}, 664 Document{Name: "f2", Content: []byte("x appelmoes y")}, 665 Document{Name: "f3", Content: []byte("x appelmoes y")}, 666 Document{Name: "f3", Content: []byte("x appelmoes y")}) 667 668 t.Run("LineMatches", func(t *testing.T) { 669 sres := searchForTest(t, b, query.NewAnd( 670 &query.Substring{ 671 Pattern: "banana", 672 }, 673 &query.Not{Child: &query.Substring{ 674 Pattern: "appel", 675 }})) 676 677 if sres.Stats.FilesConsidered != 1 { 678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 679 } 680 }) 681 682 t.Run("ChunkMatches", func(t *testing.T) { 683 sres := searchForTest(t, b, query.NewAnd( 684 &query.Substring{ 685 Pattern: "banana", 686 }, 687 &query.Not{Child: &query.Substring{ 688 Pattern: "appel", 689 }}), chunkOpts) 690 691 if sres.Stats.FilesConsidered != 1 { 692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 693 } 694 }) 695} 696 697func TestFileSearch(t *testing.T) { 698 b := testShardBuilder(t, nil, 699 Document{Name: "banzana", Content: []byte("x orange y")}, 700 // -------------0123456 701 Document{Name: "banana", Content: []byte("x apple y")}, 702 // -------------012345 703 ) 704 705 t.Run("LineMatches", func(t *testing.T) { 706 sres := searchForTest(t, b, 
&query.Substring{ 707 Pattern: "anan", 708 FileName: true, 709 }) 710 711 matches := sres.Files 712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 713 t.Fatalf("got %v, want 1 match", matches) 714 } 715 716 got := matches[0].LineMatches[0] 717 want := zoekt.LineMatch{ 718 Line: []byte("banana"), 719 LineFragments: []zoekt.LineFragmentMatch{{ 720 Offset: 1, 721 LineOffset: 1, 722 MatchLength: 4, 723 }}, 724 FileName: true, 725 } 726 727 if !reflect.DeepEqual(got, want) { 728 t.Errorf("got %#v, want %#v", got, want) 729 } 730 }) 731 732 t.Run("ChunkMatches", func(t *testing.T) { 733 sres := searchForTest(t, b, &query.Substring{ 734 Pattern: "anan", 735 FileName: true, 736 }, chunkOpts) 737 738 matches := sres.Files 739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 740 t.Fatalf("got %v, want 1 match", matches) 741 } 742 743 got := matches[0].ChunkMatches[0] 744 want := zoekt.ChunkMatch{ 745 Content: []byte("banana"), 746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 747 Ranges: []zoekt.Range{{ 748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 750 }}, 751 FileName: true, 752 } 753 754 if diff := cmp.Diff(want, got); diff != "" { 755 t.Fatal(diff) 756 } 757 }) 758 759 t.Run("FileNameSet", func(t *testing.T) { 760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 761 762 matches := sres.Files 763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 764 t.Fatalf("got %v, want 1 match", matches) 765 } 766 767 got := matches[0].ChunkMatches[0] 768 want := zoekt.ChunkMatch{ 769 Content: []byte("banana"), 770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 771 Ranges: []zoekt.Range{{ 772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 774 }}, 775 FileName: true, 776 } 777 778 if diff := cmp.Diff(want, got); diff 
!= "" { 779 t.Fatal(diff) 780 } 781 }) 782} 783 784func TestFileCase(t *testing.T) { 785 b := testShardBuilder(t, nil, 786 Document{Name: "BANANA", Content: []byte("x orange y")}) 787 788 t.Run("LineMatches", func(t *testing.T) { 789 sres := searchForTest(t, b, &query.Substring{ 790 Pattern: "banana", 791 FileName: true, 792 }) 793 794 matches := sres.Files 795 if len(matches) != 1 || matches[0].FileName != "BANANA" { 796 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 797 } 798 }) 799 800 t.Run("ChunkMatches", func(t *testing.T) { 801 sres := searchForTest(t, b, &query.Substring{ 802 Pattern: "banana", 803 FileName: true, 804 }, chunkOpts) 805 806 matches := sres.Files 807 if len(matches) != 1 || matches[0].FileName != "BANANA" { 808 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 809 } 810 }) 811} 812 813func TestFileRegexpSearchBruteForce(t *testing.T) { 814 b := testShardBuilder(t, nil, 815 Document{Name: "banzana", Content: []byte("x orange y")}, 816 Document{Name: "banana", Content: []byte("x apple y")}, 817 ) 818 t.Run("LineMatches", func(t *testing.T) { 819 sres := searchForTest(t, b, &query.Regexp{ 820 Regexp: mustParseRE("[qn][zx]"), 821 FileName: true, 822 }) 823 824 matches := sres.Files 825 if len(matches) != 1 || matches[0].FileName != "banzana" { 826 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 827 } 828 }) 829 t.Run("LineMatches", func(t *testing.T) { 830 sres := searchForTest(t, b, &query.Regexp{ 831 Regexp: mustParseRE("[qn][zx]"), 832 FileName: true, 833 }, chunkOpts) 834 835 matches := sres.Files 836 if len(matches) != 1 || matches[0].FileName != "banzana" { 837 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 838 } 839 }) 840} 841 842func TestFileRegexpSearchShortString(t *testing.T) { 843 b := testShardBuilder(t, nil, 844 Document{Name: "banana.py", Content: []byte("x orange y")}) 845 846 t.Run("LineMatches", func(t *testing.T) { 847 sres := searchForTest(t, b, &query.Regexp{ 848 Regexp: mustParseRE("ana.py"), 849 
FileName: true, 850 }) 851 852 matches := sres.Files 853 if len(matches) != 1 || matches[0].FileName != "banana.py" { 854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 855 } 856 }) 857 858 t.Run("ChunkMatches", func(t *testing.T) { 859 sres := searchForTest(t, b, &query.Regexp{ 860 Regexp: mustParseRE("ana.py"), 861 FileName: true, 862 }, chunkOpts) 863 864 matches := sres.Files 865 if len(matches) != 1 || matches[0].FileName != "banana.py" { 866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 867 } 868 }) 869} 870 871func TestFileSubstringSearchBruteForce(t *testing.T) { 872 b := testShardBuilder(t, nil, 873 Document{Name: "BANZANA", Content: []byte("x orange y")}, 874 Document{Name: "banana", Content: []byte("x apple y")}) 875 876 q := &query.Substring{ 877 Pattern: "z", 878 FileName: true, 879 } 880 881 t.Run("LineMatches", func(t *testing.T) { 882 res := searchForTest(t, b, q) 883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 885 } 886 }) 887 888 t.Run("ChunkMatches", func(t *testing.T) { 889 res := searchForTest(t, b, q, chunkOpts) 890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 892 } 893 }) 894} 895 896func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 897 b := testShardBuilder(t, nil, 898 Document{Name: "BANZANA", Content: []byte("x orange y")}, 899 Document{Name: "bananaq", Content: []byte("x apple y")}) 900 901 q := &query.Substring{ 902 Pattern: "q", 903 FileName: true, 904 } 905 t.Run("LineMatches", func(t *testing.T) { 906 res := searchForTest(t, b, q) 907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 908 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 909 } 910 }) 911 912 t.Run("LineMatches", func(t *testing.T) { 913 res := searchForTest(t, b, q, chunkOpts) 914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != 
want { 915 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 916 } 917 }) 918} 919 920func TestSearchMatchAll(t *testing.T) { 921 b := testShardBuilder(t, nil, 922 Document{Name: "banzana", Content: []byte("x orange y")}, 923 Document{Name: "banana", Content: []byte("x apple y")}) 924 925 t.Run("LineMatches", func(t *testing.T) { 926 sres := searchForTest(t, b, &query.Const{Value: true}) 927 matches := sres.Files 928 if len(matches) != 2 { 929 t.Fatalf("got %v, want 2 matches", matches) 930 } 931 }) 932 933 t.Run("ChunkMatches", func(t *testing.T) { 934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 935 matches := sres.Files 936 if len(matches) != 2 { 937 t.Fatalf("got %v, want 2 matches", matches) 938 } 939 }) 940} 941 942func TestSearchNewline(t *testing.T) { 943 b := testShardBuilder(t, nil, 944 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 945 946 t.Run("LineMatches", func(t *testing.T) { 947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 948 949 // Just check that we don't crash. 950 951 matches := sres.Files 952 if len(matches) != 1 { 953 t.Fatalf("got %v, want 1 matches", matches) 954 } 955 }) 956 957 t.Run("ChunkMatches", func(t *testing.T) { 958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 959 960 // Just check that we don't crash. 
961 962 matches := sres.Files 963 if len(matches) != 1 { 964 t.Fatalf("got %v, want 1 matches", matches) 965 } 966 }) 967} 968 969func TestSearchMatchAllRegexp(t *testing.T) { 970 b := testShardBuilder(t, nil, 971 Document{Name: "banzana", Content: []byte("abcd")}, 972 Document{Name: "banana", Content: []byte("pqrs")}) 973 974 t.Run("LineMatches", func(t *testing.T) { 975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 976 977 matches := sres.Files 978 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 979 t.Fatalf("got %v, want 2 matches", matches) 980 } 981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 982 t.Fatalf("want 4 chars in every file, got %#v", matches) 983 } 984 }) 985 986 t.Run("ChunkMatches", func(t *testing.T) { 987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 988 989 matches := sres.Files 990 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 991 t.Fatalf("got %v, want 2 matches", matches) 992 } 993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 994 t.Fatalf("want 4 chars in every file, got %#v", matches) 995 } 996 }) 997} 998 999func TestSearchBM25MatchScores(t *testing.T) { 1000 ctx := context.Background() 1001 searcher := searcherForTest(t, testShardBuilder(t, nil, 1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")}, 1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")}, 1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}), 1005 )) 1006 1007 t.Run("LineMatches", func(t *testing.T) { 1008 q := &query.Substring{Pattern: "two"} 1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) 1010 if err != nil { 1011 t.Fatal(err) 1012 } 1013 matches := sres.Files 1014 if len(matches) != 1 { 1015 t.Fatalf("want 1 file index, got %d", len(matches)) 
1016 } 1017 1018 if len(matches[0].LineMatches) != 2 { 1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1020 } 1021 1022 if matches[0].LineMatches[0].LineNumber != 4 { 1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber) 1024 } 1025 }) 1026 1027 t.Run("ChunkMatches", func(t *testing.T) { 1028 q := &query.Substring{Pattern: "five"} 1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1030 if err != nil { 1031 t.Fatal(err) 1032 } 1033 1034 matches := sres.Files 1035 if len(matches) != 1 { 1036 t.Fatalf("want 1 file index, got %d", len(matches)) 1037 } 1038 1039 if len(matches[0].ChunkMatches) != 2 { 1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1041 } 1042 1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 { 1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch) 1045 } 1046 }) 1047 1048 t.Run("ChunkMatches with symbols", func(t *testing.T) { 1049 q := &query.Or{ 1050 Children: []query.Q{ 1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}}, 1052 &query.Substring{Pattern: "five"}, 1053 }, 1054 } 1055 1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1057 if err != nil { 1058 t.Fatal(err) 1059 } 1060 1061 matches := sres.Files 1062 if len(matches) != 2 { 1063 t.Fatalf("want 2 file index, got %d", len(matches)) 1064 } 1065 1066 foundSymbolInfo := false 1067 for _, m := range matches { 1068 for _, cm := range m.ChunkMatches { 1069 if len(cm.SymbolInfo) > 0 { 1070 foundSymbolInfo = true 1071 } 1072 } 1073 } 1074 1075 if !foundSymbolInfo { 1076 t.Fatalf("want symbol info, got none") 1077 } 1078 }) 1079} 1080 1081func TestFileRestriction(t *testing.T) { 1082 b := testShardBuilder(t, nil, 1083 Document{Name: "banana1", Content: []byte("x orange y")}, 1084 Document{Name: 
"banana2", Content: []byte("x apple y")}, 1085 Document{Name: "orange", Content: []byte("x apple z")}) 1086 1087 t.Run("LineMatches", func(t *testing.T) { 1088 sres := searchForTest(t, b, query.NewAnd( 1089 &query.Substring{ 1090 Pattern: "banana", 1091 FileName: true, 1092 }, 1093 &query.Substring{ 1094 Pattern: "apple", 1095 })) 1096 1097 matches := sres.Files 1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1099 t.Fatalf("got %v, want 1 match", matches) 1100 } 1101 1102 match := matches[0].LineMatches[0] 1103 got := string(match.Line) 1104 want := "x apple y" 1105 if got != want { 1106 t.Errorf("got match %#v, want line %q", match, want) 1107 } 1108 }) 1109 1110 t.Run("ChunkMatches", func(t *testing.T) { 1111 sres := searchForTest(t, b, query.NewAnd( 1112 &query.Substring{ 1113 Pattern: "banana", 1114 FileName: true, 1115 }, 1116 &query.Substring{ 1117 Pattern: "apple", 1118 }), chunkOpts) 1119 1120 matches := sres.Files 1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1122 t.Fatalf("got %v, want 1 match", matches) 1123 } 1124 1125 match := matches[0].ChunkMatches[0] 1126 got := string(match.Content) 1127 want := "x apple y" 1128 if got != want { 1129 t.Errorf("got match %#v, want line %q", match, want) 1130 } 1131 }) 1132} 1133 1134func TestFileNameBoundary(t *testing.T) { 1135 b := testShardBuilder(t, nil, 1136 Document{Name: "banana2", Content: []byte("x apple y")}, 1137 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1138 Document{Name: "foo", Content: []byte("x apple y")}) 1139 1140 t.Run("LineMatches", func(t *testing.T) { 1141 sres := searchForTest(t, b, &query.Substring{ 1142 Pattern: "helpers.go", 1143 FileName: true, 1144 }) 1145 1146 matches := sres.Files 1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1148 t.Fatalf("got %v, want 1 match", matches) 1149 } 1150 }) 1151 1152 t.Run("ChunkMatches", func(t *testing.T) { 1153 sres := searchForTest(t, b, &query.Substring{ 1154 Pattern: "helpers.go", 
1155 FileName: true, 1156 }, chunkOpts) 1157 1158 matches := sres.Files 1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1160 t.Fatalf("got %v, want 1 match", matches) 1161 } 1162 }) 1163} 1164 1165func TestDocumentOrder(t *testing.T) { 1166 var docs []Document 1167 for i := range 3 { 1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1169 } 1170 1171 b := testShardBuilder(t, nil, docs...) 1172 1173 t.Run("LineMatches", func(t *testing.T) { 1174 sres := searchForTest(t, b, query.NewAnd( 1175 &query.Substring{ 1176 Pattern: "needle", 1177 })) 1178 1179 want := []string{"f0", "f1", "f2"} 1180 var got []string 1181 for _, f := range sres.Files { 1182 got = append(got, f.FileName) 1183 } 1184 if !reflect.DeepEqual(got, want) { 1185 t.Fatalf("got %v, want %v", got, want) 1186 } 1187 }) 1188 1189 t.Run("ChunkMatches", func(t *testing.T) { 1190 sres := searchForTest(t, b, 1191 query.NewAnd(&query.Substring{ 1192 Pattern: "needle", 1193 }), 1194 chunkOpts, 1195 ) 1196 1197 want := []string{"f0", "f1", "f2"} 1198 var got []string 1199 for _, f := range sres.Files { 1200 got = append(got, f.FileName) 1201 } 1202 if !reflect.DeepEqual(got, want) { 1203 t.Fatalf("got %v, want %v", got, want) 1204 } 1205 }) 1206} 1207 1208func TestBranchMask(t *testing.T) { 1209 b := testShardBuilder(t, &zoekt.Repository{ 1210 Branches: []zoekt.RepositoryBranch{ 1211 {"master", "v-master"}, 1212 {"stable", "v-stable"}, 1213 {"bonzai", "v-bonzai"}, 1214 }, 1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1219 ) 1220 1221 t.Run("LineMatches", func(t *testing.T) { 1222 sres := searchForTest(t, b, query.NewAnd( 1223 &query.Substring{ 1224 
Pattern: "needle",
			},
			&query.Branch{
				Pattern: "table",
			}))

		if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
			t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
		}

		if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
			t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, query.NewAnd(
			&query.Substring{
				Pattern: "needle",
			},
			&query.Branch{
				Pattern: "table",
			}),
			chunkOpts,
		)

		if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
			t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
		}

		if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
			t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
		}
	})
}

// TestBranchLimit checks that NewShardBuilder accepts a repository with 64
// branches and returns an error for one with 65.
func TestBranchLimit(t *testing.T) {
	for _, n := range []int{64, 65} {
		repo := &zoekt.Repository{}
		for i := 0; i < n; i++ {
			name := fmt.Sprintf("b%d", i)
			repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{name, "v-" + name})
		}

		_, err := NewShardBuilder(repo)
		switch {
		case n == 64 && err != nil:
			t.Fatalf("NewShardBuilder: %v", err)
		case n == 65 && err == nil:
			t.Fatalf("NewShardBuilder succeeded")
		}
	}
}

// TestBranchReport checks that a file indexed on two branches reports both
// branch names in its search result.
func TestBranchReport(t *testing.T) {
	branches := []string{"stable", "master"}
	repo := &zoekt.Repository{
		Branches: []zoekt.RepositoryBranch{
			{"stable", "vs"},
			{"master", "vm"},
		},
	}
	b := testShardBuilder(t, repo,
		Document{Name: "f2", Content: []byte("needle"), Branches: branches})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if got := res.Files[0].Branches; !reflect.DeepEqual(got, branches) {
			t.Fatalf("got branches %q, want %q", got, branches)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if got := res.Files[0].Branches; !reflect.DeepEqual(got, branches) {
			t.Fatalf("got branches %q, want %q", got, branches)
		}
	})
}

// TestBranchVersions checks that a file indexed only on "master" reports the
// version string registered for that branch.
func TestBranchVersions(t *testing.T) {
	repo := &zoekt.Repository{
		Branches: []zoekt.RepositoryBranch{
			{"stable", "v-stable"},
			{"master", "v-master"},
		},
	}
	b := testShardBuilder(t, repo,
		Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if file := res.Files[0]; file.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", file)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", res.Files)
		}

		if file := res.Files[0]; file.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", file)
		}
	})
}

// mustParseRE parses s with the Perl syntax flags and panics if parsing fails.
func mustParseRE(s string) *syntax.Regexp {
	re, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return re
	}

	panic(err)
}

func TestRegexp(t *testing.T) {
	content := []byte("needle the bla")
	// ----------------01234567890123

	b := testShardBuilder(t,
nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		got := sres.Files[0].LineMatches[0]
		want := zoekt.LineMatch{
			LineFragments: []zoekt.LineFragmentMatch{{
				LineOffset:  3,
				Offset:      3,
				MatchLength: 11,
			}},
			Line:       content,
			FileName:   false,
			LineNumber: 1,
			LineStart:  0,
			LineEnd:    14,
		}

		if !reflect.DeepEqual(got, want) {
			t.Errorf("got %#v, want %#v", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		got := sres.Files[0].ChunkMatches[0]
		want := zoekt.ChunkMatch{
			Content:      content,
			ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
			Ranges: []zoekt.Range{{
				Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
				End:   zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
			}},
		}

		if diff := cmp.Diff(want, got); diff != "" {
			t.Fatal(diff)
		}
	})
}

// TestRegexpFile checks that a file-name regexp selects only the document
// whose name matches the whole pattern ("play.*mussel"), not "play.txt".
func TestRegexpFile(t *testing.T) {
	content := []byte("needle the bla")

	name := "let's play: find the mussel"
	b := testShardBuilder(t, nil,
		Document{Name: name, Content: content},
		Document{Name: "play.txt", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:   mustParseRE("play.*mussel"),
				FileName: true,
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Files[0].FileName != name {
			t.Errorf("got match %#v, want name %q", sres.Files[0], name)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:   mustParseRE("play.*mussel"),
				FileName: true,
			}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Files[0].FileName != name {
			t.Errorf("got match %#v, want name %q", sres.Files[0], name)
		}
	})
}

// TestRegexpOrder checks that the regexp "dle.*bla" does not match content in
// which "bla" appears before "dle": the order of the literal parts matters.
func TestRegexpOrder(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// Pass chunkOpts so this subtest differs from the LineMatches one
		// instead of repeating the same search.
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			}, chunkOpts)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})
}

// TestRepoName checks repository-name filtering on a shard whose repository is
// named "bla": a non-matching repo regexp must return no files and consider no
// files at all, while a matching one must return the single document.
func TestRepoName(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testShardBuilder(t, &zoekt.Repository{Name: "bla"},
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("foo")},
			))

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			))
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("foo")},
			),
			chunkOpts,
		)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		// The positive case also runs with chunkOpts so the whole subtest
		// uses the same options.
		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			),
			chunkOpts,
		)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})
}

// TestMergeMatches checks how matches on the same line are combined: three
// adjacent "bla" matches stay three separate fragments/ranges within a single
// line or chunk match, while the overlapping matches "hello" and "logood" in
// "hellogoodbye" are reduced to the single match "hello".
func TestMergeMatches(t *testing.T) {
	t.Run("LineMatches, adjacent matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("blablabla")})
		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
			t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
		}
	})

	t.Run("LineMatches, overlapping matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("hellogoodbye")})
		sres := searchForTest(t, b,
			&query.And{Children: []query.Q{
				&query.Substring{Pattern: "hello"},
				&query.Substring{Pattern: "logood"},
			}})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		lineFragments := sres.Files[0].LineMatches[0].LineFragments
		if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
			t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
		}
	})

	t.Run("ChunkMatches, no overlap", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("blablabla")})

		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"},
			chunkOpts,
		)
		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
			t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
		}
	})

	t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("hellogoodbye")})
		sres := searchForTest(t, b,
			&query.And{Children: []query.Q{
				&query.Substring{Pattern: "hello"},
				&query.Substring{Pattern: "logood"},
			}}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 chunk match", sres.Files)
		}

		ranges := sres.Files[0].ChunkMatches[0].Ranges
		if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
			t.Fatalf("got %v, want single chunk range 'hello'", ranges)
		}
	})
}

func TestRepoURL(t *testing.T) {
	content := []byte("blablabla")
	b := testShardBuilder(t, &zoekt.Repository{
		Name:                 "name",
		URL:                  "URL",
		CommitURLTemplate:    "commit",
		FileURLTemplate:      "file-url",
		LineFragmentTemplate: "fragment",
	}, Document{Name: "f1", Content: content})
1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1642 1643 if sres.RepoURLs["name"] != "file-url" { 1644 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1645 } 1646 if sres.LineFragments["name"] != "fragment" { 1647 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1648 } 1649} 1650 1651func TestRegexpCaseSensitive(t *testing.T) { 1652 content := []byte("bla\nfunc unmarshalGitiles\n") 1653 b := testShardBuilder(t, nil, Document{ 1654 Name: "f1", 1655 Content: content, 1656 }) 1657 1658 t.Run("LineMatches", func(t *testing.T) { 1659 res := searchForTest(t, b, 1660 &query.Regexp{ 1661 Regexp: mustParseRE("func.*Gitiles"), 1662 CaseSensitive: true, 1663 }) 1664 1665 if len(res.Files) != 1 { 1666 t.Fatalf("got %v, want one index", res.Files) 1667 } 1668 }) 1669 1670 t.Run("ChunkMatches", func(t *testing.T) { 1671 res := searchForTest(t, b, 1672 &query.Regexp{ 1673 Regexp: mustParseRE("func.*Gitiles"), 1674 CaseSensitive: true, 1675 }, 1676 chunkOpts, 1677 ) 1678 1679 if len(res.Files) != 1 { 1680 t.Fatalf("got %v, want one index", res.Files) 1681 } 1682 }) 1683} 1684 1685func TestRegexpCaseFolding(t *testing.T) { 1686 content := []byte("bla\nfunc unmarshalGitiles\n") 1687 1688 b := testShardBuilder(t, nil, 1689 Document{Name: "f1", Content: content}) 1690 res := searchForTest(t, b, 1691 &query.Regexp{ 1692 Regexp: mustParseRE("func.*GITILES"), 1693 CaseSensitive: false, 1694 }) 1695 1696 if len(res.Files) != 1 { 1697 t.Fatalf("got %v, want one index", res.Files) 1698 } 1699} 1700 1701func TestCaseRegexp(t *testing.T) { 1702 content := []byte("BLABLABLA") 1703 b := testShardBuilder(t, nil, 1704 Document{Name: "f1", Content: content}) 1705 1706 t.Run("LineMatches", func(t *testing.T) { 1707 res := searchForTest(t, b, 1708 &query.Regexp{ 1709 Regexp: mustParseRE("[xb][xl][xa]"), 1710 CaseSensitive: true, 1711 }) 1712 1713 if len(res.Files) > 0 { 1714 t.Fatalf("got %v, want no matches", res.Files) 1715 } 1716 }) 1717 1718 
t.Run("ChunkMatches", func(t *testing.T) { 1719 res := searchForTest(t, b, 1720 &query.Regexp{ 1721 Regexp: mustParseRE("[xb][xl][xa]"), 1722 CaseSensitive: true, 1723 }, 1724 chunkOpts, 1725 ) 1726 1727 if len(res.Files) > 0 { 1728 t.Fatalf("got %v, want no matches", res.Files) 1729 } 1730 }) 1731} 1732 1733func TestNegativeRegexp(t *testing.T) { 1734 content := []byte("BLABLABLA needle bla") 1735 b := testShardBuilder(t, nil, 1736 Document{Name: "f1", Content: content}) 1737 1738 t.Run("LineMatches", func(t *testing.T) { 1739 res := searchForTest(t, b, 1740 query.NewAnd( 1741 &query.Substring{ 1742 Pattern: "needle", 1743 }, 1744 &query.Not{ 1745 Child: &query.Regexp{ 1746 Regexp: mustParseRE(".cs"), 1747 }, 1748 })) 1749 1750 if len(res.Files) != 1 { 1751 t.Fatalf("got %v, want 1 match", res.Files) 1752 } 1753 }) 1754 1755 t.Run("ChunkMatches", func(t *testing.T) { 1756 res := searchForTest(t, b, 1757 query.NewAnd( 1758 &query.Substring{ 1759 Pattern: "needle", 1760 }, 1761 &query.Not{ 1762 Child: &query.Regexp{ 1763 Regexp: mustParseRE(".cs"), 1764 }, 1765 }, 1766 ), 1767 chunkOpts) 1768 1769 if len(res.Files) != 1 { 1770 t.Fatalf("got %v, want 1 match", res.Files) 1771 } 1772 }) 1773} 1774 1775func TestSymbolRank(t *testing.T) { 1776 t.Skip() 1777 1778 content := []byte("func bla() blubxxxxx") 1779 // ----------------01234567890123456789 1780 b := testShardBuilder(t, nil, 1781 Document{ 1782 Name: "f1", 1783 Content: content, 1784 }, Document{ 1785 Name: "f2", 1786 Content: content, 1787 Symbols: []DocumentSection{{5, 8}}, 1788 }, Document{ 1789 Name: "f3", 1790 Content: content, 1791 }) 1792 1793 t.Run("LineMatches", func(t *testing.T) { 1794 res := searchForTest(t, b, 1795 &query.Substring{ 1796 CaseSensitive: false, 1797 Pattern: "bla", 1798 }) 1799 1800 if len(res.Files) != 3 { 1801 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1802 } 1803 if res.Files[0].FileName != "f2" { 1804 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1805 } 1806 }) 1807 1808 t.Run("ChunkMatches", func(t *testing.T) { 1809 res := searchForTest(t, b, 1810 &query.Substring{ 1811 CaseSensitive: false, 1812 Pattern: "bla", 1813 }, chunkOpts) 1814 1815 if len(res.Files) != 3 { 1816 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1817 } 1818 if res.Files[0].FileName != "f2" { 1819 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1820 } 1821 }) 1822} 1823 1824func TestSymbolRankRegexpUTF8(t *testing.T) { 1825 t.Skip() 1826 1827 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1828 content := []byte(prefix + 1829 "func bla() blub") 1830 // ------012345678901234 1831 b := testShardBuilder(t, nil, 1832 Document{ 1833 Name: "f1", 1834 Content: content, 1835 }, Document{ 1836 Name: "f2", 1837 Content: content, 1838 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1839 }, Document{ 1840 Name: "f3", 1841 Content: content, 1842 }) 1843 1844 t.Run("LineMatches", func(t *testing.T) { 1845 res := searchForTest(t, b, 1846 &query.Regexp{ 1847 Regexp: mustParseRE("b.a"), 1848 }) 1849 1850 if len(res.Files) != 3 { 1851 t.Fatalf("got %#v, want 3 files", res.Files) 1852 } 1853 if res.Files[0].FileName != "f2" { 1854 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1855 } 1856 }) 1857 1858 t.Run("ChunjkMatches", func(t *testing.T) { 1859 res := searchForTest(t, b, 1860 &query.Regexp{ 1861 Regexp: mustParseRE("b.a"), 1862 }, chunkOpts) 1863 1864 if len(res.Files) != 3 { 1865 t.Fatalf("got %#v, want 3 files", res.Files) 1866 } 1867 if res.Files[0].FileName != "f2" { 1868 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1869 } 1870 }) 1871} 1872 1873func TestPartialSymbolRank(t *testing.T) { 1874 t.Skip() 1875 1876 content := []byte("func bla() blub") 1877 // 
----------------012345678901234 1878 1879 b := testShardBuilder(t, nil, 1880 Document{ 1881 Name: "f1", 1882 Content: content, 1883 Symbols: []DocumentSection{{4, 9}}, 1884 }, Document{ 1885 Name: "f2", 1886 Content: content, 1887 Symbols: []DocumentSection{{4, 8}}, 1888 }, Document{ 1889 Name: "f3", 1890 Content: content, 1891 Symbols: []DocumentSection{{4, 9}}, 1892 }) 1893 1894 t.Run("LineMatches", func(t *testing.T) { 1895 res := searchForTest(t, b, 1896 &query.Substring{ 1897 Pattern: "bla", 1898 }) 1899 1900 if len(res.Files) != 3 { 1901 t.Fatalf("got %#v, want 3 files", res.Files) 1902 } 1903 if res.Files[0].FileName != "f2" { 1904 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1905 } 1906 }) 1907 1908 t.Run("ChunkMatches", func(t *testing.T) { 1909 res := searchForTest(t, b, 1910 &query.Substring{ 1911 Pattern: "bla", 1912 }, chunkOpts) 1913 1914 if len(res.Files) != 3 { 1915 t.Fatalf("got %#v, want 3 files", res.Files) 1916 } 1917 if res.Files[0].FileName != "f2" { 1918 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1919 } 1920 }) 1921} 1922 1923func TestNegativeRepo(t *testing.T) { 1924 content := []byte("bla the needle") 1925 // ----------------01234567890123 1926 b := testShardBuilder(t, &zoekt.Repository{ 1927 Name: "bla", 1928 }, Document{Name: "f1", Content: content}) 1929 1930 t.Run("LineMatches", func(t *testing.T) { 1931 sres := searchForTest(t, b, 1932 query.NewAnd( 1933 &query.Substring{Pattern: "needle"}, 1934 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1935 )) 1936 1937 if len(sres.Files) != 0 { 1938 t.Fatalf("got %v, want 0 matches", sres.Files) 1939 } 1940 }) 1941 1942 t.Run("ChunkMatches", func(t *testing.T) { 1943 sres := searchForTest(t, b, 1944 query.NewAnd( 1945 &query.Substring{Pattern: "needle"}, 1946 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1947 ), chunkOpts) 1948 1949 if len(sres.Files) != 0 { 1950 t.Fatalf("got %v, want 0 matches", sres.Files) 1951 } 1952 }) 
1953} 1954 1955func TestListRepos(t *testing.T) { 1956 content := []byte("bla the needle\n") 1957 // ----------------012345678901234- 1958 1959 t.Run("default and minimal fallback", func(t *testing.T) { 1960 repo := &zoekt.Repository{ 1961 Name: "reponame", 1962 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1963 } 1964 b := testShardBuilder(t, repo, 1965 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1966 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1967 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1968 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1969 1970 searcher := searcherForTest(t, b) 1971 1972 for _, opts := range []*zoekt.ListOptions{ 1973 nil, 1974 {}, 1975 {Field: zoekt.RepoListFieldRepos}, 1976 {Field: zoekt.RepoListFieldReposMap}, 1977 } { 1978 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1979 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1980 1981 res, err := searcher.List(context.Background(), q, opts) 1982 if err != nil { 1983 t.Fatalf("List(%v): %v", q, err) 1984 } 1985 1986 want := &zoekt.RepoList{ 1987 Repos: []*zoekt.RepoListEntry{{ 1988 Repository: *repo, 1989 Stats: zoekt.RepoStats{ 1990 Documents: 4, 1991 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1992 Shards: 1, 1993 1994 NewLinesCount: 4, 1995 DefaultBranchNewLinesCount: 2, 1996 OtherBranchesNewLinesCount: 3, 1997 }, 1998 }}, 1999 Stats: zoekt.RepoStats{ 2000 Repos: 1, 2001 Documents: 4, 2002 ContentBytes: 68, 2003 Shards: 1, 2004 2005 NewLinesCount: 4, 2006 DefaultBranchNewLinesCount: 2, 2007 OtherBranchesNewLinesCount: 3, 2008 }, 2009 } 2010 ignored := []cmp.Option{ 2011 cmpopts.EquateEmpty(), 2012 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 2013 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 2014 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 2015 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 2016 } 
2017 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2018 t.Fatalf("mismatch (-want +got):\n%s", diff) 2019 } 2020 2021 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 2022 res, err = searcher.List(context.Background(), q, nil) 2023 if err != nil { 2024 t.Fatalf("List(%v): %v", q, err) 2025 } 2026 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2027 t.Fatalf("got %v, want 0 matches", res) 2028 } 2029 }) 2030 } 2031 }) 2032 2033 t.Run("minimal", func(t *testing.T) { 2034 repo := &zoekt.Repository{ 2035 ID: 1234, 2036 Name: "reponame", 2037 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2038 RawConfig: map[string]string{"repoid": "1234"}, 2039 } 2040 b := testShardBuilder(t, repo, 2041 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 2042 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 2043 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 2044 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 2045 2046 searcher := searcherForTest(t, b) 2047 2048 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 2049 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2050 if err != nil { 2051 t.Fatalf("List(%v): %v", q, err) 2052 } 2053 2054 want := &zoekt.RepoList{ 2055 ReposMap: zoekt.ReposMap{ 2056 repo.ID: { 2057 HasSymbols: repo.HasSymbols, 2058 Branches: repo.Branches, 2059 }, 2060 }, 2061 Stats: zoekt.RepoStats{ 2062 Repos: 1, 2063 Shards: 1, 2064 Documents: 4, 2065 IndexBytes: 412, 2066 ContentBytes: 68, 2067 NewLinesCount: 4, 2068 DefaultBranchNewLinesCount: 2, 2069 OtherBranchesNewLinesCount: 3, 2070 }, 2071 } 2072 2073 ignored := []cmp.Option{ 2074 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 2075 } 2076 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2077 t.Fatalf("mismatch (-want +got):\n%s", diff) 2078 } 2079 2080 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 
2081 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2082 if err != nil { 2083 t.Fatalf("List(%v): %v", q, err) 2084 } 2085 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2086 t.Fatalf("got %v, want 0 matches", res) 2087 } 2088 }) 2089} 2090 2091func TestListReposByContent(t *testing.T) { 2092 content := []byte("bla the needle") 2093 2094 b := testShardBuilder(t, &zoekt.Repository{ 2095 Name: "reponame", 2096 }, 2097 Document{Name: "f1", Content: content}, 2098 Document{Name: "f2", Content: content}) 2099 2100 searcher := searcherForTest(t, b) 2101 q := &query.Substring{Pattern: "needle"} 2102 res, err := searcher.List(context.Background(), q, nil) 2103 if err != nil { 2104 t.Fatalf("List(%v): %v", q, err) 2105 } 2106 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 2107 t.Fatalf("got %v, want 1 matches", res) 2108 } 2109 if got := res.Repos[0].Stats.Shards; got != 1 { 2110 t.Fatalf("got %d, want 1 shard", got) 2111 } 2112 q = &query.Substring{Pattern: "foo"} 2113 res, err = searcher.List(context.Background(), q, nil) 2114 if err != nil { 2115 t.Fatalf("List(%v): %v", q, err) 2116 } 2117 if len(res.Repos) != 0 { 2118 t.Fatalf("got %v, want 0 matches", res) 2119 } 2120} 2121 2122func TestMetadata(t *testing.T) { 2123 content := []byte("bla the needle") 2124 2125 b := testShardBuilder(t, &zoekt.Repository{ 2126 Name: "reponame", 2127 }, Document{Name: "f1", Content: content}, 2128 Document{Name: "f2", Content: content}) 2129 2130 var buf bytes.Buffer 2131 if err := b.Write(&buf); err != nil { 2132 t.Fatal(err) 2133 } 2134 f := &memSeeker{buf.Bytes()} 2135 2136 rd, _, err := ReadMetadata(f) 2137 if err != nil { 2138 t.Fatalf("ReadMetadata: %v", err) 2139 } 2140 2141 if got, want := rd[0].Name, "reponame"; got != want { 2142 t.Fatalf("got %q want %q", got, want) 2143 } 2144} 2145 2146func TestOr(t *testing.T) { 2147 b := testShardBuilder(t, nil, 2148 Document{Name: "f1", Content: 
[]byte("needle")}, 2149 Document{Name: "f2", Content: []byte("banana")}) 2150 t.Run("LineMatches", func(t *testing.T) { 2151 sres := searchForTest(t, b, query.NewOr( 2152 &query.Substring{Pattern: "needle"}, 2153 &query.Substring{Pattern: "banana"})) 2154 2155 if len(sres.Files) != 2 { 2156 t.Fatalf("got %v, want 2 files", sres.Files) 2157 } 2158 }) 2159 2160 t.Run("ChunkMatches", func(t *testing.T) { 2161 sres := searchForTest(t, b, query.NewOr( 2162 &query.Substring{Pattern: "needle"}, 2163 &query.Substring{Pattern: "banana"})) 2164 2165 if len(sres.Files) != 2 { 2166 t.Fatalf("got %v, want 2 files", sres.Files) 2167 } 2168 }) 2169} 2170 2171func TestFrequency(t *testing.T) { 2172 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2173 2174 b := testShardBuilder(t, nil, 2175 Document{ 2176 Name: "f1", 2177 Content: content, 2178 }) 2179 2180 t.Run("LineMatches", func(t *testing.T) { 2181 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}) 2182 if len(sres.Files) != 0 { 2183 t.Errorf("got %v, wanted 0 matches", sres.Files) 2184 } 2185 }) 2186 2187 t.Run("ChunkMatches", func(t *testing.T) { 2188 sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts) 2189 if len(sres.Files) != 0 { 2190 t.Errorf("got %v, wanted 0 matches", sres.Files) 2191 } 2192 }) 2193} 2194 2195func TestMatchNewline(t *testing.T) { 2196 re, err := syntax.Parse("[^a]a", syntax.ClassNL) 2197 if err != nil { 2198 t.Fatalf("syntax.Parse: %v", err) 2199 } 2200 2201 content := []byte("pqr\nalex") 2202 2203 b := testShardBuilder(t, nil, 2204 Document{ 2205 Name: "f1", 2206 Content: content, 2207 }) 2208 2209 t.Run("LineMatches", func(t *testing.T) { 2210 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 2211 if len(sres.Files) != 1 { 2212 t.Errorf("got %v, wanted 1 matches", sres.Files) 2213 } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 2214 t.Errorf("got match line %q, want 
%q", l, content)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
			t.Errorf("got match line %q, want %q", c, content)
		}
	})
}

// TestSubRepo indexes one document that is tagged with a sub-repository path
// and checks that the file match reports that path together with the
// sub-repository's name, and that the result's LineFragments map carries the
// sub-repository's template under the sub-repository name.
func TestSubRepo(t *testing.T) {
	subRepos := map[string]*zoekt.Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}

	content := []byte("pqr\nalex")

	b := testShardBuilder(t, &zoekt.Repository{
		SubRepoMap: subRepos,
	}, Document{
		Name:              "sub/f1",
		Content:           content,
		SubRepositoryPath: "sub",
	})

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, wanted 1 matches", sres.Files)
	}

	f := sres.Files[0]
	if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
	}

	// The map is looked up by repository name, so the message names that key.
	if sres.LineFragments["sub-name"] != "sub-line" {
		t.Errorf("got LineFragmentTemplate %v, want {'sub-name':'sub-line'}", sres.LineFragments)
	}
}

// TestSearchEither checks that a Substring query without Content set matches
// both a file whose content contains the pattern and a file whose name
// contains it, while Content: true restricts the hit to the content match.
func TestSearchEither(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})
}

// TestUnicodeExactMatch checks that a case-sensitive search for a needle made
// of accented characters finds the document that contains it verbatim.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
		}
	})
}

// TestUnicodeCoverContent searches for the upper-cased form of an accented
// needle: the case-sensitive query must find nothing, the case-insensitive
// query must find the document, and the reported offset must equal the byte
// index of the needle in the content.
func TestUnicodeCoverContent(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
		}

		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
		}

		if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
		}

		res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
		}

		got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
		want := uint32(strings.Index(string(content), needle))
		if got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})
}

// TestUnicodeNonCoverContent is the content-only (Content: true) variant of
// the case-insensitive accented search, using a longer needle; it checks the
// hit count and that the match offset is the needle's byte index.
func TestUnicodeNonCoverContent(t *testing.T) {
	needle := "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}

		if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}

		got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
		want := uint32(strings.Index(string(content), needle))
		if got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})
}

2393const kelvinCodePoint = 8490 2394 2395func TestUnicodeVariableLength(t *testing.T) { 2396 lower := 'k' 2397 upper := rune(kelvinCodePoint) 2398 2399 needle := "nee" + string([]rune{lower}) + "eed" 2400 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2401 " ee" + string([]rune{lower}) + "ee" + 2402 " ee" + string([]rune{upper}) + "ee") 2403 2404 t.Run("LineMatches", func(t *testing.T) { 2405 b := testShardBuilder(t, nil, 2406 Document{Name: "f1", Content: corpus}) 2407 2408 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2409 if len(res.Files) != 1 { 2410 t.Fatalf("got %v, wanted 1 index", res.Files) 2411 } 2412 }) 2413 2414 t.Run("ChunkMatches", func(t *testing.T) { 2415 b := testShardBuilder(t, nil, 2416 Document{Name: "f1", Content: corpus}) 2417 2418 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2419 if len(res.Files) != 1 { 2420 t.Fatalf("got %v, wanted 1 index", res.Files) 2421 } 2422 }) 2423} 2424 2425func TestUnicodeFileStartOffsets(t *testing.T) { 2426 unicode := "世界" 2427 wat := "waaaaaat" 2428 b := testShardBuilder(t, nil, 2429 Document{ 2430 Name: "f1", 2431 Content: []byte(unicode), 2432 }, 2433 Document{ 2434 Name: "f2", 2435 Content: []byte(wat), 2436 }, 2437 ) 2438 q := &query.Substring{Pattern: wat, Content: true} 2439 res := searchForTest(t, b, q) 2440 if len(res.Files) != 1 { 2441 t.Fatalf("got %v, wanted 1 index", res.Files) 2442 } 2443} 2444 2445func TestLongFileUTF8(t *testing.T) { 2446 needle := "neeedle" 2447 2448 // 6 bytes. 
2449 unicode := "世界" 2450 content := []byte(strings.Repeat(unicode, 100) + needle) 2451 b := testShardBuilder(t, nil, 2452 Document{ 2453 Name: "f1", 2454 Content: []byte(strings.Repeat("a", 50)), 2455 }, 2456 Document{ 2457 Name: "f2", 2458 Content: content, 2459 }) 2460 2461 t.Run("LineMatches", func(t *testing.T) { 2462 q := &query.Substring{Pattern: needle, Content: true} 2463 res := searchForTest(t, b, q) 2464 if len(res.Files) != 1 { 2465 t.Errorf("got %v, want 1 result", res) 2466 } 2467 }) 2468 2469 t.Run("ChunkMatches", func(t *testing.T) { 2470 q := &query.Substring{Pattern: needle, Content: true} 2471 res := searchForTest(t, b, q, chunkOpts) 2472 if len(res.Files) != 1 { 2473 t.Errorf("got %v, want 1 result", res) 2474 } 2475 }) 2476} 2477 2478func TestEstimateDocCount(t *testing.T) { 2479 content := []byte("bla needle bla") 2480 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2481 Document{Name: "f1", Content: content}, 2482 Document{Name: "f2", Content: content}, 2483 ) 2484 2485 t.Run("LineMatches", func(t *testing.T) { 2486 if sres := searchForTest(t, b, 2487 query.NewAnd( 2488 &query.Substring{Pattern: "needle"}, 2489 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2490 ), zoekt.SearchOptions{ 2491 EstimateDocCount: true, 2492 }); sres.Stats.ShardFilesConsidered != 2 { 2493 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2494 } 2495 if sres := searchForTest(t, b, 2496 query.NewAnd( 2497 &query.Substring{Pattern: "needle"}, 2498 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2499 ), zoekt.SearchOptions{ 2500 EstimateDocCount: true, 2501 }); sres.Stats.ShardFilesConsidered != 0 { 2502 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2503 } 2504 }) 2505 2506 t.Run("ChunkMatches", func(t *testing.T) { 2507 if sres := searchForTest(t, b, 2508 query.NewAnd( 2509 &query.Substring{Pattern: "needle"}, 2510 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2511 ), zoekt.SearchOptions{ 
2512 EstimateDocCount: true, 2513 ChunkMatches: true, 2514 }); sres.Stats.ShardFilesConsidered != 2 { 2515 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2516 } 2517 if sres := searchForTest(t, b, 2518 query.NewAnd( 2519 &query.Substring{Pattern: "needle"}, 2520 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2521 ), zoekt.SearchOptions{ 2522 EstimateDocCount: true, 2523 ChunkMatches: true, 2524 }); sres.Stats.ShardFilesConsidered != 0 { 2525 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2526 } 2527 }) 2528} 2529 2530func TestUTF8CorrectCorpus(t *testing.T) { 2531 needle := "neeedle" 2532 2533 // 6 bytes. 2534 unicode := "世界" 2535 b := testShardBuilder(t, nil, 2536 Document{ 2537 Name: "f1", 2538 Content: []byte(strings.Repeat(unicode, 100)), 2539 }, 2540 Document{ 2541 Name: "xxxxxneeedle", 2542 Content: []byte("hello"), 2543 }) 2544 2545 t.Run("LineMatches", func(t *testing.T) { 2546 q := &query.Substring{Pattern: needle, FileName: true} 2547 res := searchForTest(t, b, q) 2548 if len(res.Files) != 1 { 2549 t.Errorf("got %v, want 1 result", res) 2550 } 2551 }) 2552 2553 t.Run("ChunkMatches", func(t *testing.T) { 2554 q := &query.Substring{Pattern: needle, FileName: true} 2555 res := searchForTest(t, b, q, chunkOpts) 2556 if len(res.Files) != 1 { 2557 t.Errorf("got %v, want 1 result", res) 2558 } 2559 }) 2560} 2561 2562func TestBuilderStats(t *testing.T) { 2563 b := testShardBuilder(t, nil, 2564 Document{ 2565 Name: "f1", 2566 Content: []byte(strings.Repeat("abcd", 1024)), 2567 }) 2568 var buf bytes.Buffer 2569 if err := b.Write(&buf); err != nil { 2570 t.Fatal(err) 2571 } 2572 2573 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2574 t.Errorf("got %d, want %d", got, want) 2575 } 2576} 2577 2578func TestIOStats(t *testing.T) { 2579 b := testShardBuilder(t, nil, 2580 Document{ 2581 Name: "f1", 2582 Content: []byte(strings.Repeat("abcd", 1024)), 2583 }) 2584 2585 t.Run("LineMatches", func(t 
*testing.T) { 2586 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2587 res := searchForTest(t, b, q) 2588 2589 // 4096 (content) + 2 (overhead: newlines or doc sections) 2590 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2591 t.Errorf("got content I/O %d, want %d", got, want) 2592 } 2593 2594 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2595 // delta encoded. 2596 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2597 t.Errorf("got index I/O %d, want %d", got, want) 2598 } 2599 }) 2600 2601 t.Run("ChunkMatches", func(t *testing.T) { 2602 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2603 res := searchForTest(t, b, q, chunkOpts) 2604 2605 // 4096 (content) + 2 (overhead: newlines or doc sections) 2606 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2607 t.Errorf("got content I/O %d, want %d", got, want) 2608 } 2609 2610 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2611 // delta encoded. 2612 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2613 t.Errorf("got index I/O %d, want %d", got, want) 2614 } 2615 }) 2616 2617 t.Run("LineMatches with BM25", func(t *testing.T) { 2618 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2619 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) 2620 2621 // 4096 (content) + 2 (overhead: newlines or doc sections) 2622 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2623 t.Errorf("got content I/O %d, want %d", got, want) 2624 } 2625 2626 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2627 // delta encoded. 
2628 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2629 t.Errorf("got index I/O %d, want %d", got, want) 2630 } 2631 }) 2632 2633 t.Run("ChunkMatches with BM25", func(t *testing.T) { 2634 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2635 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) 2636 2637 // 4096 (content) + 2 (overhead: newlines or doc sections) 2638 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2639 t.Errorf("got content I/O %d, want %d", got, want) 2640 } 2641 2642 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2643 // delta encoded. 2644 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2645 t.Errorf("got index I/O %d, want %d", got, want) 2646 } 2647 }) 2648} 2649 2650func TestStartLineAnchor(t *testing.T) { 2651 b := testShardBuilder(t, nil, 2652 Document{ 2653 Name: "f1", 2654 Content: []byte( 2655 `hello 2656start of middle of line 2657`), 2658 }) 2659 2660 t.Run("LineMatches", func(t *testing.T) { 2661 q, err := query.Parse("^start") 2662 if err != nil { 2663 t.Errorf("parse: %v", err) 2664 } 2665 2666 res := searchForTest(t, b, q) 2667 if len(res.Files) != 1 { 2668 t.Errorf("got %v, want 1 file", res.Files) 2669 } 2670 2671 q, err = query.Parse("^middle") 2672 if err != nil { 2673 t.Errorf("parse: %v", err) 2674 } 2675 res = searchForTest(t, b, q) 2676 if len(res.Files) != 0 { 2677 t.Errorf("got %v, want 0 files", res.Files) 2678 } 2679 }) 2680 2681 t.Run("ChunkMatches", func(t *testing.T) { 2682 q, err := query.Parse("^start") 2683 if err != nil { 2684 t.Errorf("parse: %v", err) 2685 } 2686 2687 res := searchForTest(t, b, q, chunkOpts) 2688 if len(res.Files) != 1 { 2689 t.Errorf("got %v, want 1 file", res.Files) 2690 } 2691 2692 q, err = query.Parse("^middle") 2693 if err != nil { 2694 t.Errorf("parse: %v", err) 2695 } 2696 res = searchForTest(t, b, q, chunkOpts) 2697 if len(res.Files) != 0 
{ 2698 t.Errorf("got %v, want 0 files", res.Files) 2699 } 2700 }) 2701} 2702 2703func TestAndOrUnicode(t *testing.T) { 2704 q, err := query.Parse("orange.*apple") 2705 if err != nil { 2706 t.Errorf("parse: %v", err) 2707 } 2708 finalQ := query.NewAnd(q, 2709 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2710 query.NewOr(&query.Branch{Pattern: "master"})))) 2711 2712 b := testShardBuilder(t, &zoekt.Repository{ 2713 Name: "name", 2714 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, 2715 }, Document{ 2716 Name: "f2", 2717 Content: []byte("orange\u2318apple"), 2718 // --------------0123456 78901 2719 Branches: []string{"master"}, 2720 }) 2721 2722 t.Run("LineMatches", func(t *testing.T) { 2723 res := searchForTest(t, b, finalQ) 2724 if len(res.Files) != 1 { 2725 t.Errorf("got %v, want 1 result", res.Files) 2726 } 2727 }) 2728 2729 t.Run("ChunkMatches", func(t *testing.T) { 2730 res := searchForTest(t, b, finalQ, chunkOpts) 2731 if len(res.Files) != 1 { 2732 t.Errorf("got %v, want 1 result", res.Files) 2733 } 2734 }) 2735} 2736 2737func TestAndShort(t *testing.T) { 2738 content := []byte("bla needle at orange bla") 2739 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2740 Document{Name: "f1", Content: content}, 2741 Document{Name: "f2", Content: []byte("xx at xx")}, 2742 Document{Name: "f3", Content: []byte("yy orange xx")}, 2743 ) 2744 2745 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2746 &query.Substring{Pattern: "orange"}) 2747 2748 t.Run("LineMatches", func(t *testing.T) { 2749 res := searchForTest(t, b, q) 2750 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2751 t.Errorf("got %v, want 1 result", res.Files) 2752 } 2753 }) 2754 2755 t.Run("ChunkMatches", func(t *testing.T) { 2756 res := searchForTest(t, b, q, chunkOpts) 2757 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2758 t.Errorf("got %v, want 1 result", res.Files) 2759 } 2760 }) 2761} 2762 2763func 
TestNoCollectRegexpSubstring(t *testing.T) { 2764 content := []byte("bla final bla\nfoo final, foo") 2765 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2766 Document{Name: "f1", Content: content}, 2767 ) 2768 2769 q := &query.Regexp{ 2770 Regexp: mustParseRE("final[,.]"), 2771 } 2772 2773 t.Run("LineMatches", func(t *testing.T) { 2774 res := searchForTest(t, b, q) 2775 if len(res.Files) != 1 { 2776 t.Fatalf("got %v, want 1 result", res.Files) 2777 } 2778 if f := res.Files[0]; len(f.LineMatches) != 1 { 2779 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2780 } 2781 }) 2782 2783 t.Run("ChunkMatches", func(t *testing.T) { 2784 res := searchForTest(t, b, q, chunkOpts) 2785 if len(res.Files) != 1 { 2786 t.Fatalf("got %v, want 1 result", res.Files) 2787 } 2788 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2789 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2790 } 2791 }) 2792} 2793 2794func printLineMatches(ms []zoekt.LineMatch) string { 2795 var ss []string 2796 for _, m := range ms { 2797 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2798 } 2799 2800 return strings.Join(ss, ", ") 2801} 2802 2803func TestLang(t *testing.T) { 2804 content := []byte("bla needle bla") 2805 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2806 Document{Name: "f1", Content: content}, 2807 Document{Name: "f2", Language: "java", Content: content}, 2808 Document{Name: "f3", Language: "cpp", Content: content}, 2809 ) 2810 2811 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2812 &query.Language{Language: "cpp"}) 2813 2814 t.Run("LineMatches", func(t *testing.T) { 2815 res := searchForTest(t, b, q) 2816 if len(res.Files) != 1 { 2817 t.Fatalf("got %v, want 1 result in f3", res.Files) 2818 } 2819 f := res.Files[0] 2820 if f.FileName != "f3" || f.Language != "cpp" { 2821 t.Fatalf("got %v, want 1 match with language cpp", f) 2822 } 2823 }) 2824 2825 
t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut queries for a language that no indexed document has and
// expects no results and no index bytes loaded.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms runs a query made only of a Language atom (no text
// atom) and expects one file; f2 is the only document indexed as "java".
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms ANDs a negated substring that occurs nowhere with a
// matching Repo atom and expects both indexed documents to be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
}

// TestSymbolBoundaryStart searches for a symbol whose section begins at byte 0
// of the document and expects a single match at offset 0.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd searches for a symbol whose section ends at the last
// byte of the document and expects a single match at offset 14.
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring searches for "bla" as a symbol. The text occurs several
// times in the document; the test expects the single reported match to start
// at offset 7 with length 3, which lies inside the symbol section {4, 12}.
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact searches for "sym" as a symbol in a document where
// the text appears several times but only the occurrence at {4, 7} is marked
// as a symbol; it expects a single match at offset 4.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact searches symbols with the anchored regexp "^bla$" and
// expects only the symbol section at offset 5 to match.
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: 
[]DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 3072 }, 3073 ) 3074 q := &query.Symbol{ 3075 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 3076 } 3077 t.Run("LineMatches", func(t *testing.T) { 3078 res := searchForTest(t, b, q) 3079 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3080 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3081 } 3082 m := res.Files[0].LineMatches[0].LineFragments[0] 3083 if m.Offset != 5 { 3084 t.Fatalf("got offset %d, want 5", m.Offset) 3085 } 3086 }) 3087 3088 t.Run("ChunkMatches", func(t *testing.T) { 3089 res := searchForTest(t, b, q, chunkOpts) 3090 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3091 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3092 } 3093 m := res.Files[0].ChunkMatches[0].Ranges[0] 3094 if m.Start.ByteOffset != 5 { 3095 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 3096 } 3097 }) 3098} 3099 3100func TestSymbolRegexpPartial(t *testing.T) { 3101 content := []byte("abcdef") 3102 // ----------------012345 3103 3104 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3105 Document{ 3106 Name: "f1", 3107 Content: content, 3108 Symbols: []DocumentSection{{0, 6}}, 3109 }, 3110 ) 3111 q := &query.Symbol{ 3112 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 3113 } 3114 t.Run("LineMatches", func(t *testing.T) { 3115 res := searchForTest(t, b, q) 3116 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3117 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3118 } 3119 m := res.Files[0].LineMatches[0].LineFragments[0] 3120 if m.Offset != 1 { 3121 t.Fatalf("got offset %d, want 1", m.Offset) 3122 } 3123 if m.MatchLength != 3 { 3124 t.Fatalf("got match length %d, want 3", m.MatchLength) 3125 } 3126 }) 3127 3128 t.Run("ChunkMatches", func(t *testing.T) { 3129 res := searchForTest(t, b, q, chunkOpts) 3130 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3131 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3132 } 3133 m := 
res.Files[0].ChunkMatches[0].Ranges[0] 3134 if m.Start.ByteOffset != 1 { 3135 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 3136 } 3137 if m.End.ByteOffset != 4 { 3138 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 3139 } 3140 }) 3141} 3142 3143func TestSymbolRegexpAll(t *testing.T) { 3144 docs := []Document{ 3145 { 3146 Name: "f1", 3147 Content: []byte("Hello Zoekt"), 3148 // --------------01234567890 3149 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 3150 }, 3151 { 3152 Name: "f2", 3153 Content: []byte("Second Zoekt Third"), 3154 // --------------012345678901234567 3155 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 3156 }, 3157 } 3158 3159 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...) 3160 q := &query.Symbol{ 3161 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 3162 } 3163 t.Run("LineMatches", func(t *testing.T) { 3164 res := searchForTest(t, b, q) 3165 if len(res.Files) != len(docs) { 3166 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3167 } 3168 for i, want := range docs { 3169 got := res.Files[i].LineMatches[0].LineFragments 3170 if len(got) != len(want.Symbols) { 3171 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3172 } 3173 3174 for j, sec := range want.Symbols { 3175 if sec.Start != got[j].Offset { 3176 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 3177 } 3178 } 3179 } 3180 }) 3181 3182 t.Run("ChunkMatches", func(t *testing.T) { 3183 res := searchForTest(t, b, q, chunkOpts) 3184 if len(res.Files) != len(docs) { 3185 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3186 } 3187 for i, want := range docs { 3188 got := res.Files[i].ChunkMatches[0].Ranges 3189 if len(got) != len(want.Symbols) { 3190 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3191 } 3192 3193 for j, sec := range want.Symbols { 3194 if sec.Start != got[j].Start.ByteOffset { 3195 t.Fatalf("got offset 
%d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
				}
			}
		}
	})
}

// TestHitIterTerminate runs a substring search against contrived content and
// passes as long as the search completes; no result is inspected.
func TestHitIterTerminate(t *testing.T) {
	// contrived input: trigram frequencies forces selecting abc +
	// def for the distance iteration. There is no index, so this
	// will advance the compressedPostingIterator to beyond the
	// end.
	content := []byte("abc bcdbcd cdecde abcabc def efg")
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
	})
}

// TestDistanceHitIterBailLast searches for "UAST" in content that contains
// "AST" and "UASH" but never "UAST", and expects no results.
func TestDistanceHitIterBailLast(t *testing.T) {
	content := []byte("AST AST AST UASH")
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})

	// This subtest runs with chunkOpts, so it is named accordingly; sharing a
	// name with the subtest above would make the two hard to tell apart.
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})
}

// TestDocumentSectionRuneBoundary expects Add to return an error for symbol
// sections that have an endpoint in the middle of a multi-byte rune. The
// content is three Kelvin signs of three bytes each, so byte offsets 2 and 7
// are not rune boundaries.
func TestDocumentSectionRuneBoundary(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b, err := NewShardBuilder(nil)
	if err != nil {
		t.Fatalf("NewShardBuilder: %v", err)
	}

	for i, sec := range []DocumentSection{
		{2, 6},
		{3, 7},
	} {
		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
			Symbols: []DocumentSection{sec},
		}); err == nil {
			t.Errorf("%d: Add succeeded", i)
		}
	}
}

// TestUnicodeQuery searches for a pattern made of three Kelvin signs in a
// document with exactly that content and checks that the single match spans
// the full byte length of the content.
func TestUnicodeQuery(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(content),
		},
	)

	q := &query.Substring{Pattern: content}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Print the chunk matches that were counted, not LineMatches.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 line fragment, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks that
// its original text is not searchable while the literal "NOT-INDEXED" is
// found in its place.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {

		b, err := NewShardBuilder(nil)
		if err != nil {
			t.Fatalf("NewShardBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with texts that must be accepted
// (SkipReasonNone) and texts that must be rejected, first with the
// large-file flag off and then with it on.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone {
			t.Errorf("Check(%q): %v", text, skip)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx", "0123456789abcdefghi"} {
		if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone {
			t.Errorf("Check(%q): %v", text, skip)
		}
	}
	for _, text := range []string{"zero\x00byte", "xx"} {
		if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd searches content with a regexp that requires "apple" followed
// by "banana" with no newline in between. It expects one regexp to be
// considered and only f1 to match.
func TestLineAnd(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would make every assertion below meaningless.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestLineAndFileName applies the same "apple ... banana" regexp to file
// names only. It expects one regexp to be considered and only the file named
// "apple banana" to match.
func TestLineAndFileName(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		// A nil regexp would make every assertion below meaningless.
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

func TestMultiLineRegex(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange")},
		Document{Name: "f3", Content: []byte("grape apple")},
	)
	pattern := "(apple).*?[[:space:]].*?(grape)"
	r, _ := syntax.Parse(pattern, syntax.Perl)

	q := query.Regexp{
		Regexp: r,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].LineMatches); l != 2 {
			t.Errorf("got %v, want 2 line matches", l)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 2
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
		if l := len(res.Files[0].ChunkMatches); l != 1 {
t.Errorf("got %v, want 1 chunk matches", l) 3507 } 3508 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3509 t.Errorf("got %v, want 1 chunk ranges", l) 3510 } 3511 }) 3512} 3513 3514func TestSearchTypeFileName(t *testing.T) { 3515 b := testShardBuilder(t, &zoekt.Repository{ 3516 Name: "reponame", 3517 }, 3518 Document{Name: "f1", Content: []byte("bla the needle")}, 3519 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3520 // -----------------------------------012345678901234567890-123456 3521 ) 3522 3523 t.Run("LineMatches", func(t *testing.T) { 3524 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3525 t.Helper() 3526 fmatches := res.Files 3527 if len(fmatches) != 1 { 3528 t.Errorf("got %v, want 1 matches", len(fmatches)) 3529 return 3530 } 3531 if len(fmatches[0].LineMatches) != 1 { 3532 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3533 return 3534 } 3535 var got string 3536 if fmatches[0].LineMatches[0].FileName { 3537 got = fmatches[0].FileName 3538 } else { 3539 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3540 } 3541 3542 if got != want { 3543 t.Errorf("got %s, want %s", got, want) 3544 } 3545 } 3546 3547 // Only return the later match in the second file 3548 res := searchForTest(t, b, query.NewAnd( 3549 &query.Type{ 3550 Type: query.TypeFileName, 3551 Child: &query.Substring{Pattern: "needle"}, 3552 }, 3553 &query.Substring{Pattern: "file"})) 3554 wantSingleMatch(res, "f2:8") 3555 3556 // Only return a filename result 3557 res = searchForTest(t, b, 3558 &query.Type{ 3559 Type: query.TypeFileName, 3560 Child: &query.Substring{Pattern: "file"}, 3561 }) 3562 wantSingleMatch(res, "f2") 3563 }) 3564 3565 t.Run("ChunkMatches", func(t *testing.T) { 3566 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3567 t.Helper() 3568 fmatches := res.Files 3569 if len(fmatches) != 1 { 3570 t.Errorf("got %v, want 1 matches", len(fmatches)) 3571 
return 3572 } 3573 if len(fmatches[0].ChunkMatches) != 1 { 3574 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3575 return 3576 } 3577 var got string 3578 if fmatches[0].ChunkMatches[0].FileName { 3579 got = fmatches[0].FileName 3580 } else { 3581 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3582 } 3583 3584 if got != want { 3585 t.Errorf("got %s, want %s", got, want) 3586 } 3587 } 3588 3589 // Only return the later match in the second file 3590 res := searchForTest(t, b, query.NewAnd( 3591 &query.Type{ 3592 Type: query.TypeFileName, 3593 Child: &query.Substring{Pattern: "needle"}, 3594 }, 3595 &query.Substring{Pattern: "file"}), 3596 chunkOpts, 3597 ) 3598 wantSingleMatch(res, "f2:8") 3599 3600 // Only return a filename result 3601 res = searchForTest(t, b, 3602 &query.Type{ 3603 Type: query.TypeFileName, 3604 Child: &query.Substring{Pattern: "file"}, 3605 }, 3606 chunkOpts, 3607 ) 3608 wantSingleMatch(res, "f2") 3609 }) 3610} 3611 3612func TestSearchTypeLanguage(t *testing.T) { 3613 b := testShardBuilder(t, &zoekt.Repository{ 3614 Name: "reponame", 3615 }, 3616 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3617 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3618 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3619 Document{Name: "be.magik", Content: []byte(`_package unicorn`)}, 3620 ) 3621 3622 t.Log(b.languageMap) 3623 3624 t.Run("LineMatches", func(t *testing.T) { 3625 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3626 t.Helper() 3627 fmatches := res.Files 3628 if len(fmatches) != 1 { 3629 t.Errorf("got %v, want 1 matches", len(fmatches)) 3630 return 3631 } 3632 if len(fmatches[0].LineMatches) != 1 { 3633 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3634 return 3635 } 3636 var got string 3637 if fmatches[0].LineMatches[0].FileName { 3638 got = fmatches[0].FileName 3639 } else { 
3640 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3641 } 3642 3643 if got != want { 3644 t.Errorf("got %s, want %s", got, want) 3645 } 3646 } 3647 3648 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3649 wantSingleMatch(res, "apex.cls") 3650 3651 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3652 wantSingleMatch(res, "tex.cls") 3653 3654 res = searchForTest(t, b, &query.Language{Language: "C"}) 3655 wantSingleMatch(res, "hello.h") 3656 3657 res = searchForTest(t, b, &query.Language{Language: "Magik"}) 3658 wantSingleMatch(res, "be.magik") 3659 3660 // test fallback language search by pretending it's an older index version 3661 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3662 if len(res.Files) != 0 { 3663 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3664 } 3665 3666 b.featureVersion = 11 // force fallback 3667 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3668 wantSingleMatch(res, "hello.h") 3669 }) 3670 3671 t.Run("ChunkMatches", func(t *testing.T) { 3672 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3673 t.Helper() 3674 fmatches := res.Files 3675 if len(fmatches) != 1 { 3676 t.Errorf("got %v, want 1 matches", len(fmatches)) 3677 return 3678 } 3679 if len(fmatches[0].ChunkMatches) != 1 { 3680 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3681 return 3682 } 3683 var got string 3684 if fmatches[0].ChunkMatches[0].FileName { 3685 got = fmatches[0].FileName 3686 } else { 3687 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3688 } 3689 3690 if got != want { 3691 t.Errorf("got %s, want %s", got, want) 3692 } 3693 } 3694 3695 b.featureVersion = FeatureVersion // reset feature version 3696 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3697 wantSingleMatch(res, "apex.cls") 3698 3699 res = searchForTest(t, b, 
&query.Language{Language: "TeX"}, chunkOpts) 3700 wantSingleMatch(res, "tex.cls") 3701 3702 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3703 wantSingleMatch(res, "hello.h") 3704 3705 // test fallback language search by pretending it's an older index version 3706 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3707 if len(res.Files) != 0 { 3708 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3709 } 3710 3711 b.featureVersion = 11 // force fallback 3712 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3713 wantSingleMatch(res, "hello.h") 3714 }) 3715} 3716 3717func TestStats(t *testing.T) { 3718 ignored := []cmp.Option{ 3719 cmpopts.EquateEmpty(), 3720 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), 3721 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 3722 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 3723 } 3724 3725 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry { 3726 searcher := searcherForTest(t, b) 3727 indexdata := searcher.(*indexData) 3728 return indexdata.repoListEntry 3729 } 3730 3731 t.Run("one empty repo", func(t *testing.T) { 3732 b := testShardBuilder(t, nil) 3733 got := repoListEntries(b) 3734 want := []zoekt.RepoListEntry{ 3735 { 3736 Stats: zoekt.RepoStats{ 3737 Repos: 0, 3738 Shards: 1, 3739 Documents: 0, 3740 IndexBytes: 20, 3741 ContentBytes: 0, 3742 NewLinesCount: 0, 3743 DefaultBranchNewLinesCount: 0, 3744 OtherBranchesNewLinesCount: 0, 3745 }, 3746 }, 3747 } 3748 3749 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3750 t.Fatalf("mismatch (-want +got):\n%s", diff) 3751 } 3752 }) 3753 3754 t.Run("one simple shard", func(t *testing.T) { 3755 b := testShardBuilder(t, nil, 3756 Document{Name: "doc 0", Content: []byte("content 0")}, 3757 Document{Name: "doc 1", Content: []byte("content 1")}, 3758 ) 3759 got := repoListEntries(b) 3760 want := []zoekt.RepoListEntry{ 3761 { 3762 Stats: zoekt.RepoStats{ 3763 Repos: 
0, 3764 Shards: 1, 3765 Documents: 2, 3766 IndexBytes: 224, 3767 ContentBytes: 28, 3768 NewLinesCount: 0, 3769 DefaultBranchNewLinesCount: 0, 3770 OtherBranchesNewLinesCount: 0, 3771 }, 3772 }, 3773 } 3774 3775 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3776 t.Fatalf("mismatch (-want +got):\n%s", diff) 3777 } 3778 }) 3779 3780 t.Run("one compound shard", func(t *testing.T) { 3781 b := testShardBuilderCompound(t, 3782 []*zoekt.Repository{ 3783 {Name: "repo 0"}, 3784 {Name: "repo 1"}, 3785 }, 3786 [][]Document{ 3787 { 3788 {Name: "doc 0", Content: []byte("content 0")}, 3789 {Name: "doc 1", Content: []byte("content 1")}, 3790 }, 3791 { 3792 {Name: "doc 2", Content: []byte("content 2")}, 3793 {Name: "doc 3", Content: []byte("content 3")}, 3794 }, 3795 }, 3796 ) 3797 got := repoListEntries(b) 3798 want := []zoekt.RepoListEntry{ 3799 { 3800 Stats: zoekt.RepoStats{ 3801 Repos: 0, 3802 Shards: 1, 3803 Documents: 2, 3804 IndexBytes: 180, 3805 ContentBytes: 28, 3806 NewLinesCount: 0, 3807 DefaultBranchNewLinesCount: 0, 3808 OtherBranchesNewLinesCount: 0, 3809 }, 3810 }, 3811 { 3812 Stats: zoekt.RepoStats{ 3813 Repos: 0, 3814 Shards: 1, 3815 Documents: 2, 3816 IndexBytes: 180, 3817 ContentBytes: 28, 3818 NewLinesCount: 0, 3819 DefaultBranchNewLinesCount: 0, 3820 OtherBranchesNewLinesCount: 0, 3821 }, 3822 }, 3823 } 3824 3825 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3826 t.Fatalf("mismatch (-want +got):\n%s", diff) 3827 } 3828 }) 3829 3830 t.Run("compound shard with empty repos", func(t *testing.T) { 3831 b := testShardBuilderCompound(t, 3832 []*zoekt.Repository{ 3833 {Name: "repo 0"}, 3834 {Name: "repo 1"}, 3835 {Name: "repo 2"}, 3836 {Name: "repo 3"}, 3837 {Name: "repo 4"}, 3838 }, 3839 [][]Document{ 3840 {{Name: "doc 0", Content: []byte("content 0")}}, 3841 nil, 3842 {{Name: "doc 1", Content: []byte("content 1")}}, 3843 nil, 3844 nil, 3845 }, 3846 ) 3847 got := repoListEntries(b) 3848 3849 entryEmpty := zoekt.RepoListEntry{Stats: 
zoekt.RepoStats{ 3850 Shards: 1, 3851 Documents: 0, 3852 ContentBytes: 0, 3853 }} 3854 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3855 Shards: 1, 3856 Documents: 1, 3857 ContentBytes: 14, 3858 }} 3859 3860 want := []zoekt.RepoListEntry{ 3861 entryNonEmpty, 3862 entryEmpty, 3863 entryNonEmpty, 3864 entryEmpty, 3865 entryEmpty, 3866 } 3867 3868 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3869 t.Fatalf("mismatch (-want +got):\n%s", diff) 3870 } 3871 }) 3872} 3873 3874// This tests the frequent pattern "\bLITERAL\b". 3875func TestWordSearch(t *testing.T) { 3876 content := []byte("needle the bla") 3877 // ----------------01234567890123 3878 3879 b := testShardBuilder(t, nil, 3880 Document{ 3881 Name: "f1", 3882 Content: content, 3883 }) 3884 3885 t.Run("LineMatches", func(t *testing.T) { 3886 sres := searchForTest(t, b, 3887 &query.Regexp{ 3888 Regexp: mustParseRE("\\bthe\\b"), 3889 CaseSensitive: true, 3890 Content: true, 3891 }) 3892 3893 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3894 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3895 } 3896 3897 if sres.Stats.RegexpsConsidered != 0 { 3898 t.Fatal("expected regexp to be skipped") 3899 } 3900 3901 got := sres.Files[0].LineMatches[0] 3902 want := zoekt.LineMatch{ 3903 LineFragments: []zoekt.LineFragmentMatch{{ 3904 LineOffset: 7, 3905 Offset: 7, 3906 MatchLength: 3, 3907 }}, 3908 Line: content, 3909 FileName: false, 3910 LineNumber: 1, 3911 LineStart: 0, 3912 LineEnd: 14, 3913 } 3914 3915 if !reflect.DeepEqual(got, want) { 3916 t.Errorf("got %#v, want %#v", got, want) 3917 } 3918 }) 3919 3920 t.Run("ChunkMatches", func(t *testing.T) { 3921 sres := searchForTest(t, b, 3922 &query.Regexp{ 3923 Regexp: mustParseRE("\\bthe\\b"), 3924 CaseSensitive: true, 3925 }, chunkOpts) 3926 3927 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3928 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3929 } 3930 3931 if sres.Stats.RegexpsConsidered != 0 { 
3932 t.Fatal("expected regexp to be skipped") 3933 } 3934 3935 got := sres.Files[0].ChunkMatches[0] 3936 want := zoekt.ChunkMatch{ 3937 Content: content, 3938 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3939 Ranges: []zoekt.Range{{ 3940 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3941 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3942 }}, 3943 } 3944 3945 if diff := cmp.Diff(want, got); diff != "" { 3946 t.Fatal(diff) 3947 } 3948 }) 3949} 3950 3951// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match. 3952// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk. 3953func BenchmarkScoreChunkMatches(b *testing.B) { 3954 ctx := context.Background() 3955 var builder strings.Builder 3956 for i := range 1000 { 3957 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i)) 3958 } 3959 3960 searcher := searcherForTest(b, testShardBuilder(b, nil, 3961 Document{Name: "f1", Content: []byte(builder.String())}, 3962 )) 3963 3964 q := &query.Or{ 3965 Children: []query.Q{ 3966 &query.Substring{Pattern: "f"}, 3967 &query.Substring{Pattern: "t"}, 3968 }} 3969 3970 b.Run("score large ChunkMatch", func(b *testing.B) { 3971 b.ReportAllocs() 3972 b.ResetTimer() 3973 3974 for i := 0; i < b.N; i++ { 3975 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) 3976 if err != nil { 3977 b.Fatal(err) 3978 } 3979 3980 matches := sres.Files 3981 if len(matches) == 0 { 3982 b.Fatalf("want file index, got none") 3983 } 3984 } 3985 }) 3986}