fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 110 kB View raw
1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt" 31 "github.com/sourcegraph/zoekt/query" 32) 33 34func clearScores(r *zoekt.SearchResult) { 35 for i := range r.Files { 36 r.Files[i].Score = 0.0 37 for j := range r.Files[i].LineMatches { 38 r.Files[i].LineMatches[j].Score = 0.0 39 } 40 for j := range r.Files[i].ChunkMatches { 41 r.Files[i].ChunkMatches[j].Score = 0.0 42 r.Files[i].ChunkMatches[j].BestLineMatch = 0 43 } 44 r.Files[i].Checksum = nil 45 r.Files[i].Debug = "" 46 } 47} 48 49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder { 50 tb.Helper() 51 52 b, err := NewShardBuilder(repo) 53 if err != nil { 54 tb.Fatalf("NewShardBuilder: %v", err) 55 } 56 57 for i, d := range docs { 58 if err := b.Add(d); err != nil { 59 tb.Fatalf("Add %d: %v", i, err) 60 } 61 } 62 63 return b 64} 65 66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder { 67 t.Helper() 68 69 b := newShardBuilder(0) 70 b.indexFormatVersion = NextIndexFormatVersion 71 72 if len(repos) != len(docs) { 73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: 
len(repos)=%d len(docs)=%d", len(repos), len(docs)) 74 } 75 76 for i, repo := range repos { 77 if err := b.setRepository(repo); err != nil { 78 t.Fatal(err) 79 } 80 for j, d := range docs[i] { 81 if err := b.Add(d); err != nil { 82 t.Fatalf("Add %d %d: %v", i, j, err) 83 } 84 } 85 } 86 87 return b 88} 89 90func TestBoundary(t *testing.T) { 91 b := testShardBuilder(t, nil, 92 Document{Name: "f1", Content: []byte("x the")}, 93 Document{Name: "f1", Content: []byte("reader")}) 94 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 95 if len(res.Files) > 0 { 96 t.Fatalf("got %v, want no matches", res.Files) 97 } 98} 99 100func TestDocSectionInvalid(t *testing.T) { 101 b, err := NewShardBuilder(nil) 102 if err != nil { 103 t.Fatalf("NewShardBuilder: %v", err) 104 } 105 doc := Document{ 106 Name: "f1", 107 Content: []byte("01234567890123"), 108 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 109 } 110 111 if err := b.Add(doc); err == nil { 112 t.Errorf("overlapping doc sections should fail") 113 } 114 115 doc = Document{ 116 Name: "f1", 117 Content: []byte("01234567890123"), 118 Symbols: []DocumentSection{{0, 20}}, 119 } 120 121 if err := b.Add(doc); err == nil { 122 t.Errorf("doc sections beyond EOF should fail") 123 } 124} 125 126func TestBasic(t *testing.T) { 127 b := testShardBuilder(t, nil, 128 Document{ 129 Name: "f2", 130 Content: []byte("to carry water in the no later bla"), 131 // --------------0123456789012345678901234567890123 132 }) 133 134 t.Run("LineMatch", func(t *testing.T) { 135 res := searchForTest(t, b, &query.Substring{ 136 Pattern: "water", 137 CaseSensitive: true, 138 }) 139 fmatches := res.Files 140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 141 t.Fatalf("got %v, want 1 matches", fmatches) 142 } 143 144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 145 want := "f2:9" 146 if got != want { 147 t.Errorf("1: got %s, want %s", got, want) 148 } 149 }) 150 151 
t.Run("ChunkMatch", func(t *testing.T) { 152 res := searchForTest(t, b, &query.Substring{ 153 Pattern: "water", 154 CaseSensitive: true, 155 }, chunkOpts) 156 fmatches := res.Files 157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 158 t.Fatalf("got %v, want 1 matches", fmatches) 159 } 160 161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 162 want := "f2:9" 163 if got != want { 164 t.Errorf("1: got %s, want %s", got, want) 165 } 166 }) 167} 168 169func TestEmptyIndex(t *testing.T) { 170 b := testShardBuilder(t, nil) 171 searcher := searcherForTest(t, b) 172 173 var opts zoekt.SearchOptions 174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 175 t.Fatalf("Search: %v", err) 176 } 177 178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 179 t.Fatalf("List: %v", err) 180 } 181 182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 183 t.Fatalf("Search: %v", err) 184 } 185} 186 187type memSeeker struct { 188 data []byte 189} 190 191func (s *memSeeker) Name() string { 192 return "memseeker" 193} 194 195func (s *memSeeker) Close() {} 196func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 197 return s.data[off : off+sz], nil 198} 199 200func (s *memSeeker) Size() (uint32, error) { 201 return uint32(len(s.data)), nil 202} 203 204func TestNewlines(t *testing.T) { 205 b := testShardBuilder(t, nil, 206 // -----------------------------------------012345-678901-234 207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 208 209 t.Run("LineMatches", func(t *testing.T) { 210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 211 212 matches := sres.Files 213 want := []zoekt.FileMatch{{ 214 FileName: "filename", 215 LineMatches: []zoekt.LineMatch{{ 216 LineFragments: []zoekt.LineFragmentMatch{{ 217 
Offset: 8, 218 LineOffset: 2, 219 MatchLength: 3, 220 }}, 221 Line: []byte("line2\n"), 222 LineStart: 6, 223 LineEnd: 12, 224 LineNumber: 2, 225 }}, 226 }} 227 228 if diff := cmp.Diff(matches, want); diff != "" { 229 t.Fatal(diff) 230 } 231 }) 232 233 t.Run("ChunkMatches", func(t *testing.T) { 234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 235 236 matches := sres.Files 237 want := []zoekt.FileMatch{{ 238 FileName: "filename", 239 ChunkMatches: []zoekt.ChunkMatch{{ 240 Content: []byte("line2\n"), 241 ContentStart: zoekt.Location{ 242 ByteOffset: 6, 243 LineNumber: 2, 244 Column: 1, 245 }, 246 Ranges: []zoekt.Range{{ 247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 249 }}, 250 }}, 251 }} 252 253 if diff := cmp.Diff(want, matches); diff != "" { 254 t.Fatal(diff) 255 } 256 }) 257} 258 259// A result spanning multiple lines should have LineMatches that only cover 260// single lines. 
261func TestQueryNewlines(t *testing.T) { 262 text := "line1\nline2\nbla" 263 b := testShardBuilder(t, nil, 264 Document{Name: "filename", Content: []byte(text)}) 265 266 t.Run("LineMatches", func(t *testing.T) { 267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 268 matches := sres.Files 269 if len(matches) != 1 { 270 t.Fatalf("got %d file matches, want exactly one", len(matches)) 271 } 272 m := matches[0] 273 if len(m.LineMatches) != 2 { 274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches) 275 } 276 }) 277 278 t.Run("ChunkMatches", func(t *testing.T) { 279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 280 matches := sres.Files 281 if len(matches) != 1 { 282 t.Fatalf("got %d file matches, want exactly one", len(matches)) 283 } 284 m := matches[0] 285 if len(m.ChunkMatches) != 1 { 286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 287 } 288 }) 289} 290 291var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} 292 293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { 294 searcher := searcherForTest(t, b) 295 var opts zoekt.SearchOptions 296 if len(o) > 0 { 297 opts = o[0] 298 } 299 res, err := searcher.Search(context.Background(), q, &opts) 300 if err != nil { 301 t.Fatalf("Search(%s): %v", q, err) 302 } 303 clearScores(res) 304 return res 305} 306 307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher { 308 var buf bytes.Buffer 309 if err := b.Write(&buf); err != nil { 310 t.Fatal(err) 311 } 312 f := &memSeeker{buf.Bytes()} 313 314 searcher, err := NewSearcher(f) 315 if err != nil { 316 t.Fatalf("NewSearcher: %v", err) 317 } 318 319 return searcher 320} 321 322func TestCaseFold(t *testing.T) { 323 b := testShardBuilder(t, nil, 324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 325 // -----------------------------------012345678901234 326 ) 327 t.Run("LineMatches", 
func(t *testing.T) { 328 sres := searchForTest(t, b, &query.Substring{ 329 Pattern: "bananas", 330 CaseSensitive: true, 331 }) 332 matches := sres.Files 333 if len(matches) != 0 { 334 t.Errorf("foldcase: got %#v, want 0 matches", matches) 335 } 336 337 sres = searchForTest(t, b, 338 &query.Substring{ 339 Pattern: "BaNaNAS", 340 CaseSensitive: true, 341 }) 342 matches = sres.Files 343 if len(matches) != 1 { 344 t.Errorf("no foldcase: got %v, want 1 matches", matches) 345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 346 t.Errorf("foldcase: got %v, want offsets 7", matches) 347 } 348 }) 349 350 t.Run("ChunkMatches", func(t *testing.T) { 351 sres := searchForTest(t, b, &query.Substring{ 352 Pattern: "bananas", 353 CaseSensitive: true, 354 }, chunkOpts) 355 matches := sres.Files 356 if len(matches) != 0 { 357 t.Errorf("foldcase: got %#v, want 0 matches", matches) 358 } 359 360 sres = searchForTest(t, b, 361 &query.Substring{ 362 Pattern: "BaNaNAS", 363 CaseSensitive: true, 364 }) 365 matches = sres.Files 366 if len(matches) != 1 { 367 t.Errorf("no foldcase: got %v, want 1 matches", matches) 368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 369 t.Errorf("foldcase: got %v, want offsets 7", matches) 370 } 371 }) 372} 373 374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 375// chars. Those are then set as symbols. 
376func wordsAsSymbols(doc Document) Document { 377 re := regexp.MustCompile(`\b\w{2,}\b`) 378 var symbols []DocumentSection 379 var symbolsMetadata []*zoekt.Symbol 380 for _, match := range re.FindAllIndex(doc.Content, -1) { 381 symbols = append(symbols, DocumentSection{ 382 Start: uint32(match[0]), 383 End: uint32(match[1]), 384 }) 385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) 386 } 387 doc.Symbols = symbols 388 doc.SymbolsMetaData = symbolsMetadata 389 return doc 390} 391 392func TestSearchStats(t *testing.T) { 393 ctx := context.Background() 394 searcher := searcherForTest(t, testShardBuilder(t, nil, 395 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 396 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 397 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 398 // --------------------------------------------------0123456789012345 399 )) 400 401 andQuery := query.NewAnd( 402 &query.Substring{ 403 Pattern: "banana", 404 }, 405 &query.Substring{ 406 Pattern: "apple", 407 }, 408 ) 409 410 t.Run("LineMatches", func(t *testing.T) { 411 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) 412 if err != nil { 413 t.Fatal(err) 414 } 415 matches := sres.Files 416 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 417 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 418 } 419 420 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 421 t.Fatalf("got %#v, want offsets 2,9", matches) 422 } 423 }) 424 t.Run("ChunkMatches", func(t *testing.T) { 425 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 426 if err != nil { 427 t.Fatal(err) 428 } 429 matches := sres.Files 430 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 431 t.Fatalf("got %#v, want 1 chunk match with 2 
ranges", matches) 432 } 433 434 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 435 t.Fatalf("got %#v, want offsets 2,9", matches) 436 } 437 }) 438 t.Run("Stats", func(t *testing.T) { 439 cases := []struct { 440 Name string 441 Q query.Q 442 Want zoekt.Stats 443 }{{ 444 Name: "and-query", 445 Q: andQuery, 446 Want: zoekt.Stats{ 447 FilesLoaded: 1, 448 ContentBytesLoaded: 22, 449 IndexBytesLoaded: 10, 450 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 451 NgramLookups: 104, 452 MatchCount: 2, 453 FileCount: 1, 454 FilesConsidered: 2, 455 ShardsScanned: 1, 456 }, 457 }, { 458 Name: "one-trigram", 459 Q: &query.Substring{ 460 Pattern: "a y", 461 Content: true, 462 CaseSensitive: true, 463 }, 464 Want: zoekt.Stats{ 465 ContentBytesLoaded: 14, 466 IndexBytesLoaded: 1, 467 FileCount: 1, 468 FilesConsidered: 1, 469 FilesLoaded: 1, 470 ShardsScanned: 1, 471 MatchCount: 1, 472 NgramMatches: 1, 473 NgramLookups: 2, // once to lookup frequency then again to access posting list. 474 }, 475 }, { 476 Name: "one-trigram-case-insensitive", 477 Q: &query.Substring{ 478 Pattern: "a y", 479 Content: true, 480 }, 481 Want: zoekt.Stats{ 482 ContentBytesLoaded: 14, 483 IndexBytesLoaded: 1, 484 FileCount: 1, 485 FilesConsidered: 1, 486 FilesLoaded: 1, 487 ShardsScanned: 1, 488 MatchCount: 1, 489 NgramMatches: 1, 490 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
491 }, 492 }, { 493 Name: "one-trigram-pruned", 494 Q: &query.Substring{ 495 Pattern: "foo", 496 Content: true, 497 CaseSensitive: true, 498 }, 499 Want: zoekt.Stats{ 500 ShardsSkippedFilter: 1, 501 NgramLookups: 1, // only had to lookup once 502 }, 503 }, { 504 Name: "one-trigram-branch-pruned", 505 Q: query.NewAnd( 506 &query.Substring{ 507 Pattern: "foo", 508 Content: true, 509 CaseSensitive: true, 510 }, 511 &query.Substring{ 512 Pattern: "a y", 513 Content: true, 514 CaseSensitive: true, 515 }, 516 ), 517 Want: zoekt.Stats{ 518 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 519 ShardsSkippedFilter: 1, 520 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 521 }, 522 }, { 523 Name: "symbol-substr-nomatch", 524 Q: &query.Symbol{Expr: &query.Substring{ 525 Pattern: "banana apple", 526 Content: true, 527 CaseSensitive: true, 528 }}, 529 Want: zoekt.Stats{ 530 IndexBytesLoaded: 3, 531 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 532 MatchCount: 0, // even though there is a match it doesn't align with a symbol 533 ShardsScanned: 1, 534 NgramMatches: 1, 535 NgramLookups: 12, 536 }, 537 }, { 538 Name: "symbol-substr", 539 Q: &query.Symbol{Expr: &query.Substring{ 540 Pattern: "apple", 541 Content: true, 542 CaseSensitive: true, 543 }}, 544 Want: zoekt.Stats{ 545 ContentBytesLoaded: 35, 546 IndexBytesLoaded: 4, 547 FileCount: 2, 548 FilesConsidered: 2, // must be 2 to ensure we used the index 549 FilesLoaded: 2, 550 MatchCount: 2, // apple symbols is in two files 551 ShardsScanned: 1, 552 NgramMatches: 2, 553 NgramLookups: 5, 554 }, 555 }, { 556 Name: "symbol-regexp-nomatch", 557 Q: &query.Symbol{Expr: &query.Regexp{ 558 Regexp: mustParseRE("^apple.banana$"), 559 Content: true, 560 CaseSensitive: true, 561 }}, 562 Want: zoekt.Stats{ 563 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 564 IndexBytesLoaded: 10, 565 
FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index 566 FilesLoaded: 2, 567 MatchCount: 0, // even though there is a match it doesn't align with a symbol 568 ShardsScanned: 1, 569 NgramMatches: 3, 570 NgramLookups: 11, 571 }, 572 }, { 573 Name: "symbol-regexp", 574 Q: &query.Symbol{Expr: &query.Regexp{ 575 Regexp: mustParseRE("^app.e$"), 576 Content: true, 577 CaseSensitive: true, 578 }}, 579 Want: zoekt.Stats{ 580 ContentBytesLoaded: 35, 581 IndexBytesLoaded: 2, 582 FileCount: 2, 583 FilesConsidered: 2, // must be 2 to ensure we used the index 584 FilesLoaded: 2, 585 MatchCount: 2, // apple symbols is in two files 586 ShardsScanned: 1, 587 NgramMatches: 2, 588 NgramLookups: 2, 589 }, 590 }} 591 592 for _, tc := range cases { 593 t.Run(tc.Name, func(t *testing.T) { 594 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 595 if err != nil { 596 t.Fatal(err) 597 } 598 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 599 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 600 } 601 }) 602 } 603 }) 604} 605 606func TestAndNegateSearch(t *testing.T) { 607 b := testShardBuilder(t, nil, 608 Document{Name: "f1", Content: []byte("x banana y")}, 609 // -----------------------------------0123456789 610 Document{Name: "f4", Content: []byte("x banana apple y")}) 611 612 t.Run("LineMatches", func(t *testing.T) { 613 sres := searchForTest(t, b, query.NewAnd( 614 &query.Substring{ 615 Pattern: "banana", 616 }, 617 &query.Not{Child: &query.Substring{ 618 Pattern: "apple", 619 }})) 620 621 matches := sres.Files 622 623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 624 t.Fatalf("got %v, want 1 match", matches) 625 } 626 if matches[0].FileName != "f1" { 627 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 628 } 629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 630 t.Fatalf("got %v, want offset 2", matches) 631 } 632 }) 633 634 
t.Run("ChunkMatches", func(t *testing.T) { 635 sres := searchForTest(t, b, 636 query.NewAnd( 637 &query.Substring{ 638 Pattern: "banana", 639 }, 640 &query.Not{Child: &query.Substring{ 641 Pattern: "apple", 642 }}, 643 ), 644 chunkOpts, 645 ) 646 647 matches := sres.Files 648 649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 650 t.Fatalf("got %v, want 1 match", matches) 651 } 652 if matches[0].FileName != "f1" { 653 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 654 } 655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 656 t.Fatalf("got %v, want offset 2", matches) 657 } 658 }) 659} 660 661func TestNegativeMatchesOnlyShortcut(t *testing.T) { 662 b := testShardBuilder(t, nil, 663 Document{Name: "f1", Content: []byte("x banana y")}, 664 Document{Name: "f2", Content: []byte("x appelmoes y")}, 665 Document{Name: "f3", Content: []byte("x appelmoes y")}, 666 Document{Name: "f3", Content: []byte("x appelmoes y")}) 667 668 t.Run("LineMatches", func(t *testing.T) { 669 sres := searchForTest(t, b, query.NewAnd( 670 &query.Substring{ 671 Pattern: "banana", 672 }, 673 &query.Not{Child: &query.Substring{ 674 Pattern: "appel", 675 }})) 676 677 if sres.Stats.FilesConsidered != 1 { 678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 679 } 680 }) 681 682 t.Run("ChunkMatches", func(t *testing.T) { 683 sres := searchForTest(t, b, query.NewAnd( 684 &query.Substring{ 685 Pattern: "banana", 686 }, 687 &query.Not{Child: &query.Substring{ 688 Pattern: "appel", 689 }}), chunkOpts) 690 691 if sres.Stats.FilesConsidered != 1 { 692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 693 } 694 }) 695} 696 697func TestFileSearch(t *testing.T) { 698 b := testShardBuilder(t, nil, 699 Document{Name: "banzana", Content: []byte("x orange y")}, 700 // -------------0123456 701 Document{Name: "banana", Content: []byte("x apple y")}, 702 // -------------012345 703 ) 704 705 t.Run("LineMatches", func(t *testing.T) { 706 sres := searchForTest(t, b, 
&query.Substring{ 707 Pattern: "anan", 708 FileName: true, 709 }) 710 711 matches := sres.Files 712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 713 t.Fatalf("got %v, want 1 match", matches) 714 } 715 716 got := matches[0].LineMatches[0] 717 want := zoekt.LineMatch{ 718 Line: []byte("banana"), 719 LineFragments: []zoekt.LineFragmentMatch{{ 720 Offset: 1, 721 LineOffset: 1, 722 MatchLength: 4, 723 }}, 724 FileName: true, 725 } 726 727 if !reflect.DeepEqual(got, want) { 728 t.Errorf("got %#v, want %#v", got, want) 729 } 730 }) 731 732 t.Run("ChunkMatches", func(t *testing.T) { 733 sres := searchForTest(t, b, &query.Substring{ 734 Pattern: "anan", 735 FileName: true, 736 }, chunkOpts) 737 738 matches := sres.Files 739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 740 t.Fatalf("got %v, want 1 match", matches) 741 } 742 743 got := matches[0].ChunkMatches[0] 744 want := zoekt.ChunkMatch{ 745 Content: []byte("banana"), 746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 747 Ranges: []zoekt.Range{{ 748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 750 }}, 751 FileName: true, 752 } 753 754 if diff := cmp.Diff(want, got); diff != "" { 755 t.Fatal(diff) 756 } 757 }) 758 759 t.Run("FileNameSet", func(t *testing.T) { 760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 761 762 matches := sres.Files 763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 764 t.Fatalf("got %v, want 1 match", matches) 765 } 766 767 got := matches[0].ChunkMatches[0] 768 want := zoekt.ChunkMatch{ 769 Content: []byte("banana"), 770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 771 Ranges: []zoekt.Range{{ 772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 774 }}, 775 FileName: true, 776 } 777 778 if diff := cmp.Diff(want, got); diff 
!= "" { 779 t.Fatal(diff) 780 } 781 }) 782} 783 784func TestFileCase(t *testing.T) { 785 b := testShardBuilder(t, nil, 786 Document{Name: "BANANA", Content: []byte("x orange y")}) 787 788 t.Run("LineMatches", func(t *testing.T) { 789 sres := searchForTest(t, b, &query.Substring{ 790 Pattern: "banana", 791 FileName: true, 792 }) 793 794 matches := sres.Files 795 if len(matches) != 1 || matches[0].FileName != "BANANA" { 796 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 797 } 798 }) 799 800 t.Run("ChunkMatches", func(t *testing.T) { 801 sres := searchForTest(t, b, &query.Substring{ 802 Pattern: "banana", 803 FileName: true, 804 }, chunkOpts) 805 806 matches := sres.Files 807 if len(matches) != 1 || matches[0].FileName != "BANANA" { 808 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 809 } 810 }) 811} 812 813func TestFileRegexpSearchBruteForce(t *testing.T) { 814 b := testShardBuilder(t, nil, 815 Document{Name: "banzana", Content: []byte("x orange y")}, 816 Document{Name: "banana", Content: []byte("x apple y")}, 817 ) 818 t.Run("LineMatches", func(t *testing.T) { 819 sres := searchForTest(t, b, &query.Regexp{ 820 Regexp: mustParseRE("[qn][zx]"), 821 FileName: true, 822 }) 823 824 matches := sres.Files 825 if len(matches) != 1 || matches[0].FileName != "banzana" { 826 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 827 } 828 }) 829 t.Run("LineMatches", func(t *testing.T) { 830 sres := searchForTest(t, b, &query.Regexp{ 831 Regexp: mustParseRE("[qn][zx]"), 832 FileName: true, 833 }, chunkOpts) 834 835 matches := sres.Files 836 if len(matches) != 1 || matches[0].FileName != "banzana" { 837 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 838 } 839 }) 840} 841 842func TestFileRegexpSearchShortString(t *testing.T) { 843 b := testShardBuilder(t, nil, 844 Document{Name: "banana.py", Content: []byte("x orange y")}) 845 846 t.Run("LineMatches", func(t *testing.T) { 847 sres := searchForTest(t, b, &query.Regexp{ 848 Regexp: mustParseRE("ana.py"), 849 
FileName: true, 850 }) 851 852 matches := sres.Files 853 if len(matches) != 1 || matches[0].FileName != "banana.py" { 854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 855 } 856 }) 857 858 t.Run("ChunkMatches", func(t *testing.T) { 859 sres := searchForTest(t, b, &query.Regexp{ 860 Regexp: mustParseRE("ana.py"), 861 FileName: true, 862 }, chunkOpts) 863 864 matches := sres.Files 865 if len(matches) != 1 || matches[0].FileName != "banana.py" { 866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 867 } 868 }) 869} 870 871func TestFileSubstringSearchBruteForce(t *testing.T) { 872 b := testShardBuilder(t, nil, 873 Document{Name: "BANZANA", Content: []byte("x orange y")}, 874 Document{Name: "banana", Content: []byte("x apple y")}) 875 876 q := &query.Substring{ 877 Pattern: "z", 878 FileName: true, 879 } 880 881 t.Run("LineMatches", func(t *testing.T) { 882 res := searchForTest(t, b, q) 883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 885 } 886 }) 887 888 t.Run("ChunkMatches", func(t *testing.T) { 889 res := searchForTest(t, b, q, chunkOpts) 890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 892 } 893 }) 894} 895 896func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 897 b := testShardBuilder(t, nil, 898 Document{Name: "BANZANA", Content: []byte("x orange y")}, 899 Document{Name: "bananaq", Content: []byte("x apple y")}) 900 901 q := &query.Substring{ 902 Pattern: "q", 903 FileName: true, 904 } 905 t.Run("LineMatches", func(t *testing.T) { 906 res := searchForTest(t, b, q) 907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 908 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 909 } 910 }) 911 912 t.Run("LineMatches", func(t *testing.T) { 913 res := searchForTest(t, b, q, chunkOpts) 914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != 
want { 915 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 916 } 917 }) 918} 919 920func TestSearchMatchAll(t *testing.T) { 921 b := testShardBuilder(t, nil, 922 Document{Name: "banzana", Content: []byte("x orange y")}, 923 Document{Name: "banana", Content: []byte("x apple y")}) 924 925 t.Run("LineMatches", func(t *testing.T) { 926 sres := searchForTest(t, b, &query.Const{Value: true}) 927 matches := sres.Files 928 if len(matches) != 2 { 929 t.Fatalf("got %v, want 2 matches", matches) 930 } 931 }) 932 933 t.Run("ChunkMatches", func(t *testing.T) { 934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 935 matches := sres.Files 936 if len(matches) != 2 { 937 t.Fatalf("got %v, want 2 matches", matches) 938 } 939 }) 940} 941 942func TestSearchNewline(t *testing.T) { 943 b := testShardBuilder(t, nil, 944 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 945 946 t.Run("LineMatches", func(t *testing.T) { 947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 948 949 // Just check that we don't crash. 950 951 matches := sres.Files 952 if len(matches) != 1 { 953 t.Fatalf("got %v, want 1 matches", matches) 954 } 955 }) 956 957 t.Run("ChunkMatches", func(t *testing.T) { 958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 959 960 // Just check that we don't crash. 
961 962 matches := sres.Files 963 if len(matches) != 1 { 964 t.Fatalf("got %v, want 1 matches", matches) 965 } 966 }) 967} 968 969func TestSearchMatchAllRegexp(t *testing.T) { 970 b := testShardBuilder(t, nil, 971 Document{Name: "banzana", Content: []byte("abcd")}, 972 Document{Name: "banana", Content: []byte("pqrs")}) 973 974 t.Run("LineMatches", func(t *testing.T) { 975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 976 977 matches := sres.Files 978 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 979 t.Fatalf("got %v, want 2 matches", matches) 980 } 981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 982 t.Fatalf("want 4 chars in every file, got %#v", matches) 983 } 984 }) 985 986 t.Run("ChunkMatches", func(t *testing.T) { 987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 988 989 matches := sres.Files 990 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 991 t.Fatalf("got %v, want 2 matches", matches) 992 } 993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 994 t.Fatalf("want 4 chars in every file, got %#v", matches) 995 } 996 }) 997} 998 999func TestSearchBM25MatchScores(t *testing.T) { 1000 ctx := context.Background() 1001 searcher := searcherForTest(t, testShardBuilder(t, nil, 1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")}, 1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")}, 1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}), 1005 )) 1006 1007 t.Run("LineMatches", func(t *testing.T) { 1008 q := &query.Substring{Pattern: "two"} 1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) 1010 if err != nil { 1011 t.Fatal(err) 1012 } 1013 matches := sres.Files 1014 if len(matches) != 1 { 1015 t.Fatalf("want 1 file index, got %d", len(matches)) 
1016 } 1017 1018 if len(matches[0].LineMatches) != 2 { 1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1020 } 1021 1022 if matches[0].LineMatches[0].LineNumber != 4 { 1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber) 1024 } 1025 }) 1026 1027 t.Run("ChunkMatches", func(t *testing.T) { 1028 q := &query.Substring{Pattern: "five"} 1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1030 if err != nil { 1031 t.Fatal(err) 1032 } 1033 1034 matches := sres.Files 1035 if len(matches) != 1 { 1036 t.Fatalf("want 1 file index, got %d", len(matches)) 1037 } 1038 1039 if len(matches[0].ChunkMatches) != 2 { 1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1041 } 1042 1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 { 1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch) 1045 } 1046 }) 1047 1048 t.Run("ChunkMatches with symbols", func(t *testing.T) { 1049 q := &query.Or{ 1050 Children: []query.Q{ 1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}}, 1052 &query.Substring{Pattern: "five"}, 1053 }, 1054 } 1055 1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1057 if err != nil { 1058 t.Fatal(err) 1059 } 1060 1061 matches := sres.Files 1062 if len(matches) != 2 { 1063 t.Fatalf("want 2 file index, got %d", len(matches)) 1064 } 1065 1066 foundSymbolInfo := false 1067 for _, m := range matches { 1068 for _, cm := range m.ChunkMatches { 1069 if len(cm.SymbolInfo) > 0 { 1070 foundSymbolInfo = true 1071 } 1072 } 1073 } 1074 1075 if !foundSymbolInfo { 1076 t.Fatalf("want symbol info, got none") 1077 } 1078 }) 1079} 1080 1081func TestFileRestriction(t *testing.T) { 1082 b := testShardBuilder(t, nil, 1083 Document{Name: "banana1", Content: []byte("x orange y")}, 1084 Document{Name: 
"banana2", Content: []byte("x apple y")}, 1085 Document{Name: "orange", Content: []byte("x apple z")}) 1086 1087 t.Run("LineMatches", func(t *testing.T) { 1088 sres := searchForTest(t, b, query.NewAnd( 1089 &query.Substring{ 1090 Pattern: "banana", 1091 FileName: true, 1092 }, 1093 &query.Substring{ 1094 Pattern: "apple", 1095 })) 1096 1097 matches := sres.Files 1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1099 t.Fatalf("got %v, want 1 match", matches) 1100 } 1101 1102 match := matches[0].LineMatches[0] 1103 got := string(match.Line) 1104 want := "x apple y" 1105 if got != want { 1106 t.Errorf("got match %#v, want line %q", match, want) 1107 } 1108 }) 1109 1110 t.Run("ChunkMatches", func(t *testing.T) { 1111 sres := searchForTest(t, b, query.NewAnd( 1112 &query.Substring{ 1113 Pattern: "banana", 1114 FileName: true, 1115 }, 1116 &query.Substring{ 1117 Pattern: "apple", 1118 }), chunkOpts) 1119 1120 matches := sres.Files 1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1122 t.Fatalf("got %v, want 1 match", matches) 1123 } 1124 1125 match := matches[0].ChunkMatches[0] 1126 got := string(match.Content) 1127 want := "x apple y" 1128 if got != want { 1129 t.Errorf("got match %#v, want line %q", match, want) 1130 } 1131 }) 1132} 1133 1134func TestFileNameBoundary(t *testing.T) { 1135 b := testShardBuilder(t, nil, 1136 Document{Name: "banana2", Content: []byte("x apple y")}, 1137 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1138 Document{Name: "foo", Content: []byte("x apple y")}) 1139 1140 t.Run("LineMatches", func(t *testing.T) { 1141 sres := searchForTest(t, b, &query.Substring{ 1142 Pattern: "helpers.go", 1143 FileName: true, 1144 }) 1145 1146 matches := sres.Files 1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1148 t.Fatalf("got %v, want 1 match", matches) 1149 } 1150 }) 1151 1152 t.Run("ChunkMatches", func(t *testing.T) { 1153 sres := searchForTest(t, b, &query.Substring{ 1154 Pattern: "helpers.go", 
1155 FileName: true, 1156 }, chunkOpts) 1157 1158 matches := sres.Files 1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1160 t.Fatalf("got %v, want 1 match", matches) 1161 } 1162 }) 1163} 1164 1165func TestDocumentOrder(t *testing.T) { 1166 var docs []Document 1167 for i := range 3 { 1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1169 } 1170 1171 b := testShardBuilder(t, nil, docs...) 1172 1173 t.Run("LineMatches", func(t *testing.T) { 1174 sres := searchForTest(t, b, query.NewAnd( 1175 &query.Substring{ 1176 Pattern: "needle", 1177 })) 1178 1179 want := []string{"f0", "f1", "f2"} 1180 var got []string 1181 for _, f := range sres.Files { 1182 got = append(got, f.FileName) 1183 } 1184 if !reflect.DeepEqual(got, want) { 1185 t.Fatalf("got %v, want %v", got, want) 1186 } 1187 }) 1188 1189 t.Run("ChunkMatches", func(t *testing.T) { 1190 sres := searchForTest(t, b, 1191 query.NewAnd(&query.Substring{ 1192 Pattern: "needle", 1193 }), 1194 chunkOpts, 1195 ) 1196 1197 want := []string{"f0", "f1", "f2"} 1198 var got []string 1199 for _, f := range sres.Files { 1200 got = append(got, f.FileName) 1201 } 1202 if !reflect.DeepEqual(got, want) { 1203 t.Fatalf("got %v, want %v", got, want) 1204 } 1205 }) 1206} 1207 1208func TestBranchMask(t *testing.T) { 1209 b := testShardBuilder(t, &zoekt.Repository{ 1210 Branches: []zoekt.RepositoryBranch{ 1211 {"master", "v-master"}, 1212 {"stable", "v-stable"}, 1213 {"bonzai", "v-bonzai"}, 1214 }, 1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1219 ) 1220 1221 t.Run("LineMatches", func(t *testing.T) { 1222 sres := searchForTest(t, b, query.NewAnd( 1223 &query.Substring{ 1224 
Pattern: "needle", 1225 }, 1226 &query.Branch{ 1227 Pattern: "table", 1228 })) 1229 1230 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1231 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1232 } 1233 1234 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1235 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1236 } 1237 }) 1238 1239 t.Run("ChunkMatches", func(t *testing.T) { 1240 sres := searchForTest(t, b, query.NewAnd( 1241 &query.Substring{ 1242 Pattern: "needle", 1243 }, 1244 &query.Branch{ 1245 Pattern: "table", 1246 }), 1247 chunkOpts, 1248 ) 1249 1250 if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" { 1251 t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files) 1252 } 1253 1254 if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" { 1255 t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches) 1256 } 1257 }) 1258} 1259 1260func TestBranchLimit(t *testing.T) { 1261 for limit := 64; limit <= 65; limit++ { 1262 r := &zoekt.Repository{} 1263 for i := range limit { 1264 s := fmt.Sprintf("b%d", i) 1265 r.Branches = append(r.Branches, zoekt.RepositoryBranch{ 1266 s, "v-" + s, 1267 }) 1268 } 1269 _, err := NewShardBuilder(r) 1270 if limit == 64 && err != nil { 1271 t.Fatalf("NewShardBuilder: %v", err) 1272 } else if limit == 65 && err == nil { 1273 t.Fatalf("NewShardBuilder succeeded") 1274 } 1275 } 1276} 1277 1278func TestBranchReport(t *testing.T) { 1279 branches := []string{"stable", "master"} 1280 b := testShardBuilder(t, &zoekt.Repository{ 1281 Branches: []zoekt.RepositoryBranch{ 1282 {"stable", "vs"}, 1283 {"master", "vm"}, 1284 }, 1285 }, 1286 Document{Name: "f2", Content: []byte("needle"), Branches: branches}) 1287 1288 t.Run("LineMatches", func(t *testing.T) { 1289 sres := searchForTest(t, b, &query.Substring{ 1290 Pattern: "needle", 1291 }) 1292 if len(sres.Files) != 1 { 1293 
// mustParseRE parses s using Perl regular-expression syntax and returns the
// resulting syntax tree. It panics with the parse error when s is not a valid
// expression, so it is only suitable for hard-coded patterns in tests.
func mustParseRE(s string) *syntax.Regexp {
	parsed, err := syntax.Parse(s, syntax.Perl)
	if err == nil {
		return parsed
	}

	panic(err)
}
nil, 1368 Document{ 1369 Name: "f1", 1370 Content: content, 1371 }) 1372 1373 t.Run("LineMatches", func(t *testing.T) { 1374 sres := searchForTest(t, b, 1375 &query.Regexp{ 1376 Regexp: mustParseRE("dle.*bla"), 1377 }) 1378 1379 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1380 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1381 } 1382 1383 got := sres.Files[0].LineMatches[0] 1384 want := zoekt.LineMatch{ 1385 LineFragments: []zoekt.LineFragmentMatch{{ 1386 LineOffset: 3, 1387 Offset: 3, 1388 MatchLength: 11, 1389 }}, 1390 Line: content, 1391 FileName: false, 1392 LineNumber: 1, 1393 LineStart: 0, 1394 LineEnd: 14, 1395 } 1396 1397 if !reflect.DeepEqual(got, want) { 1398 t.Errorf("got %#v, want %#v", got, want) 1399 } 1400 }) 1401 1402 t.Run("ChunkMatches", func(t *testing.T) { 1403 sres := searchForTest(t, b, 1404 &query.Regexp{ 1405 Regexp: mustParseRE("dle.*bla"), 1406 }, chunkOpts) 1407 1408 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1409 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1410 } 1411 1412 got := sres.Files[0].ChunkMatches[0] 1413 want := zoekt.ChunkMatch{ 1414 Content: content, 1415 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1416 Ranges: []zoekt.Range{{ 1417 Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1418 End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1419 }}, 1420 } 1421 1422 if diff := cmp.Diff(want, got); diff != "" { 1423 t.Fatal(diff) 1424 } 1425 }) 1426} 1427 1428func TestRegexpFile(t *testing.T) { 1429 content := []byte("needle the bla") 1430 1431 name := "let's play: find the mussel" 1432 b := testShardBuilder(t, nil, 1433 Document{Name: name, Content: content}, 1434 Document{Name: "play.txt", Content: content}) 1435 1436 t.Run("LineMatches", func(t *testing.T) { 1437 sres := searchForTest(t, b, 1438 &query.Regexp{ 1439 Regexp: mustParseRE("play.*mussel"), 1440 FileName: true, 1441 }) 1442 1443 if len(sres.Files) != 1 
|| len(sres.Files[0].LineMatches) != 1 { 1444 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1445 } 1446 1447 if sres.Files[0].FileName != name { 1448 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1449 } 1450 }) 1451 1452 t.Run("ChunkMatches", func(t *testing.T) { 1453 sres := searchForTest(t, b, 1454 &query.Regexp{ 1455 Regexp: mustParseRE("play.*mussel"), 1456 FileName: true, 1457 }, chunkOpts) 1458 1459 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1460 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 1461 } 1462 1463 if sres.Files[0].FileName != name { 1464 t.Errorf("got match %#v, want name %q", sres.Files[0], name) 1465 } 1466 }) 1467} 1468 1469func TestRegexpOrder(t *testing.T) { 1470 content := []byte("bla the needle") 1471 // ----------------01234567890123 1472 1473 b := testShardBuilder(t, nil, 1474 Document{Name: "f1", Content: content}) 1475 1476 t.Run("LineMatches", func(t *testing.T) { 1477 sres := searchForTest(t, b, 1478 &query.Regexp{ 1479 Regexp: mustParseRE("dle.*bla"), 1480 }) 1481 1482 if len(sres.Files) != 0 { 1483 t.Fatalf("got %v, want 0 matches", sres.Files) 1484 } 1485 }) 1486 1487 t.Run("ChunkMatches", func(t *testing.T) { 1488 sres := searchForTest(t, b, 1489 &query.Regexp{ 1490 Regexp: mustParseRE("dle.*bla"), 1491 }) 1492 1493 if len(sres.Files) != 0 { 1494 t.Fatalf("got %v, want 0 matches", sres.Files) 1495 } 1496 }) 1497} 1498 1499func TestRepoName(t *testing.T) { 1500 content := []byte("bla the needle") 1501 // ----------------01234567890123 1502 1503 b := testShardBuilder(t, &zoekt.Repository{Name: "bla"}, 1504 Document{Name: "f1", Content: content}) 1505 1506 t.Run("LineMatches", func(t *testing.T) { 1507 sres := searchForTest(t, b, 1508 query.NewAnd( 1509 &query.Substring{Pattern: "needle"}, 1510 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1511 )) 1512 1513 if len(sres.Files) != 0 { 1514 t.Fatalf("got %v, want 0 matches", sres.Files) 1515 } 1516 1517 if 
sres.Stats.FilesConsidered > 0 { 1518 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1519 } 1520 1521 sres = searchForTest(t, b, 1522 query.NewAnd( 1523 &query.Substring{Pattern: "needle"}, 1524 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1525 )) 1526 if len(sres.Files) != 1 { 1527 t.Fatalf("got %v, want 1 match", sres.Files) 1528 } 1529 }) 1530 1531 t.Run("ChunkMatches", func(t *testing.T) { 1532 sres := searchForTest(t, b, 1533 query.NewAnd( 1534 &query.Substring{Pattern: "needle"}, 1535 &query.Repo{Regexp: regexp.MustCompile("foo")}, 1536 ), 1537 chunkOpts, 1538 ) 1539 1540 if len(sres.Files) != 0 { 1541 t.Fatalf("got %v, want 0 matches", sres.Files) 1542 } 1543 1544 if sres.Stats.FilesConsidered > 0 { 1545 t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered) 1546 } 1547 1548 sres = searchForTest(t, b, 1549 query.NewAnd( 1550 &query.Substring{Pattern: "needle"}, 1551 &query.Repo{Regexp: regexp.MustCompile("bla")}, 1552 )) 1553 if len(sres.Files) != 1 { 1554 t.Fatalf("got %v, want 1 match", sres.Files) 1555 } 1556 }) 1557} 1558 1559func TestMergeMatches(t *testing.T) { 1560 t.Run("LineMatches, adjacent matches", func(t *testing.T) { 1561 b := testShardBuilder(t, nil, 1562 Document{Name: "f1", Content: []byte("blablabla")}) 1563 sres := searchForTest(t, b, 1564 &query.Substring{Pattern: "bla"}) 1565 1566 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1567 t.Fatalf("got %v, want 1 match", sres.Files) 1568 } 1569 1570 if len(sres.Files[0].LineMatches[0].LineFragments) != 3 { 1571 t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments) 1572 } 1573 }) 1574 1575 t.Run("LineMatches, overlapping matches", func(t *testing.T) { 1576 b := testShardBuilder(t, nil, 1577 Document{Name: "f1", Content: []byte("hellogoodbye")}) 1578 sres := searchForTest(t, b, 1579 &query.And{Children: []query.Q{ 1580 &query.Substring{Pattern: "hello"}, 1581 
&query.Substring{Pattern: "logood"}, 1582 }}) 1583 1584 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 1585 t.Fatalf("got %v, want 1 match", sres.Files) 1586 } 1587 1588 lineFragments := sres.Files[0].LineMatches[0].LineFragments 1589 if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") { 1590 t.Fatalf("got %v, want single line fragment 'hello'", lineFragments) 1591 } 1592 }) 1593 1594 t.Run("ChunkMatches, no overlap", func(t *testing.T) { 1595 b := testShardBuilder(t, nil, 1596 Document{Name: "f1", Content: []byte("blablabla")}) 1597 1598 sres := searchForTest(t, b, 1599 &query.Substring{Pattern: "bla"}, 1600 chunkOpts, 1601 ) 1602 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1603 t.Fatalf("got %v, want 1 match", sres.Files) 1604 } 1605 1606 if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 { 1607 t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges) 1608 } 1609 }) 1610 1611 t.Run("ChunkMatches, overlapping matches", func(t *testing.T) { 1612 b := testShardBuilder(t, nil, 1613 Document{Name: "f1", Content: []byte("hellogoodbye")}) 1614 sres := searchForTest(t, b, 1615 &query.And{Children: []query.Q{ 1616 &query.Substring{Pattern: "hello"}, 1617 &query.Substring{Pattern: "logood"}, 1618 }}, chunkOpts) 1619 1620 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 1621 t.Fatalf("got %v, want 1 chunk match", sres.Files) 1622 } 1623 1624 ranges := sres.Files[0].ChunkMatches[0].Ranges 1625 if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 { 1626 t.Fatalf("got %v, want single chunk range 'hello'", ranges) 1627 } 1628 }) 1629} 1630 1631func TestRepoURL(t *testing.T) { 1632 content := []byte("blablabla") 1633 b := testShardBuilder(t, &zoekt.Repository{ 1634 Name: "name", 1635 URL: "URL", 1636 CommitURLTemplate: "commit", 1637 FileURLTemplate: "file-url", 1638 LineFragmentTemplate: "fragment", 1639 }, Document{Name: "f1", Content: content}) 1640 
1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1642 1643 if sres.RepoURLs["name"] != "file-url" { 1644 t.Errorf("got RepoURLs %v, want {name: URL}", sres.RepoURLs) 1645 } 1646 if sres.LineFragments["name"] != "fragment" { 1647 t.Errorf("got URLs %v, want {name: URL}", sres.LineFragments) 1648 } 1649} 1650 1651func TestRegexpCaseSensitive(t *testing.T) { 1652 content := []byte("bla\nfunc unmarshalGitiles\n") 1653 b := testShardBuilder(t, nil, Document{ 1654 Name: "f1", 1655 Content: content, 1656 }) 1657 1658 t.Run("LineMatches", func(t *testing.T) { 1659 res := searchForTest(t, b, 1660 &query.Regexp{ 1661 Regexp: mustParseRE("func.*Gitiles"), 1662 CaseSensitive: true, 1663 }) 1664 1665 if len(res.Files) != 1 { 1666 t.Fatalf("got %v, want one index", res.Files) 1667 } 1668 }) 1669 1670 t.Run("ChunkMatches", func(t *testing.T) { 1671 res := searchForTest(t, b, 1672 &query.Regexp{ 1673 Regexp: mustParseRE("func.*Gitiles"), 1674 CaseSensitive: true, 1675 }, 1676 chunkOpts, 1677 ) 1678 1679 if len(res.Files) != 1 { 1680 t.Fatalf("got %v, want one index", res.Files) 1681 } 1682 }) 1683} 1684 1685func TestRegexpCaseFolding(t *testing.T) { 1686 content := []byte("bla\nfunc unmarshalGitiles\n") 1687 1688 b := testShardBuilder(t, nil, 1689 Document{Name: "f1", Content: content}) 1690 res := searchForTest(t, b, 1691 &query.Regexp{ 1692 Regexp: mustParseRE("func.*GITILES"), 1693 CaseSensitive: false, 1694 }) 1695 1696 if len(res.Files) != 1 { 1697 t.Fatalf("got %v, want one index", res.Files) 1698 } 1699} 1700 1701func TestCaseRegexp(t *testing.T) { 1702 content := []byte("BLABLABLA") 1703 b := testShardBuilder(t, nil, 1704 Document{Name: "f1", Content: content}) 1705 1706 t.Run("LineMatches", func(t *testing.T) { 1707 res := searchForTest(t, b, 1708 &query.Regexp{ 1709 Regexp: mustParseRE("[xb][xl][xa]"), 1710 CaseSensitive: true, 1711 }) 1712 1713 if len(res.Files) > 0 { 1714 t.Fatalf("got %v, want no matches", res.Files) 1715 } 1716 }) 1717 1718 
t.Run("ChunkMatches", func(t *testing.T) { 1719 res := searchForTest(t, b, 1720 &query.Regexp{ 1721 Regexp: mustParseRE("[xb][xl][xa]"), 1722 CaseSensitive: true, 1723 }, 1724 chunkOpts, 1725 ) 1726 1727 if len(res.Files) > 0 { 1728 t.Fatalf("got %v, want no matches", res.Files) 1729 } 1730 }) 1731} 1732 1733func TestNegativeRegexp(t *testing.T) { 1734 content := []byte("BLABLABLA needle bla") 1735 b := testShardBuilder(t, nil, 1736 Document{Name: "f1", Content: content}) 1737 1738 t.Run("LineMatches", func(t *testing.T) { 1739 res := searchForTest(t, b, 1740 query.NewAnd( 1741 &query.Substring{ 1742 Pattern: "needle", 1743 }, 1744 &query.Not{ 1745 Child: &query.Regexp{ 1746 Regexp: mustParseRE(".cs"), 1747 }, 1748 })) 1749 1750 if len(res.Files) != 1 { 1751 t.Fatalf("got %v, want 1 match", res.Files) 1752 } 1753 }) 1754 1755 t.Run("ChunkMatches", func(t *testing.T) { 1756 res := searchForTest(t, b, 1757 query.NewAnd( 1758 &query.Substring{ 1759 Pattern: "needle", 1760 }, 1761 &query.Not{ 1762 Child: &query.Regexp{ 1763 Regexp: mustParseRE(".cs"), 1764 }, 1765 }, 1766 ), 1767 chunkOpts) 1768 1769 if len(res.Files) != 1 { 1770 t.Fatalf("got %v, want 1 match", res.Files) 1771 } 1772 }) 1773} 1774 1775func TestSymbolRank(t *testing.T) { 1776 t.Skip() 1777 1778 content := []byte("func bla() blubxxxxx") 1779 // ----------------01234567890123456789 1780 b := testShardBuilder(t, nil, 1781 Document{ 1782 Name: "f1", 1783 Content: content, 1784 }, Document{ 1785 Name: "f2", 1786 Content: content, 1787 Symbols: []DocumentSection{{5, 8}}, 1788 }, Document{ 1789 Name: "f3", 1790 Content: content, 1791 }) 1792 1793 t.Run("LineMatches", func(t *testing.T) { 1794 res := searchForTest(t, b, 1795 &query.Substring{ 1796 CaseSensitive: false, 1797 Pattern: "bla", 1798 }) 1799 1800 if len(res.Files) != 3 { 1801 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1802 } 1803 if res.Files[0].FileName != "f2" { 1804 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1805 } 1806 }) 1807 1808 t.Run("ChunkMatches", func(t *testing.T) { 1809 res := searchForTest(t, b, 1810 &query.Substring{ 1811 CaseSensitive: false, 1812 Pattern: "bla", 1813 }, chunkOpts) 1814 1815 if len(res.Files) != 3 { 1816 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1817 } 1818 if res.Files[0].FileName != "f2" { 1819 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1820 } 1821 }) 1822} 1823 1824func TestSymbolRankRegexpUTF8(t *testing.T) { 1825 t.Skip() 1826 1827 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1828 content := []byte(prefix + 1829 "func bla() blub") 1830 // ------012345678901234 1831 b := testShardBuilder(t, nil, 1832 Document{ 1833 Name: "f1", 1834 Content: content, 1835 }, Document{ 1836 Name: "f2", 1837 Content: content, 1838 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1839 }, Document{ 1840 Name: "f3", 1841 Content: content, 1842 }) 1843 1844 t.Run("LineMatches", func(t *testing.T) { 1845 res := searchForTest(t, b, 1846 &query.Regexp{ 1847 Regexp: mustParseRE("b.a"), 1848 }) 1849 1850 if len(res.Files) != 3 { 1851 t.Fatalf("got %#v, want 3 files", res.Files) 1852 } 1853 if res.Files[0].FileName != "f2" { 1854 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1855 } 1856 }) 1857 1858 t.Run("ChunjkMatches", func(t *testing.T) { 1859 res := searchForTest(t, b, 1860 &query.Regexp{ 1861 Regexp: mustParseRE("b.a"), 1862 }, chunkOpts) 1863 1864 if len(res.Files) != 3 { 1865 t.Fatalf("got %#v, want 3 files", res.Files) 1866 } 1867 if res.Files[0].FileName != "f2" { 1868 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1869 } 1870 }) 1871} 1872 1873func TestPartialSymbolRank(t *testing.T) { 1874 t.Skip() 1875 1876 content := []byte("func bla() blub") 1877 // 
----------------012345678901234 1878 1879 b := testShardBuilder(t, nil, 1880 Document{ 1881 Name: "f1", 1882 Content: content, 1883 Symbols: []DocumentSection{{4, 9}}, 1884 }, Document{ 1885 Name: "f2", 1886 Content: content, 1887 Symbols: []DocumentSection{{4, 8}}, 1888 }, Document{ 1889 Name: "f3", 1890 Content: content, 1891 Symbols: []DocumentSection{{4, 9}}, 1892 }) 1893 1894 t.Run("LineMatches", func(t *testing.T) { 1895 res := searchForTest(t, b, 1896 &query.Substring{ 1897 Pattern: "bla", 1898 }) 1899 1900 if len(res.Files) != 3 { 1901 t.Fatalf("got %#v, want 3 files", res.Files) 1902 } 1903 if res.Files[0].FileName != "f2" { 1904 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1905 } 1906 }) 1907 1908 t.Run("ChunkMatches", func(t *testing.T) { 1909 res := searchForTest(t, b, 1910 &query.Substring{ 1911 Pattern: "bla", 1912 }, chunkOpts) 1913 1914 if len(res.Files) != 3 { 1915 t.Fatalf("got %#v, want 3 files", res.Files) 1916 } 1917 if res.Files[0].FileName != "f2" { 1918 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1919 } 1920 }) 1921} 1922 1923func TestNegativeRepo(t *testing.T) { 1924 content := []byte("bla the needle") 1925 // ----------------01234567890123 1926 b := testShardBuilder(t, &zoekt.Repository{ 1927 Name: "bla", 1928 }, Document{Name: "f1", Content: content}) 1929 1930 t.Run("LineMatches", func(t *testing.T) { 1931 sres := searchForTest(t, b, 1932 query.NewAnd( 1933 &query.Substring{Pattern: "needle"}, 1934 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1935 )) 1936 1937 if len(sres.Files) != 0 { 1938 t.Fatalf("got %v, want 0 matches", sres.Files) 1939 } 1940 }) 1941 1942 t.Run("ChunkMatches", func(t *testing.T) { 1943 sres := searchForTest(t, b, 1944 query.NewAnd( 1945 &query.Substring{Pattern: "needle"}, 1946 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1947 ), chunkOpts) 1948 1949 if len(sres.Files) != 0 { 1950 t.Fatalf("got %v, want 0 matches", sres.Files) 1951 } 1952 }) 
1953} 1954 1955func TestListRepos(t *testing.T) { 1956 content := []byte("bla the needle\n") 1957 // ----------------012345678901234- 1958 1959 t.Run("default and minimal fallback", func(t *testing.T) { 1960 repo := &zoekt.Repository{ 1961 Name: "reponame", 1962 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1963 } 1964 b := testShardBuilder(t, repo, 1965 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1966 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1967 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1968 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1969 1970 searcher := searcherForTest(t, b) 1971 1972 for _, opts := range []*zoekt.ListOptions{ 1973 nil, 1974 {}, 1975 {Field: zoekt.RepoListFieldRepos}, 1976 {Field: zoekt.RepoListFieldReposMap}, 1977 } { 1978 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1979 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1980 1981 res, err := searcher.List(context.Background(), q, opts) 1982 if err != nil { 1983 t.Fatalf("List(%v): %v", q, err) 1984 } 1985 1986 want := &zoekt.RepoList{ 1987 Repos: []*zoekt.RepoListEntry{{ 1988 Repository: *repo, 1989 Stats: zoekt.RepoStats{ 1990 Documents: 4, 1991 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1992 Shards: 1, 1993 1994 NewLinesCount: 4, 1995 DefaultBranchNewLinesCount: 2, 1996 OtherBranchesNewLinesCount: 3, 1997 }, 1998 }}, 1999 Stats: zoekt.RepoStats{ 2000 Repos: 1, 2001 Documents: 4, 2002 ContentBytes: 68, 2003 Shards: 1, 2004 2005 NewLinesCount: 4, 2006 DefaultBranchNewLinesCount: 2, 2007 OtherBranchesNewLinesCount: 3, 2008 }, 2009 } 2010 ignored := []cmp.Option{ 2011 cmpopts.EquateEmpty(), 2012 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 2013 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 2014 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 2015 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 2016 } 
2017 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2018 t.Fatalf("mismatch (-want +got):\n%s", diff) 2019 } 2020 2021 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 2022 res, err = searcher.List(context.Background(), q, nil) 2023 if err != nil { 2024 t.Fatalf("List(%v): %v", q, err) 2025 } 2026 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2027 t.Fatalf("got %v, want 0 matches", res) 2028 } 2029 }) 2030 } 2031 }) 2032 2033 t.Run("minimal", func(t *testing.T) { 2034 repo := &zoekt.Repository{ 2035 ID: 1234, 2036 Name: "reponame", 2037 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2038 RawConfig: map[string]string{"repoid": "1234"}, 2039 } 2040 b := testShardBuilder(t, repo, 2041 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 2042 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 2043 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 2044 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 2045 2046 searcher := searcherForTest(t, b) 2047 2048 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 2049 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2050 if err != nil { 2051 t.Fatalf("List(%v): %v", q, err) 2052 } 2053 2054 want := &zoekt.RepoList{ 2055 ReposMap: zoekt.ReposMap{ 2056 repo.ID: { 2057 HasSymbols: repo.HasSymbols, 2058 Branches: repo.Branches, 2059 }, 2060 }, 2061 Stats: zoekt.RepoStats{ 2062 Repos: 1, 2063 Shards: 1, 2064 Documents: 4, 2065 IndexBytes: 412, 2066 ContentBytes: 68, 2067 NewLinesCount: 4, 2068 DefaultBranchNewLinesCount: 2, 2069 OtherBranchesNewLinesCount: 3, 2070 }, 2071 } 2072 2073 ignored := []cmp.Option{ 2074 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 2075 } 2076 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2077 t.Fatalf("mismatch (-want +got):\n%s", diff) 2078 } 2079 2080 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 
2081 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2082 if err != nil { 2083 t.Fatalf("List(%v): %v", q, err) 2084 } 2085 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2086 t.Fatalf("got %v, want 0 matches", res) 2087 } 2088 }) 2089} 2090 2091func TestListReposByContent(t *testing.T) { 2092 content := []byte("bla the needle") 2093 2094 b := testShardBuilder(t, &zoekt.Repository{ 2095 Name: "reponame", 2096 }, 2097 Document{Name: "f1", Content: content}, 2098 Document{Name: "f2", Content: content}) 2099 2100 searcher := searcherForTest(t, b) 2101 q := &query.Substring{Pattern: "needle"} 2102 res, err := searcher.List(context.Background(), q, nil) 2103 if err != nil { 2104 t.Fatalf("List(%v): %v", q, err) 2105 } 2106 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 2107 t.Fatalf("got %v, want 1 matches", res) 2108 } 2109 if got := res.Repos[0].Stats.Shards; got != 1 { 2110 t.Fatalf("got %d, want 1 shard", got) 2111 } 2112 q = &query.Substring{Pattern: "foo"} 2113 res, err = searcher.List(context.Background(), q, nil) 2114 if err != nil { 2115 t.Fatalf("List(%v): %v", q, err) 2116 } 2117 if len(res.Repos) != 0 { 2118 t.Fatalf("got %v, want 0 matches", res) 2119 } 2120} 2121 2122func TestMetadata(t *testing.T) { 2123 content := []byte("bla the needle") 2124 2125 b := testShardBuilder(t, &zoekt.Repository{ 2126 Name: "reponame", 2127 }, Document{Name: "f1", Content: content}, 2128 Document{Name: "f2", Content: content}) 2129 2130 var buf bytes.Buffer 2131 if err := b.Write(&buf); err != nil { 2132 t.Fatal(err) 2133 } 2134 f := &memSeeker{buf.Bytes()} 2135 2136 rd, _, err := ReadMetadata(f) 2137 if err != nil { 2138 t.Fatalf("ReadMetadata: %v", err) 2139 } 2140 2141 if got, want := rd[0].Name, "reponame"; got != want { 2142 t.Fatalf("got %q want %q", got, want) 2143 } 2144} 2145 2146func TestRepoWithMetadata(t *testing.T) { 2147 sb := newShardBuilder(0) 2148 sb.repoList = 
[]zoekt.Repository{ 2149 { 2150 Name: "repo1", 2151 Metadata: map[string]string{"language": "go", "custom_key": "value"}, 2152 }, 2153 } 2154 2155 var buf bytes.Buffer 2156 if err := sb.Write(&buf); err != nil { 2157 t.Fatalf("failed to write shard: %v", err) 2158 } 2159 2160 // Simulate reading the shard back 2161 f := &memSeeker{buf.Bytes()} 2162 repoMetaData, _, err := ReadMetadata(f) 2163 if err != nil { 2164 t.Fatalf("failed to read metadata: %v", err) 2165 } 2166 2167 // Verify the metadata 2168 if len(repoMetaData) != 1 { 2169 t.Fatalf("expected 1 repository, got %d", len(repoMetaData)) 2170 } 2171 if got, want := repoMetaData[0].Metadata["language"], "go"; got != want { 2172 t.Errorf("expected metadata 'language' to be %q, got %q", want, got) 2173 } 2174 if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want { 2175 t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got) 2176 } 2177} 2178 2179func TestOr(t *testing.T) { 2180 b := testShardBuilder(t, nil, 2181 Document{Name: "f1", Content: []byte("needle")}, 2182 Document{Name: "f2", Content: []byte("banana")}) 2183 t.Run("LineMatches", func(t *testing.T) { 2184 sres := searchForTest(t, b, query.NewOr( 2185 &query.Substring{Pattern: "needle"}, 2186 &query.Substring{Pattern: "banana"})) 2187 2188 if len(sres.Files) != 2 { 2189 t.Fatalf("got %v, want 2 files", sres.Files) 2190 } 2191 }) 2192 2193 t.Run("ChunkMatches", func(t *testing.T) { 2194 sres := searchForTest(t, b, query.NewOr( 2195 &query.Substring{Pattern: "needle"}, 2196 &query.Substring{Pattern: "banana"})) 2197 2198 if len(sres.Files) != 2 { 2199 t.Fatalf("got %v, want 2 files", sres.Files) 2200 } 2201 }) 2202} 2203 2204func TestFrequency(t *testing.T) { 2205 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2206 2207 b := testShardBuilder(t, nil, 2208 Document{ 2209 Name: "f1", 2210 Content: content, 2211 }) 2212 2213 t.Run("LineMatches", func(t *testing.T) { 2214 sres := searchForTest(t, 
b, &query.Substring{Pattern: "slashdot"})
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})
}

// TestMatchNewline searches with a regexp parsed under syntax.ClassNL, so the
// negated class [^a] is allowed to match "\n". In LineMatches mode the
// reported line must be the second line ("alex"); in ChunkMatches mode the
// chunk must be the whole two-line content.
func TestMatchNewline(t *testing.T) {
	re, err := syntax.Parse("[^a]a", syntax.ClassNL)
	if err != nil {
		t.Fatalf("syntax.Parse: %v", err)
	}

	content := []byte("pqr\nalex")

	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		// Only the line after the newline is expected back.
		want := content[len("pqr\n"):]

		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, want) {
			t.Errorf("got match line %q, want %q", l, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
			t.Errorf("got chunk content %q, want %q", c, content)
		}
	})
}

// TestSubRepo checks that a match in a document with SubRepositoryPath set
// carries the sub-repository path and name, and that the result's
// LineFragments map holds the sub-repository's template under its name.
func TestSubRepo(t *testing.T) {
	subRepos := map[string]*zoekt.Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}

	content := []byte("pqr\nalex")

	b := testShardBuilder(t, &zoekt.Repository{
		SubRepoMap: subRepos,
	}, Document{
		Name:              "sub/f1",
		Content:           content,
		SubRepositoryPath: "sub",
	})

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
		t.Fatalf("got %v, wanted 1 matches", sres.Files)
	}

	f := sres.Files[0]
	if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
	}

	// The map is looked up by repository name, not by sub-repository path.
	if sres.LineFragments["sub-name"] != "sub-line" {
		t.Errorf("got LineFragmentTemplate %v, want {'sub-name':'sub-line'}", sres.LineFragments)
	}
}

// TestSearchEither checks that an unrestricted Substring query matches both
// file content and file names, while Content: true restricts it to content.
func TestSearchEither(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})
}

// TestUnicodeExactMatch checks that a case-sensitive substring search finds a
// needle containing non-ASCII characters.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files)
		}
	})
}

// TestUnicodeCoverContent checks that an upper-cased non-ASCII pattern matches
// only when the search is case-insensitive, and that the reported offset is
// the byte offset of the needle within the content.
func TestUnicodeCoverContent(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
		}

		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"})
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
		}

		if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files)
		}

		res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files)
		}

		got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
		want := uint32(strings.Index(string(content), needle))
		if got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})
}

// TestUnicodeNonCoverContent runs a content-only search for an upper-cased
// non-ASCII pattern and checks the byte offset reported for the match.
func TestUnicodeNonCoverContent(t *testing.T) {
	needle := "nééáádlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}

		if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}

		got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset
		want := uint32(strings.Index(string(content), needle))
		if got != want {
			t.Errorf("got %d want %d", got, want)
		}
	})
}

// kelvinCodePoint is the Kelvin sign (U+212A). It encodes to three bytes in
// UTF-8, unlike the one-byte ASCII 'k' the tests pair it with.
const kelvinCodePoint = 8490

// TestUnicodeVariableLength checks that a needle spelled with ASCII 'k' is
// found in a corpus that spells the same word with the Kelvin sign.
func TestUnicodeVariableLength(t *testing.T) {
	lower := 'k'
	upper := rune(kelvinCodePoint)

	needle := "nee" + string([]rune{lower}) + "eed"
	corpus := []byte("nee" + string([]rune{upper}) + "eed" +
		" ee" + string([]rune{lower}) + "ee" +
		" ee" + string([]rune{upper}) + "ee")

	t.Run("LineMatches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: corpus})

		res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true})
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: corpus})

		res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", res.Files)
		}
	})
}

// TestUnicodeFileStartOffsets checks that an ASCII document is still found by
// content when the document indexed before it consists of multi-byte runes.
func TestUnicodeFileStartOffsets(t *testing.T) {
	unicode := "世界"
	wat := "waaaaaat"
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(unicode),
		},
		Document{
			Name:    "f2",
			Content: []byte(wat),
		},
	)
	q := &query.Substring{Pattern: wat, Content: true}
	res := searchForTest(t, b, q)
	if len(res.Files) != 1 {
		t.Fatalf("got %v, wanted 1 index", res.Files)
	}
}

// TestLongFileUTF8 checks that a needle placed after a long run of multi-byte
// runes is found by a content search.
func TestLongFileUTF8(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	content := []byte(strings.Repeat(unicode, 100) + needle)
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("a", 50)),
		},
		Document{
			Name:    "f2",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, Content: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestEstimateDocCount checks that with EstimateDocCount set,
// Stats.ShardFilesConsidered is 2 when the Repo atom matches the shard and 0
// when it does not.
func TestEstimateDocCount(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	t.Run("LineMatches", func(t *testing.T) {
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), zoekt.SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), zoekt.SearchOptions{
				EstimateDocCount: true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("reponame")},
			), zoekt.SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 2 {
			t.Errorf("got ShardFilesConsidered = %d, want 2", sres.Stats.ShardFilesConsidered)
		}
		if sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("nomatch")},
			), zoekt.SearchOptions{
				EstimateDocCount: true,
				ChunkMatches:     true,
			}); sres.Stats.ShardFilesConsidered != 0 {
			t.Errorf("got ShardFilesConsidered = %d, want 0", sres.Stats.ShardFilesConsidered)
		}
	})
}

// TestUTF8CorrectCorpus checks that a file-name search finds a name that was
// indexed after a document made of multi-byte runes.
func TestUTF8CorrectCorpus(t *testing.T) {
	needle := "neeedle"

	// 6 bytes.
	unicode := "世界"
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat(unicode, 100)),
		},
		Document{
			Name:    "xxxxxneeedle",
			Content: []byte("hello"),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: needle, FileName: true}
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res)
		}
	})
}

// TestBuilderStats checks the ContentSize reported by a builder holding one
// 4096-byte document after the shard has been written out.
func TestBuilderStats(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})
	var buf bytes.Buffer
	if err := b.Write(&buf); err != nil {
		t.Fatal(err)
	}

	if got, want := b.ContentSize(), uint32(2+4*1024); got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}

// TestIOStats checks the ContentBytesLoaded and IndexBytesLoaded statistics
// of a case-sensitive content search, for line and chunk matches, each with
// and without BM25 scoring.
func TestIOStats(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(strings.Repeat("abcd", 1024)),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, chunkOpts)

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("LineMatches with BM25", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true})

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})

	t.Run("ChunkMatches with BM25", func(t *testing.T) {
		q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true}
		res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true})

		// 4096 (content) + 2 (overhead: newlines or doc sections)
		if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want {
			t.Errorf("got content I/O %d, want %d", got, want)
		}

		// 1024 entries, each 4 bytes apart. 4 fits into single byte
		// delta encoded.
		if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want {
			t.Errorf("got index I/O %d, want %d", got, want)
		}
	})
}

// TestStartLineAnchor checks that "^" anchors at the start of a line:
// "^start" matches the second line, "^middle" matches nothing.
func TestStartLineAnchor(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{
			Name: "f1",
			Content: []byte(
				`hello
start of middle of line
`),
		})

	t.Run("LineMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			// Searching with a query that failed to parse is pointless.
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		q, err := query.Parse("^start")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}

		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 file", res.Files)
		}

		q, err = query.Parse("^middle")
		if err != nil {
			t.Fatalf("parse: %v", err)
		}
		res = searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Errorf("got %v, want 0 files", res.Files)
		}
	})
}

// TestAndOrUnicode combines a parsed "orange.*apple" query with nested And/Or
// repo and branch atoms and runs it against content where a multi-byte rune
// sits between the two words.
func TestAndOrUnicode(t *testing.T) {
	q, err := query.Parse("orange.*apple")
	if err != nil {
		// The parsed query is embedded in finalQ below; stop if it is unusable.
		t.Fatalf("parse: %v", err)
	}
	finalQ := query.NewAnd(q,
		query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")},
			query.NewOr(&query.Branch{Pattern: "master"}))))

	b := testShardBuilder(t, &zoekt.Repository{
		Name:     "name",
		Branches: []zoekt.RepositoryBranch{{"master", "master-version"}},
	}, Document{
		Name:    "f2",
		Content: []byte("orange\u2318apple"),
		// --------------0123456     78901
		Branches: []string{"master"},
	})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, finalQ, chunkOpts)
		if len(res.Files) != 1 {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestAndShort checks an And of a two-character substring ("at") and a longer
// one: only f1, which contains both, may be returned.
func TestAndShort(t *testing.T) {
	content := []byte("bla needle at orange bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: []byte("xx at xx")},
		Document{Name: "f3", Content: []byte("yy orange xx")},
	)

	q := query.NewAnd(&query.Substring{Pattern: "at"},
		&query.Substring{Pattern: "orange"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

// TestNoCollectRegexpSubstring checks that the regexp "final[,.]" reports a
// single match even though the substring "final" occurs on both lines.
func TestNoCollectRegexpSubstring(t *testing.T) {
	content := []byte("bla final bla\nfoo final, foo")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
	)

	q := &query.Regexp{
		Regexp: mustParseRE("final[,.]"),
	}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.LineMatches) != 1 {
			t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result", res.Files)
		}
		if f := res.Files[0]; len(f.ChunkMatches) != 1 {
			// Report the field that was checked.
			t.Fatalf("got chunk matches %v, want 1 chunk match", f.ChunkMatches)
		}
	})
}

// printLineMatches renders ms as a comma-separated list of
// "lineNumber:quotedLine fragments" entries for use in failure messages.
func printLineMatches(ms []zoekt.LineMatch) string {
	ss := make([]string, 0, len(ms))
	for _, m := range ms {
		ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments))
	}

	return strings.Join(ss, ", ")
}

// TestLang checks that a Language atom restricts results to the document
// tagged with that language, and that the language is reported on the match.
func TestLang(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "cpp"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f3", res.Files)
		}
		f := res.Files[0]
		if f.FileName != "f3" || f.Language != "cpp" {
			t.Fatalf("got %v, want 1 match with language cpp", f)
		}
	})
}

// TestLangShortcut checks that querying a language absent from the shard
// returns nothing and loads no index bytes.
func TestLangShortcut(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)

	q := query.NewAnd(&query.Substring{Pattern: "needle"},
		&query.Language{Language: "fortran"})

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want 0 results", res.Files)
		}
		if res.Stats.IndexBytesLoaded > 0 {
			t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded)
		}
	})
}

// TestNoTextMatchAtoms checks a query made only of a Language atom: the single
// java document (f2) must be returned.
func TestNoTextMatchAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Language: "java", Content: content},
		Document{Name: "f3", Language: "cpp", Content: content},
	)
	q := query.NewAnd(&query.Language{Language: "java"})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("got %v, want 1 result in f2", res.Files)
		}
	})
}

// TestNoPositiveAtoms checks a query whose only text atom is negated: both
// documents of the matching repository must be returned.
func TestNoPositiveAtoms(t *testing.T) {
	content := []byte("bla needle bla")
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: content},
		Document{Name: "f2", Content: content},
	)

	q := query.NewAnd(
		&query.Not{Child: &query.Substring{Pattern: "xyz"}},
		&query.Repo{Regexp: regexp.MustCompile("reponame")})
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 2 {
			t.Fatalf("got %v, want 2 results", res.Files)
		}
	})
}

// TestSymbolBoundaryStart checks a symbol query whose match is the symbol
// section starting at byte 0 of the document.
func TestSymbolBoundaryStart(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 5}, {14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "start"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 0 {
			t.Fatalf("got offset %d want 0", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 0 {
			t.Fatalf("got offset %d want 0", m.Start.ByteOffset)
		}
	})
}

// TestSymbolBoundaryEnd checks a symbol query whose match is the symbol
// section ending at the last byte of the document.
func TestSymbolBoundaryEnd(t *testing.T) {
	content := []byte("start\nbla bla\nend")
	// ----------------012345-67890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{14, 17}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "end"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 14 {
			t.Fatalf("got offset %d want 14", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 14 {
			t.Fatalf("got offset %d want 14", m.Start.ByteOffset)
		}
	})
}

// TestSymbolSubstring checks that a substring symbol query reports only the
// occurrence lying inside the symbol section, with its offset and length.
func TestSymbolSubstring(t *testing.T) {
	content := []byte("bla\nsymblabla\nbla")
	// ----------------0123-4567890123-456

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 12}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "bla"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 7 || m.MatchLength != 3 {
			t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 {
			t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset)
		}
	})
}

// TestSymbolSubstringExact checks that a symbol query for "sym" reports only
// the occurrence that is a declared symbol section (offset 4), not the other
// occurrences of the same text.
func TestSymbolSubstringExact(t *testing.T) {
	content := []byte("bla\nsym\nbla\nsym\nasymb")
	// ----------------0123-4567-890123456-78901

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{4, 7}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Substring{Pattern: "sym"},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 4 {
			t.Fatalf("got offset %d, want 4", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 4 {
			t.Fatalf("got offset %d, want 4", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpExact checks that the anchored regexp "^bla$" inside a
// symbol query matches only the symbol that is exactly "bla" (offset 5).
func TestSymbolRegexpExact(t *testing.T) {
	content := []byte("blah\nbla\nbl")
	// ----------------01234-5678-90

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("^bla$")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 5 {
			t.Fatalf("got offset %d, want 5", m.Offset)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 5 {
			t.Fatalf("got offset %d, want 5", m.Start.ByteOffset)
		}
	})
}

// TestSymbolRegexpPartial checks that an unanchored regexp inside a symbol
// query can match part of a symbol, and reports that part's offset and length.
func TestSymbolRegexpPartial(t *testing.T) {
	content := []byte("abcdef")
	// ----------------012345

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{
			Name:    "f1",
			Content: content,
			Symbols: []DocumentSection{{0, 6}},
		},
	)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].LineMatches[0].LineFragments[0]
		if m.Offset != 1 {
			t.Fatalf("got offset %d, want 1", m.Offset)
		}
		if m.MatchLength != 3 {
			t.Fatalf("got match length %d, want 3", m.MatchLength)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 line in 1 file", res.Files)
		}
		m := res.Files[0].ChunkMatches[0].Ranges[0]
		if m.Start.ByteOffset != 1 {
			t.Fatalf("got offset %d, want 1", m.Start.ByteOffset)
		}
		if m.End.ByteOffset != 4 {
			t.Fatalf("got match end %d, want 4", m.End.ByteOffset)
		}
	})
}

// TestSymbolRegexpAll checks that the regexp ".*" inside a symbol query
// reports every declared symbol of every document, in document order, at the
// symbols' start offsets.
func TestSymbolRegexpAll(t *testing.T) {
	docs := []Document{
		{
			Name:    "f1",
			Content: []byte("Hello Zoekt"),
			// --------------01234567890
			Symbols: []DocumentSection{{0, 5}, {6, 11}},
		},
		{
			Name:    "f2",
			Content: []byte("Second Zoekt Third"),
			// --------------012345678901234567
			Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}},
		},
	}

	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...)
	q := &query.Symbol{
		Expr: &query.Regexp{Regexp: mustParseRE(".*")},
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			got := res.Files[i].LineMatches[0].LineFragments
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != got[j].Offset {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name)
				}
			}
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != len(docs) {
			t.Fatalf("got %v, want %d file", res.Files, len(docs))
		}
		for i, want := range docs {
			got := res.Files[i].ChunkMatches[0].Ranges
			if len(got) != len(want.Symbols) {
				t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name)
			}

			for j, sec := range want.Symbols {
				if sec.Start != got[j].Start.ByteOffset {
					t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name)
				}
			}
		}
	})
}

// TestHitIterTerminate only requires that the searches below return; it makes
// no assertion about their results.
func TestHitIterTerminate(t *testing.T) {
	// contrived input: trigram frequencies forces selecting abc +
	// def for the distance iteration. There is no index, so this
	// will advance the compressedPostingIterator to beyond the
	// end.
	content := []byte("abc bcdbcd cdecde abcabc def efg")
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)

	t.Run("LineMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"})
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts)
	})
}

// TestDistanceHitIterBailLast checks that searching for "UAST" in content
// that contains "AST" and "UASH" but never "UAST" yields no results.
func TestDistanceHitIterBailLast(t *testing.T) {
	content := []byte("AST AST AST UASH")
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		},
	)
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"})
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})

	// This subtest passes chunkOpts, so it is named for what it exercises.
	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts)
		if len(res.Files) != 0 {
			t.Fatalf("got %v, want no results", res.Files)
		}
	})
}

// TestDocumentSectionRuneBoundary checks that Add rejects symbol sections
// whose start or end falls inside a multi-byte rune.
func TestDocumentSectionRuneBoundary(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b, err := NewShardBuilder(nil)
	if err != nil {
		t.Fatalf("NewShardBuilder: %v", err)
	}

	for i, sec := range []DocumentSection{
		{2, 6},
		{3, 7},
	} {
		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
			Symbols: []DocumentSection{sec},
		}); err == nil {
			t.Errorf("%d: Add succeeded", i)
		}
	}
}

// TestUnicodeQuery searches for a pattern made of three Kelvin signs and
// checks that the single reported match has the byte length of the pattern.
func TestUnicodeQuery(t *testing.T) {
	content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint})
	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: []byte(content),
		},
	)

	q := &query.Substring{Pattern: content}

	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, q)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.LineMatches) != 1 {
			t.Fatalf("want 1 line, got %v", f.LineMatches)
		}
		l := f.LineMatches[0]

		if len(l.LineFragments) != 1 {
			t.Fatalf("want 1 line fragment, got %v", l.LineFragments)
		}
		fr := l.LineFragments[0]
		if fr.MatchLength != len(content) {
			t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content))
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, q, chunkOpts)
		if len(res.Files) != 1 {
			t.Fatalf("want 1 match, got %v", res.Files)
		}

		f := res.Files[0]
		if len(f.ChunkMatches) != 1 {
			// Report the field that was checked.
			t.Fatalf("want 1 chunk, got %v", f.ChunkMatches)
		}
		cm := f.ChunkMatches[0]

		if len(cm.Ranges) != 1 {
			t.Fatalf("want 1 range, got %v", cm.Ranges)
		}
		rr := cm.Ranges[0]
		if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) {
			t.Fatalf("got MatchLength %d want %d", matchLen, len(content))
		}
	})
}

// TestSkipInvalidContent adds a document containing a NUL byte and checks that
// Add succeeds, that the original text is not searchable, and that a search
// for "NOT-INDEXED" returns the file instead.
func TestSkipInvalidContent(t *testing.T) {
	for _, content := range []string{
		// Binary
		"abc def \x00 abc",
	} {
		b, err := NewShardBuilder(nil)
		if err != nil {
			t.Fatalf("NewShardBuilder: %v", err)
		}

		if err := b.Add(Document{
			Name:    "f1",
			Content: []byte(content),
		}); err != nil {
			t.Fatal(err)
		}

		t.Run("LineMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})

		t.Run("ChunkMatches", func(t *testing.T) {
			q := &query.Substring{Pattern: "abc def"}
			res := searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 0 {
				t.Fatalf("got %v, want no results", res.Files)
			}

			q = &query.Substring{Pattern: "NOT-INDEXED"}
			res = searchForTest(t, b, q, chunkOpts)
			if len(res.Files) != 1 {
				t.Fatalf("got %v, want 1 result", res.Files)
			}
		})
	}
}

// TestDocChecker exercises DocChecker.Check with texts that must be accepted
// and texts that must be skipped, first with the large-file allowance off and
// then with it on.
func TestDocChecker(t *testing.T) {
	docChecker := DocChecker{}

	// Test valid and invalid text
	for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} {
		if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone {
			t.Errorf("Check(%q): %v", text, skip)
		}
	}
	for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx", "0123456789abcdefghi"} {
		if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone {
			t.Errorf("Check(%q) succeeded", text)
		}
	}

	// Test valid and invalid text with an allowed large file
	for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} {
		if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone {
			t.Errorf("Check(%q): %v", text, skip)
		}
	}
	for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx"} {
		if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone {
			t.Errorf("Check(%q) succeeded", text)
		}
	}
}

// TestLineAnd runs a content regexp that needs "apple" followed by "banana" on
// the same line. Only f1 has such a line, and exactly one regexp evaluation is
// expected (Stats.RegexpsConsidered == 1).
func TestLineAnd(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple orange\nbanana")},
		Document{Name: "f3", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		t.Fatalf("syntax.Parse(%q): %v", pattern, err)
	}

	q := query.Regexp{
		Regexp:  r,
		Content: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "f1" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})
}

func TestLineAndFileName(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"},
		Document{Name: "f1", Content: []byte("apple banana\ngrape")},
		Document{Name: "f2", Content: []byte("apple banana\norange")},
		Document{Name: "apple banana", Content: []byte("banana grape")},
	)
	pattern := "(apple)(?-s:.)*?(banana)"
	r, _ := syntax.Parse(pattern, syntax.Perl)

	q := query.Regexp{
		Regexp:   r,
		FileName: true,
	}
	t.Run("LineMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount {
			t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount)
		}
		if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" {
			t.Errorf("got %v, want 1 result", res.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		res := searchForTest(t, b, &q, chunkOpts)
		wantRegexpCount := 1
		if gotRegexpCount := res.RegexpsConsidered;
gotRegexpCount != wantRegexpCount { 3495 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3496 } 3497 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3498 t.Errorf("got %v, want 1 result", res.Files) 3499 } 3500 }) 3501} 3502 3503func TestMultiLineRegex(t *testing.T) { 3504 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3505 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3506 Document{Name: "f2", Content: []byte("apple orange")}, 3507 Document{Name: "f3", Content: []byte("grape apple")}, 3508 ) 3509 pattern := "(apple).*?[[:space:]].*?(grape)" 3510 r, _ := syntax.Parse(pattern, syntax.Perl) 3511 3512 q := query.Regexp{ 3513 Regexp: r, 3514 } 3515 t.Run("LineMatches", func(t *testing.T) { 3516 res := searchForTest(t, b, &q) 3517 wantRegexpCount := 2 3518 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3519 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3520 } 3521 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3522 t.Errorf("got %v, want 1 result", res.Files) 3523 } 3524 if l := len(res.Files[0].LineMatches); l != 2 { 3525 t.Errorf("got %v, want 2 line matches", l) 3526 } 3527 }) 3528 3529 t.Run("ChunkMatches", func(t *testing.T) { 3530 res := searchForTest(t, b, &q, chunkOpts) 3531 wantRegexpCount := 2 3532 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3533 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3534 } 3535 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3536 t.Errorf("got %v, want 1 result", res.Files) 3537 } 3538 if l := len(res.Files[0].ChunkMatches); l != 1 { 3539 t.Errorf("got %v, want 1 chunk matches", l) 3540 } 3541 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3542 t.Errorf("got %v, want 1 chunk ranges", l) 3543 } 3544 }) 3545} 3546 3547func TestSearchTypeFileName(t *testing.T) { 3548 b := testShardBuilder(t, &zoekt.Repository{ 3549 Name: "reponame", 3550 
}, 3551 Document{Name: "f1", Content: []byte("bla the needle")}, 3552 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3553 // -----------------------------------012345678901234567890-123456 3554 ) 3555 3556 t.Run("LineMatches", func(t *testing.T) { 3557 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3558 t.Helper() 3559 fmatches := res.Files 3560 if len(fmatches) != 1 { 3561 t.Errorf("got %v, want 1 matches", len(fmatches)) 3562 return 3563 } 3564 if len(fmatches[0].LineMatches) != 1 { 3565 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3566 return 3567 } 3568 var got string 3569 if fmatches[0].LineMatches[0].FileName { 3570 got = fmatches[0].FileName 3571 } else { 3572 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3573 } 3574 3575 if got != want { 3576 t.Errorf("got %s, want %s", got, want) 3577 } 3578 } 3579 3580 // Only return the later match in the second file 3581 res := searchForTest(t, b, query.NewAnd( 3582 &query.Type{ 3583 Type: query.TypeFileName, 3584 Child: &query.Substring{Pattern: "needle"}, 3585 }, 3586 &query.Substring{Pattern: "file"})) 3587 wantSingleMatch(res, "f2:8") 3588 3589 // Only return a filename result 3590 res = searchForTest(t, b, 3591 &query.Type{ 3592 Type: query.TypeFileName, 3593 Child: &query.Substring{Pattern: "file"}, 3594 }) 3595 wantSingleMatch(res, "f2") 3596 }) 3597 3598 t.Run("ChunkMatches", func(t *testing.T) { 3599 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3600 t.Helper() 3601 fmatches := res.Files 3602 if len(fmatches) != 1 { 3603 t.Errorf("got %v, want 1 matches", len(fmatches)) 3604 return 3605 } 3606 if len(fmatches[0].ChunkMatches) != 1 { 3607 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3608 return 3609 } 3610 var got string 3611 if fmatches[0].ChunkMatches[0].FileName { 3612 got = fmatches[0].FileName 3613 } else { 3614 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, 
fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3615 } 3616 3617 if got != want { 3618 t.Errorf("got %s, want %s", got, want) 3619 } 3620 } 3621 3622 // Only return the later match in the second file 3623 res := searchForTest(t, b, query.NewAnd( 3624 &query.Type{ 3625 Type: query.TypeFileName, 3626 Child: &query.Substring{Pattern: "needle"}, 3627 }, 3628 &query.Substring{Pattern: "file"}), 3629 chunkOpts, 3630 ) 3631 wantSingleMatch(res, "f2:8") 3632 3633 // Only return a filename result 3634 res = searchForTest(t, b, 3635 &query.Type{ 3636 Type: query.TypeFileName, 3637 Child: &query.Substring{Pattern: "file"}, 3638 }, 3639 chunkOpts, 3640 ) 3641 wantSingleMatch(res, "f2") 3642 }) 3643} 3644 3645func TestSearchTypeLanguage(t *testing.T) { 3646 b := testShardBuilder(t, &zoekt.Repository{ 3647 Name: "reponame", 3648 }, 3649 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3650 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3651 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3652 Document{Name: "be.magik", Content: []byte(`_package unicorn`)}, 3653 ) 3654 3655 t.Log(b.languageMap) 3656 3657 t.Run("LineMatches", func(t *testing.T) { 3658 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3659 t.Helper() 3660 fmatches := res.Files 3661 if len(fmatches) != 1 { 3662 t.Errorf("got %v, want 1 matches", len(fmatches)) 3663 return 3664 } 3665 if len(fmatches[0].LineMatches) != 1 { 3666 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3667 return 3668 } 3669 var got string 3670 if fmatches[0].LineMatches[0].FileName { 3671 got = fmatches[0].FileName 3672 } else { 3673 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3674 } 3675 3676 if got != want { 3677 t.Errorf("got %s, want %s", got, want) 3678 } 3679 } 3680 3681 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3682 wantSingleMatch(res, "apex.cls") 3683 
3684 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3685 wantSingleMatch(res, "tex.cls") 3686 3687 res = searchForTest(t, b, &query.Language{Language: "C"}) 3688 wantSingleMatch(res, "hello.h") 3689 3690 res = searchForTest(t, b, &query.Language{Language: "Magik"}) 3691 wantSingleMatch(res, "be.magik") 3692 3693 // test fallback language search by pretending it's an older index version 3694 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3695 if len(res.Files) != 0 { 3696 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3697 } 3698 3699 b.featureVersion = 11 // force fallback 3700 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3701 wantSingleMatch(res, "hello.h") 3702 }) 3703 3704 t.Run("ChunkMatches", func(t *testing.T) { 3705 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3706 t.Helper() 3707 fmatches := res.Files 3708 if len(fmatches) != 1 { 3709 t.Errorf("got %v, want 1 matches", len(fmatches)) 3710 return 3711 } 3712 if len(fmatches[0].ChunkMatches) != 1 { 3713 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3714 return 3715 } 3716 var got string 3717 if fmatches[0].ChunkMatches[0].FileName { 3718 got = fmatches[0].FileName 3719 } else { 3720 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3721 } 3722 3723 if got != want { 3724 t.Errorf("got %s, want %s", got, want) 3725 } 3726 } 3727 3728 b.featureVersion = FeatureVersion // reset feature version 3729 res := searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3730 wantSingleMatch(res, "apex.cls") 3731 3732 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3733 wantSingleMatch(res, "tex.cls") 3734 3735 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3736 wantSingleMatch(res, "hello.h") 3737 3738 // test fallback language search by pretending it's an older index version 3739 res = searchForTest(t, b, 
&query.Language{Language: "C++"}, chunkOpts) 3740 if len(res.Files) != 0 { 3741 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3742 } 3743 3744 b.featureVersion = 11 // force fallback 3745 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3746 wantSingleMatch(res, "hello.h") 3747 }) 3748} 3749 3750func TestStats(t *testing.T) { 3751 ignored := []cmp.Option{ 3752 cmpopts.EquateEmpty(), 3753 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), 3754 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 3755 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 3756 } 3757 3758 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry { 3759 searcher := searcherForTest(t, b) 3760 indexdata := searcher.(*indexData) 3761 return indexdata.repoListEntry 3762 } 3763 3764 t.Run("one empty repo", func(t *testing.T) { 3765 b := testShardBuilder(t, nil) 3766 got := repoListEntries(b) 3767 want := []zoekt.RepoListEntry{ 3768 { 3769 Stats: zoekt.RepoStats{ 3770 Repos: 0, 3771 Shards: 1, 3772 Documents: 0, 3773 IndexBytes: 20, 3774 ContentBytes: 0, 3775 NewLinesCount: 0, 3776 DefaultBranchNewLinesCount: 0, 3777 OtherBranchesNewLinesCount: 0, 3778 }, 3779 }, 3780 } 3781 3782 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3783 t.Fatalf("mismatch (-want +got):\n%s", diff) 3784 } 3785 }) 3786 3787 t.Run("one simple shard", func(t *testing.T) { 3788 b := testShardBuilder(t, nil, 3789 Document{Name: "doc 0", Content: []byte("content 0")}, 3790 Document{Name: "doc 1", Content: []byte("content 1")}, 3791 ) 3792 got := repoListEntries(b) 3793 want := []zoekt.RepoListEntry{ 3794 { 3795 Stats: zoekt.RepoStats{ 3796 Repos: 0, 3797 Shards: 1, 3798 Documents: 2, 3799 IndexBytes: 224, 3800 ContentBytes: 28, 3801 NewLinesCount: 0, 3802 DefaultBranchNewLinesCount: 0, 3803 OtherBranchesNewLinesCount: 0, 3804 }, 3805 }, 3806 } 3807 3808 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3809 t.Fatalf("mismatch (-want +got):\n%s", 
diff) 3810 } 3811 }) 3812 3813 t.Run("one compound shard", func(t *testing.T) { 3814 b := testShardBuilderCompound(t, 3815 []*zoekt.Repository{ 3816 {Name: "repo 0"}, 3817 {Name: "repo 1"}, 3818 }, 3819 [][]Document{ 3820 { 3821 {Name: "doc 0", Content: []byte("content 0")}, 3822 {Name: "doc 1", Content: []byte("content 1")}, 3823 }, 3824 { 3825 {Name: "doc 2", Content: []byte("content 2")}, 3826 {Name: "doc 3", Content: []byte("content 3")}, 3827 }, 3828 }, 3829 ) 3830 got := repoListEntries(b) 3831 want := []zoekt.RepoListEntry{ 3832 { 3833 Stats: zoekt.RepoStats{ 3834 Repos: 0, 3835 Shards: 1, 3836 Documents: 2, 3837 IndexBytes: 180, 3838 ContentBytes: 28, 3839 NewLinesCount: 0, 3840 DefaultBranchNewLinesCount: 0, 3841 OtherBranchesNewLinesCount: 0, 3842 }, 3843 }, 3844 { 3845 Stats: zoekt.RepoStats{ 3846 Repos: 0, 3847 Shards: 1, 3848 Documents: 2, 3849 IndexBytes: 180, 3850 ContentBytes: 28, 3851 NewLinesCount: 0, 3852 DefaultBranchNewLinesCount: 0, 3853 OtherBranchesNewLinesCount: 0, 3854 }, 3855 }, 3856 } 3857 3858 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3859 t.Fatalf("mismatch (-want +got):\n%s", diff) 3860 } 3861 }) 3862 3863 t.Run("compound shard with empty repos", func(t *testing.T) { 3864 b := testShardBuilderCompound(t, 3865 []*zoekt.Repository{ 3866 {Name: "repo 0"}, 3867 {Name: "repo 1"}, 3868 {Name: "repo 2"}, 3869 {Name: "repo 3"}, 3870 {Name: "repo 4"}, 3871 }, 3872 [][]Document{ 3873 {{Name: "doc 0", Content: []byte("content 0")}}, 3874 nil, 3875 {{Name: "doc 1", Content: []byte("content 1")}}, 3876 nil, 3877 nil, 3878 }, 3879 ) 3880 got := repoListEntries(b) 3881 3882 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3883 Shards: 1, 3884 Documents: 0, 3885 ContentBytes: 0, 3886 }} 3887 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3888 Shards: 1, 3889 Documents: 1, 3890 ContentBytes: 14, 3891 }} 3892 3893 want := []zoekt.RepoListEntry{ 3894 entryNonEmpty, 3895 entryEmpty, 3896 entryNonEmpty, 3897 
entryEmpty, 3898 entryEmpty, 3899 } 3900 3901 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3902 t.Fatalf("mismatch (-want +got):\n%s", diff) 3903 } 3904 }) 3905} 3906 3907// This tests the frequent pattern "\bLITERAL\b". 3908func TestWordSearch(t *testing.T) { 3909 content := []byte("needle the bla") 3910 // ----------------01234567890123 3911 3912 b := testShardBuilder(t, nil, 3913 Document{ 3914 Name: "f1", 3915 Content: content, 3916 }) 3917 3918 t.Run("LineMatches", func(t *testing.T) { 3919 sres := searchForTest(t, b, 3920 &query.Regexp{ 3921 Regexp: mustParseRE("\\bthe\\b"), 3922 CaseSensitive: true, 3923 Content: true, 3924 }) 3925 3926 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3927 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3928 } 3929 3930 if sres.Stats.RegexpsConsidered != 0 { 3931 t.Fatal("expected regexp to be skipped") 3932 } 3933 3934 got := sres.Files[0].LineMatches[0] 3935 want := zoekt.LineMatch{ 3936 LineFragments: []zoekt.LineFragmentMatch{{ 3937 LineOffset: 7, 3938 Offset: 7, 3939 MatchLength: 3, 3940 }}, 3941 Line: content, 3942 FileName: false, 3943 LineNumber: 1, 3944 LineStart: 0, 3945 LineEnd: 14, 3946 } 3947 3948 if !reflect.DeepEqual(got, want) { 3949 t.Errorf("got %#v, want %#v", got, want) 3950 } 3951 }) 3952 3953 t.Run("ChunkMatches", func(t *testing.T) { 3954 sres := searchForTest(t, b, 3955 &query.Regexp{ 3956 Regexp: mustParseRE("\\bthe\\b"), 3957 CaseSensitive: true, 3958 }, chunkOpts) 3959 3960 if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 { 3961 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3962 } 3963 3964 if sres.Stats.RegexpsConsidered != 0 { 3965 t.Fatal("expected regexp to be skipped") 3966 } 3967 3968 got := sres.Files[0].ChunkMatches[0] 3969 want := zoekt.ChunkMatch{ 3970 Content: content, 3971 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3972 Ranges: []zoekt.Range{{ 3973 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, 
Column: 8}, 3974 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3975 }}, 3976 } 3977 3978 if diff := cmp.Diff(want, got); diff != "" { 3979 t.Fatal(diff) 3980 } 3981 }) 3982} 3983 3984// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match. 3985// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk. 3986func BenchmarkScoreChunkMatches(b *testing.B) { 3987 ctx := context.Background() 3988 var builder strings.Builder 3989 for i := range 1000 { 3990 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i)) 3991 } 3992 3993 searcher := searcherForTest(b, testShardBuilder(b, nil, 3994 Document{Name: "f1", Content: []byte(builder.String())}, 3995 )) 3996 3997 q := &query.Or{ 3998 Children: []query.Q{ 3999 &query.Substring{Pattern: "f"}, 4000 &query.Substring{Pattern: "t"}, 4001 }} 4002 4003 b.Run("score large ChunkMatch", func(b *testing.B) { 4004 b.ReportAllocs() 4005 b.ResetTimer() 4006 4007 for i := 0; i < b.N; i++ { 4008 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) 4009 if err != nil { 4010 b.Fatal(err) 4011 } 4012 4013 matches := sres.Files 4014 if len(matches) == 0 { 4015 b.Fatalf("want file index, got none") 4016 } 4017 } 4018 }) 4019}