fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at tngl 110 kB View raw
1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "reflect" 22 "regexp/syntax" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 "github.com/google/go-cmp/cmp/cmpopts" 28 "github.com/grafana/regexp" 29 30 "github.com/sourcegraph/zoekt" 31 "github.com/sourcegraph/zoekt/query" 32) 33 34func clearScores(r *zoekt.SearchResult) { 35 for i := range r.Files { 36 r.Files[i].Score = 0.0 37 for j := range r.Files[i].LineMatches { 38 r.Files[i].LineMatches[j].Score = 0.0 39 } 40 for j := range r.Files[i].ChunkMatches { 41 r.Files[i].ChunkMatches[j].Score = 0.0 42 r.Files[i].ChunkMatches[j].BestLineMatch = 0 43 } 44 r.Files[i].Checksum = nil 45 r.Files[i].Debug = "" 46 } 47} 48 49func testShardBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *ShardBuilder { 50 tb.Helper() 51 52 b, err := NewShardBuilder(repo) 53 if err != nil { 54 tb.Fatalf("NewShardBuilder: %v", err) 55 } 56 57 for i, d := range docs { 58 if err := b.Add(d); err != nil { 59 tb.Fatalf("Add %d: %v", i, err) 60 } 61 } 62 63 return b 64} 65 66func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *ShardBuilder { 67 t.Helper() 68 69 b := newShardBuilder(0) 70 b.indexFormatVersion = NextIndexFormatVersion 71 72 if len(repos) != len(docs) { 73 t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: 
len(repos)=%d len(docs)=%d", len(repos), len(docs)) 74 } 75 76 for i, repo := range repos { 77 if err := b.setRepository(repo); err != nil { 78 t.Fatal(err) 79 } 80 for j, d := range docs[i] { 81 if err := b.Add(d); err != nil { 82 t.Fatalf("Add %d %d: %v", i, j, err) 83 } 84 } 85 } 86 87 return b 88} 89 90func TestBoundary(t *testing.T) { 91 b := testShardBuilder(t, nil, 92 Document{Name: "f1", Content: []byte("x the")}, 93 Document{Name: "f1", Content: []byte("reader")}) 94 res := searchForTest(t, b, &query.Substring{Pattern: "there"}) 95 if len(res.Files) > 0 { 96 t.Fatalf("got %v, want no matches", res.Files) 97 } 98} 99 100func TestDocSectionInvalid(t *testing.T) { 101 b, err := NewShardBuilder(nil) 102 if err != nil { 103 t.Fatalf("NewShardBuilder: %v", err) 104 } 105 doc := Document{ 106 Name: "f1", 107 Content: []byte("01234567890123"), 108 Symbols: []DocumentSection{{5, 8}, {7, 9}}, 109 } 110 111 if err := b.Add(doc); err == nil { 112 t.Errorf("overlapping doc sections should fail") 113 } 114 115 doc = Document{ 116 Name: "f1", 117 Content: []byte("01234567890123"), 118 Symbols: []DocumentSection{{0, 20}}, 119 } 120 121 if err := b.Add(doc); err == nil { 122 t.Errorf("doc sections beyond EOF should fail") 123 } 124} 125 126func TestBasic(t *testing.T) { 127 b := testShardBuilder(t, nil, 128 Document{ 129 Name: "f2", 130 Content: []byte("to carry water in the no later bla"), 131 // --------------0123456789012345678901234567890123 132 }) 133 134 t.Run("LineMatch", func(t *testing.T) { 135 res := searchForTest(t, b, &query.Substring{ 136 Pattern: "water", 137 CaseSensitive: true, 138 }) 139 fmatches := res.Files 140 if len(fmatches) != 1 || len(fmatches[0].LineMatches) != 1 { 141 t.Fatalf("got %v, want 1 matches", fmatches) 142 } 143 144 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 145 want := "f2:9" 146 if got != want { 147 t.Errorf("1: got %s, want %s", got, want) 148 } 149 }) 150 151 
t.Run("ChunkMatch", func(t *testing.T) { 152 res := searchForTest(t, b, &query.Substring{ 153 Pattern: "water", 154 CaseSensitive: true, 155 }, chunkOpts) 156 fmatches := res.Files 157 if len(fmatches) != 1 || len(fmatches[0].ChunkMatches) != 1 { 158 t.Fatalf("got %v, want 1 matches", fmatches) 159 } 160 161 got := fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 162 want := "f2:9" 163 if got != want { 164 t.Errorf("1: got %s, want %s", got, want) 165 } 166 }) 167} 168 169func TestEmptyIndex(t *testing.T) { 170 b := testShardBuilder(t, nil) 171 searcher := searcherForTest(t, b) 172 173 var opts zoekt.SearchOptions 174 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 175 t.Fatalf("Search: %v", err) 176 } 177 178 if _, err := searcher.List(context.Background(), &query.Repo{Regexp: regexp.MustCompile("")}, nil); err != nil { 179 t.Fatalf("List: %v", err) 180 } 181 182 if _, err := searcher.Search(context.Background(), &query.Substring{Pattern: "java", FileName: true}, &opts); err != nil { 183 t.Fatalf("Search: %v", err) 184 } 185} 186 187type memSeeker struct { 188 data []byte 189} 190 191func (s *memSeeker) Name() string { 192 return "memseeker" 193} 194 195func (s *memSeeker) Close() {} 196func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 197 return s.data[off : off+sz], nil 198} 199 200func (s *memSeeker) Size() (uint32, error) { 201 return uint32(len(s.data)), nil 202} 203 204func TestNewlines(t *testing.T) { 205 b := testShardBuilder(t, nil, 206 // -----------------------------------------012345-678901-234 207 Document{Name: "filename", Content: []byte("line1\nline2\nbla")}) 208 209 t.Run("LineMatches", func(t *testing.T) { 210 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 211 212 matches := sres.Files 213 want := []zoekt.FileMatch{{ 214 FileName: "filename", 215 LineMatches: []zoekt.LineMatch{{ 216 LineFragments: []zoekt.LineFragmentMatch{{ 217 
Offset: 8, 218 LineOffset: 2, 219 MatchLength: 3, 220 }}, 221 Line: []byte("line2\n"), 222 LineStart: 6, 223 LineEnd: 12, 224 LineNumber: 2, 225 }}, 226 }} 227 228 if diff := cmp.Diff(matches, want); diff != "" { 229 t.Fatal(diff) 230 } 231 }) 232 233 t.Run("ChunkMatches", func(t *testing.T) { 234 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 235 236 matches := sres.Files 237 want := []zoekt.FileMatch{{ 238 FileName: "filename", 239 ChunkMatches: []zoekt.ChunkMatch{{ 240 Content: []byte("line2\n"), 241 ContentStart: zoekt.Location{ 242 ByteOffset: 6, 243 LineNumber: 2, 244 Column: 1, 245 }, 246 Ranges: []zoekt.Range{{ 247 Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 248 End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 249 }}, 250 }}, 251 }} 252 253 if diff := cmp.Diff(want, matches); diff != "" { 254 t.Fatal(diff) 255 } 256 }) 257} 258 259// A result spanning multiple lines should have LineMatches that only cover 260// single lines. 
261func TestQueryNewlines(t *testing.T) { 262 text := "line1\nline2\nbla" 263 b := testShardBuilder(t, nil, 264 Document{Name: "filename", Content: []byte(text)}) 265 266 t.Run("LineMatches", func(t *testing.T) { 267 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 268 matches := sres.Files 269 if len(matches) != 1 { 270 t.Fatalf("got %d file matches, want exactly one", len(matches)) 271 } 272 m := matches[0] 273 if len(m.LineMatches) != 2 { 274 t.Fatalf("got %d line matches, want exactly two %#v", len(m.LineMatches), m.LineMatches) 275 } 276 }) 277 278 t.Run("ChunkMatches", func(t *testing.T) { 279 sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}, chunkOpts) 280 matches := sres.Files 281 if len(matches) != 1 { 282 t.Fatalf("got %d file matches, want exactly one", len(matches)) 283 } 284 m := matches[0] 285 if len(m.ChunkMatches) != 1 { 286 t.Fatalf("got %d chunk matches, want exactly one", len(m.ChunkMatches)) 287 } 288 }) 289} 290 291var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} 292 293func searchForTest(t *testing.T, b *ShardBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { 294 searcher := searcherForTest(t, b) 295 var opts zoekt.SearchOptions 296 if len(o) > 0 { 297 opts = o[0] 298 } 299 res, err := searcher.Search(context.Background(), q, &opts) 300 if err != nil { 301 t.Fatalf("Search(%s): %v", q, err) 302 } 303 clearScores(res) 304 return res 305} 306 307func searcherForTest(t testing.TB, b *ShardBuilder) zoekt.Searcher { 308 var buf bytes.Buffer 309 if err := b.Write(&buf); err != nil { 310 t.Fatal(err) 311 } 312 f := &memSeeker{buf.Bytes()} 313 314 searcher, err := NewSearcher(f) 315 if err != nil { 316 t.Fatalf("NewSearcher: %v", err) 317 } 318 319 return searcher 320} 321 322func TestCaseFold(t *testing.T) { 323 b := testShardBuilder(t, nil, 324 Document{Name: "f1", Content: []byte("I love BaNaNAS.")}, 325 // -----------------------------------012345678901234 326 ) 327 t.Run("LineMatches", 
func(t *testing.T) { 328 sres := searchForTest(t, b, &query.Substring{ 329 Pattern: "bananas", 330 CaseSensitive: true, 331 }) 332 matches := sres.Files 333 if len(matches) != 0 { 334 t.Errorf("foldcase: got %#v, want 0 matches", matches) 335 } 336 337 sres = searchForTest(t, b, 338 &query.Substring{ 339 Pattern: "BaNaNAS", 340 CaseSensitive: true, 341 }) 342 matches = sres.Files 343 if len(matches) != 1 { 344 t.Errorf("no foldcase: got %v, want 1 matches", matches) 345 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 346 t.Errorf("foldcase: got %v, want offsets 7", matches) 347 } 348 }) 349 350 t.Run("ChunkMatches", func(t *testing.T) { 351 sres := searchForTest(t, b, &query.Substring{ 352 Pattern: "bananas", 353 CaseSensitive: true, 354 }, chunkOpts) 355 matches := sres.Files 356 if len(matches) != 0 { 357 t.Errorf("foldcase: got %#v, want 0 matches", matches) 358 } 359 360 sres = searchForTest(t, b, 361 &query.Substring{ 362 Pattern: "BaNaNAS", 363 CaseSensitive: true, 364 }) 365 matches = sres.Files 366 if len(matches) != 1 { 367 t.Errorf("no foldcase: got %v, want 1 matches", matches) 368 } else if matches[0].LineMatches[0].LineFragments[0].Offset != 7 { 369 t.Errorf("foldcase: got %v, want offsets 7", matches) 370 } 371 }) 372} 373 374// wordsAsSymbols finds all ASCII words in doc.Content which are longer than 2 375// chars. Those are then set as symbols. 
376func wordsAsSymbols(doc Document) Document { 377 re := regexp.MustCompile(`\b\w{2,}\b`) 378 var symbols []DocumentSection 379 var symbolsMetadata []*zoekt.Symbol 380 for _, match := range re.FindAllIndex(doc.Content, -1) { 381 symbols = append(symbols, DocumentSection{ 382 Start: uint32(match[0]), 383 End: uint32(match[1]), 384 }) 385 symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) 386 } 387 doc.Symbols = symbols 388 doc.SymbolsMetaData = symbolsMetadata 389 return doc 390} 391 392func TestSearchStats(t *testing.T) { 393 ctx := context.Background() 394 searcher := searcherForTest(t, testShardBuilder(t, nil, 395 wordsAsSymbols(Document{Name: "f1", Content: []byte("x banana y")}), 396 wordsAsSymbols(Document{Name: "f2", Content: []byte("x apple y")}), 397 wordsAsSymbols(Document{Name: "f3", Content: []byte("x banana apple y")}), 398 // --------------------------------------------------0123456789012345 399 )) 400 401 andQuery := query.NewAnd( 402 &query.Substring{ 403 Pattern: "banana", 404 }, 405 &query.Substring{ 406 Pattern: "apple", 407 }, 408 ) 409 410 t.Run("LineMatches", func(t *testing.T) { 411 sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) 412 if err != nil { 413 t.Fatal(err) 414 } 415 matches := sres.Files 416 if len(matches) != 1 || len(matches[0].LineMatches) != 1 || len(matches[0].LineMatches[0].LineFragments) != 2 { 417 t.Fatalf("got %#v, want 1 match with 2 fragments", matches) 418 } 419 420 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 || matches[0].LineMatches[0].LineFragments[1].Offset != 9 { 421 t.Fatalf("got %#v, want offsets 2,9", matches) 422 } 423 }) 424 t.Run("ChunkMatches", func(t *testing.T) { 425 sres, err := searcher.Search(ctx, andQuery, &chunkOpts) 426 if err != nil { 427 t.Fatal(err) 428 } 429 matches := sres.Files 430 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 || len(matches[0].ChunkMatches[0].Ranges) != 2 { 431 t.Fatalf("got %#v, want 1 chunk match with 2 
ranges", matches) 432 } 433 434 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 || matches[0].ChunkMatches[0].Ranges[1].Start.ByteOffset != 9 { 435 t.Fatalf("got %#v, want offsets 2,9", matches) 436 } 437 }) 438 t.Run("Stats", func(t *testing.T) { 439 cases := []struct { 440 Name string 441 Q query.Q 442 Want zoekt.Stats 443 }{{ 444 Name: "and-query", 445 Q: andQuery, 446 Want: zoekt.Stats{ 447 FilesLoaded: 1, 448 ContentBytesLoaded: 22, 449 IndexBytesLoaded: 10, 450 NgramMatches: 3, // we look at doc 1, because it's max(0,1) due to AND 451 NgramLookups: 104, 452 MatchCount: 2, 453 FileCount: 1, 454 FilesConsidered: 2, 455 ShardsScanned: 1, 456 }, 457 }, { 458 Name: "one-trigram", 459 Q: &query.Substring{ 460 Pattern: "a y", 461 Content: true, 462 CaseSensitive: true, 463 }, 464 Want: zoekt.Stats{ 465 ContentBytesLoaded: 14, 466 IndexBytesLoaded: 1, 467 FileCount: 1, 468 FilesConsidered: 1, 469 FilesLoaded: 1, 470 ShardsScanned: 1, 471 MatchCount: 1, 472 NgramMatches: 1, 473 NgramLookups: 2, // once to lookup frequency then again to access posting list. 474 }, 475 }, { 476 Name: "one-trigram-case-insensitive", 477 Q: &query.Substring{ 478 Pattern: "a y", 479 Content: true, 480 }, 481 Want: zoekt.Stats{ 482 ContentBytesLoaded: 14, 483 IndexBytesLoaded: 1, 484 FileCount: 1, 485 FilesConsidered: 1, 486 FilesLoaded: 1, 487 ShardsScanned: 1, 488 MatchCount: 1, 489 NgramMatches: 1, 490 NgramLookups: 8, // "a y" has 2**2 casings which we lookup twice. 
491 }, 492 }, { 493 Name: "one-trigram-pruned", 494 Q: &query.Substring{ 495 Pattern: "foo", 496 Content: true, 497 CaseSensitive: true, 498 }, 499 Want: zoekt.Stats{ 500 ShardsSkippedFilter: 1, 501 NgramLookups: 1, // only had to lookup once 502 }, 503 }, { 504 Name: "one-trigram-branch-pruned", 505 Q: query.NewAnd( 506 &query.Substring{ 507 Pattern: "foo", 508 Content: true, 509 CaseSensitive: true, 510 }, 511 &query.Substring{ 512 Pattern: "a y", 513 Content: true, 514 CaseSensitive: true, 515 }, 516 ), 517 Want: zoekt.Stats{ 518 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 519 ShardsSkippedFilter: 1, 520 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). 521 }, 522 }, { 523 Name: "symbol-substr-nomatch", 524 Q: &query.Symbol{Expr: &query.Substring{ 525 Pattern: "banana apple", 526 Content: true, 527 CaseSensitive: true, 528 }}, 529 Want: zoekt.Stats{ 530 IndexBytesLoaded: 3, 531 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 532 MatchCount: 0, // even though there is a match it doesn't align with a symbol 533 ShardsScanned: 1, 534 NgramMatches: 1, 535 NgramLookups: 12, 536 }, 537 }, { 538 Name: "symbol-substr", 539 Q: &query.Symbol{Expr: &query.Substring{ 540 Pattern: "apple", 541 Content: true, 542 CaseSensitive: true, 543 }}, 544 Want: zoekt.Stats{ 545 ContentBytesLoaded: 35, 546 IndexBytesLoaded: 4, 547 FileCount: 2, 548 FilesConsidered: 2, // must be 2 to ensure we used the index 549 FilesLoaded: 2, 550 MatchCount: 2, // apple symbols is in two files 551 ShardsScanned: 1, 552 NgramMatches: 2, 553 NgramLookups: 5, 554 }, 555 }, { 556 Name: "symbol-regexp-nomatch", 557 Q: &query.Symbol{Expr: &query.Regexp{ 558 Regexp: mustParseRE("^apple.banana$"), 559 Content: true, 560 CaseSensitive: true, 561 }}, 562 Want: zoekt.Stats{ 563 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 564 IndexBytesLoaded: 10, 565 
FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index 566 FilesLoaded: 2, 567 MatchCount: 0, // even though there is a match it doesn't align with a symbol 568 ShardsScanned: 1, 569 NgramMatches: 3, 570 NgramLookups: 11, 571 }, 572 }, { 573 Name: "symbol-regexp", 574 Q: &query.Symbol{Expr: &query.Regexp{ 575 Regexp: mustParseRE("^app.e$"), 576 Content: true, 577 CaseSensitive: true, 578 }}, 579 Want: zoekt.Stats{ 580 ContentBytesLoaded: 35, 581 IndexBytesLoaded: 2, 582 FileCount: 2, 583 FilesConsidered: 2, // must be 2 to ensure we used the index 584 FilesLoaded: 2, 585 MatchCount: 2, // apple symbols is in two files 586 ShardsScanned: 1, 587 NgramMatches: 2, 588 NgramLookups: 2, 589 }, 590 }} 591 592 for _, tc := range cases { 593 t.Run(tc.Name, func(t *testing.T) { 594 sres, err := searcher.Search(ctx, tc.Q, &chunkOpts) 595 if err != nil { 596 t.Fatal(err) 597 } 598 if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 599 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 600 } 601 }) 602 } 603 }) 604} 605 606func TestAndNegateSearch(t *testing.T) { 607 b := testShardBuilder(t, nil, 608 Document{Name: "f1", Content: []byte("x banana y")}, 609 // -----------------------------------0123456789 610 Document{Name: "f4", Content: []byte("x banana apple y")}) 611 612 t.Run("LineMatches", func(t *testing.T) { 613 sres := searchForTest(t, b, query.NewAnd( 614 &query.Substring{ 615 Pattern: "banana", 616 }, 617 &query.Not{Child: &query.Substring{ 618 Pattern: "apple", 619 }})) 620 621 matches := sres.Files 622 623 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 624 t.Fatalf("got %v, want 1 match", matches) 625 } 626 if matches[0].FileName != "f1" { 627 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 628 } 629 if matches[0].LineMatches[0].LineFragments[0].Offset != 2 { 630 t.Fatalf("got %v, want offset 2", matches) 631 } 632 }) 633 634 
t.Run("ChunkMatches", func(t *testing.T) { 635 sres := searchForTest(t, b, 636 query.NewAnd( 637 &query.Substring{ 638 Pattern: "banana", 639 }, 640 &query.Not{Child: &query.Substring{ 641 Pattern: "apple", 642 }}, 643 ), 644 chunkOpts, 645 ) 646 647 matches := sres.Files 648 649 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 650 t.Fatalf("got %v, want 1 match", matches) 651 } 652 if matches[0].FileName != "f1" { 653 t.Fatalf("got match %#v, want FileName: f1", matches[0]) 654 } 655 if matches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset != 2 { 656 t.Fatalf("got %v, want offset 2", matches) 657 } 658 }) 659} 660 661func TestNegativeMatchesOnlyShortcut(t *testing.T) { 662 b := testShardBuilder(t, nil, 663 Document{Name: "f1", Content: []byte("x banana y")}, 664 Document{Name: "f2", Content: []byte("x appelmoes y")}, 665 Document{Name: "f3", Content: []byte("x appelmoes y")}, 666 Document{Name: "f3", Content: []byte("x appelmoes y")}) 667 668 t.Run("LineMatches", func(t *testing.T) { 669 sres := searchForTest(t, b, query.NewAnd( 670 &query.Substring{ 671 Pattern: "banana", 672 }, 673 &query.Not{Child: &query.Substring{ 674 Pattern: "appel", 675 }})) 676 677 if sres.Stats.FilesConsidered != 1 { 678 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 679 } 680 }) 681 682 t.Run("ChunkMatches", func(t *testing.T) { 683 sres := searchForTest(t, b, query.NewAnd( 684 &query.Substring{ 685 Pattern: "banana", 686 }, 687 &query.Not{Child: &query.Substring{ 688 Pattern: "appel", 689 }}), chunkOpts) 690 691 if sres.Stats.FilesConsidered != 1 { 692 t.Errorf("got %#v, want FilesConsidered: 1", sres.Stats) 693 } 694 }) 695} 696 697func TestFileSearch(t *testing.T) { 698 b := testShardBuilder(t, nil, 699 Document{Name: "banzana", Content: []byte("x orange y")}, 700 // -------------0123456 701 Document{Name: "banana", Content: []byte("x apple y")}, 702 // -------------012345 703 ) 704 705 t.Run("LineMatches", func(t *testing.T) { 706 sres := searchForTest(t, b, 
&query.Substring{ 707 Pattern: "anan", 708 FileName: true, 709 }) 710 711 matches := sres.Files 712 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 713 t.Fatalf("got %v, want 1 match", matches) 714 } 715 716 got := matches[0].LineMatches[0] 717 want := zoekt.LineMatch{ 718 Line: []byte("banana"), 719 LineFragments: []zoekt.LineFragmentMatch{{ 720 Offset: 1, 721 LineOffset: 1, 722 MatchLength: 4, 723 }}, 724 FileName: true, 725 } 726 727 if !reflect.DeepEqual(got, want) { 728 t.Errorf("got %#v, want %#v", got, want) 729 } 730 }) 731 732 t.Run("ChunkMatches", func(t *testing.T) { 733 sres := searchForTest(t, b, &query.Substring{ 734 Pattern: "anan", 735 FileName: true, 736 }, chunkOpts) 737 738 matches := sres.Files 739 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 740 t.Fatalf("got %v, want 1 match", matches) 741 } 742 743 got := matches[0].ChunkMatches[0] 744 want := zoekt.ChunkMatch{ 745 Content: []byte("banana"), 746 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 747 Ranges: []zoekt.Range{{ 748 Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 749 End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 750 }}, 751 FileName: true, 752 } 753 754 if diff := cmp.Diff(want, got); diff != "" { 755 t.Fatal(diff) 756 } 757 }) 758 759 t.Run("FileNameSet", func(t *testing.T) { 760 sres := searchForTest(t, b, query.NewFileNameSet("banana"), chunkOpts) 761 762 matches := sres.Files 763 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 764 t.Fatalf("got %v, want 1 match", matches) 765 } 766 767 got := matches[0].ChunkMatches[0] 768 want := zoekt.ChunkMatch{ 769 Content: []byte("banana"), 770 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 771 Ranges: []zoekt.Range{{ 772 Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 773 End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 774 }}, 775 FileName: true, 776 } 777 778 if diff := cmp.Diff(want, got); diff 
!= "" { 779 t.Fatal(diff) 780 } 781 }) 782} 783 784func TestFileCase(t *testing.T) { 785 b := testShardBuilder(t, nil, 786 Document{Name: "BANANA", Content: []byte("x orange y")}) 787 788 t.Run("LineMatches", func(t *testing.T) { 789 sres := searchForTest(t, b, &query.Substring{ 790 Pattern: "banana", 791 FileName: true, 792 }) 793 794 matches := sres.Files 795 if len(matches) != 1 || matches[0].FileName != "BANANA" { 796 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 797 } 798 }) 799 800 t.Run("ChunkMatches", func(t *testing.T) { 801 sres := searchForTest(t, b, &query.Substring{ 802 Pattern: "banana", 803 FileName: true, 804 }, chunkOpts) 805 806 matches := sres.Files 807 if len(matches) != 1 || matches[0].FileName != "BANANA" { 808 t.Fatalf("got %v, want 1 match 'BANANA'", matches) 809 } 810 }) 811} 812 813func TestFileRegexpSearchBruteForce(t *testing.T) { 814 b := testShardBuilder(t, nil, 815 Document{Name: "banzana", Content: []byte("x orange y")}, 816 Document{Name: "banana", Content: []byte("x apple y")}, 817 ) 818 t.Run("LineMatches", func(t *testing.T) { 819 sres := searchForTest(t, b, &query.Regexp{ 820 Regexp: mustParseRE("[qn][zx]"), 821 FileName: true, 822 }) 823 824 matches := sres.Files 825 if len(matches) != 1 || matches[0].FileName != "banzana" { 826 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 827 } 828 }) 829 t.Run("LineMatches", func(t *testing.T) { 830 sres := searchForTest(t, b, &query.Regexp{ 831 Regexp: mustParseRE("[qn][zx]"), 832 FileName: true, 833 }, chunkOpts) 834 835 matches := sres.Files 836 if len(matches) != 1 || matches[0].FileName != "banzana" { 837 t.Fatalf("got %v, want 1 match on 'banzana'", matches) 838 } 839 }) 840} 841 842func TestFileRegexpSearchShortString(t *testing.T) { 843 b := testShardBuilder(t, nil, 844 Document{Name: "banana.py", Content: []byte("x orange y")}) 845 846 t.Run("LineMatches", func(t *testing.T) { 847 sres := searchForTest(t, b, &query.Regexp{ 848 Regexp: mustParseRE("ana.py"), 849 
FileName: true, 850 }) 851 852 matches := sres.Files 853 if len(matches) != 1 || matches[0].FileName != "banana.py" { 854 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 855 } 856 }) 857 858 t.Run("ChunkMatches", func(t *testing.T) { 859 sres := searchForTest(t, b, &query.Regexp{ 860 Regexp: mustParseRE("ana.py"), 861 FileName: true, 862 }, chunkOpts) 863 864 matches := sres.Files 865 if len(matches) != 1 || matches[0].FileName != "banana.py" { 866 t.Fatalf("got %v, want 1 match on 'banana.py'", matches) 867 } 868 }) 869} 870 871func TestFileSubstringSearchBruteForce(t *testing.T) { 872 b := testShardBuilder(t, nil, 873 Document{Name: "BANZANA", Content: []byte("x orange y")}, 874 Document{Name: "banana", Content: []byte("x apple y")}) 875 876 q := &query.Substring{ 877 Pattern: "z", 878 FileName: true, 879 } 880 881 t.Run("LineMatches", func(t *testing.T) { 882 res := searchForTest(t, b, q) 883 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 884 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 885 } 886 }) 887 888 t.Run("ChunkMatches", func(t *testing.T) { 889 res := searchForTest(t, b, q, chunkOpts) 890 if len(res.Files) != 1 || res.Files[0].FileName != "BANZANA" { 891 t.Fatalf("got %v, want 1 match on 'BANZANA''", res.Files) 892 } 893 }) 894} 895 896func TestFileSubstringSearchBruteForceEnd(t *testing.T) { 897 b := testShardBuilder(t, nil, 898 Document{Name: "BANZANA", Content: []byte("x orange y")}, 899 Document{Name: "bananaq", Content: []byte("x apple y")}) 900 901 q := &query.Substring{ 902 Pattern: "q", 903 FileName: true, 904 } 905 t.Run("LineMatches", func(t *testing.T) { 906 res := searchForTest(t, b, q) 907 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != want { 908 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 909 } 910 }) 911 912 t.Run("LineMatches", func(t *testing.T) { 913 res := searchForTest(t, b, q, chunkOpts) 914 if want := "bananaq"; len(res.Files) != 1 || res.Files[0].FileName != 
want { 915 t.Fatalf("got %v, want 1 match in %q", res.Files, want) 916 } 917 }) 918} 919 920func TestSearchMatchAll(t *testing.T) { 921 b := testShardBuilder(t, nil, 922 Document{Name: "banzana", Content: []byte("x orange y")}, 923 Document{Name: "banana", Content: []byte("x apple y")}) 924 925 t.Run("LineMatches", func(t *testing.T) { 926 sres := searchForTest(t, b, &query.Const{Value: true}) 927 matches := sres.Files 928 if len(matches) != 2 { 929 t.Fatalf("got %v, want 2 matches", matches) 930 } 931 }) 932 933 t.Run("ChunkMatches", func(t *testing.T) { 934 sres := searchForTest(t, b, &query.Const{Value: true}, chunkOpts) 935 matches := sres.Files 936 if len(matches) != 2 { 937 t.Fatalf("got %v, want 2 matches", matches) 938 } 939 }) 940} 941 942func TestSearchNewline(t *testing.T) { 943 b := testShardBuilder(t, nil, 944 Document{Name: "banzana", Content: []byte("abcd\ndefg")}) 945 946 t.Run("LineMatches", func(t *testing.T) { 947 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}) 948 949 // Just check that we don't crash. 950 951 matches := sres.Files 952 if len(matches) != 1 { 953 t.Fatalf("got %v, want 1 matches", matches) 954 } 955 }) 956 957 t.Run("ChunkMatches", func(t *testing.T) { 958 sres := searchForTest(t, b, &query.Substring{Pattern: "d\nd"}, chunkOpts) 959 960 // Just check that we don't crash. 
961 962 matches := sres.Files 963 if len(matches) != 1 { 964 t.Fatalf("got %v, want 1 matches", matches) 965 } 966 }) 967} 968 969func TestSearchMatchAllRegexp(t *testing.T) { 970 b := testShardBuilder(t, nil, 971 Document{Name: "banzana", Content: []byte("abcd")}, 972 Document{Name: "banana", Content: []byte("pqrs")}) 973 974 t.Run("LineMatches", func(t *testing.T) { 975 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}) 976 977 matches := sres.Files 978 if len(matches) != 2 || sres.Stats.MatchCount != 2 { 979 t.Fatalf("got %v, want 2 matches", matches) 980 } 981 if len(matches[0].LineMatches[0].Line) != 4 || len(matches[1].LineMatches[0].Line) != 4 { 982 t.Fatalf("want 4 chars in every file, got %#v", matches) 983 } 984 }) 985 986 t.Run("ChunkMatches", func(t *testing.T) { 987 sres := searchForTest(t, b, &query.Regexp{Regexp: mustParseRE(".")}, chunkOpts) 988 989 matches := sres.Files 990 if len(matches) != 2 || sres.Stats.MatchCount != 8 { 991 t.Fatalf("got %v, want 2 matches", matches) 992 } 993 if len(matches[0].ChunkMatches[0].Content) != 4 || len(matches[1].ChunkMatches[0].Content) != 4 { 994 t.Fatalf("want 4 chars in every file, got %#v", matches) 995 } 996 }) 997} 998 999func TestSearchBM25MatchScores(t *testing.T) { 1000 ctx := context.Background() 1001 searcher := searcherForTest(t, testShardBuilder(t, nil, 1002 Document{Name: "f1", Content: []byte("one two three\naaaaaaaaaa\nbbbbbbbb\none two two")}, 1003 Document{Name: "f2", Content: []byte("four five six\naaaaaaaaaa\nbbbbbbbb\nfour five five\nsix six")}, 1004 wordsAsSymbols(Document{Name: "f3", Content: []byte("public static void main")}), 1005 )) 1006 1007 t.Run("LineMatches", func(t *testing.T) { 1008 q := &query.Substring{Pattern: "two"} 1009 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) 1010 if err != nil { 1011 t.Fatal(err) 1012 } 1013 matches := sres.Files 1014 if len(matches) != 1 { 1015 t.Fatalf("want 1 file index, got %d", len(matches)) 
1016 } 1017 1018 if len(matches[0].LineMatches) != 2 { 1019 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1020 } 1021 1022 if matches[0].LineMatches[0].LineNumber != 4 { 1023 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].LineMatches[0].LineNumber) 1024 } 1025 }) 1026 1027 t.Run("ChunkMatches", func(t *testing.T) { 1028 q := &query.Substring{Pattern: "five"} 1029 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1030 if err != nil { 1031 t.Fatal(err) 1032 } 1033 1034 matches := sres.Files 1035 if len(matches) != 1 { 1036 t.Fatalf("want 1 file index, got %d", len(matches)) 1037 } 1038 1039 if len(matches[0].ChunkMatches) != 2 { 1040 t.Fatalf("want 2 chunk matches, got %d", len(matches[0].ChunkMatches)) 1041 } 1042 1043 if matches[0].ChunkMatches[0].BestLineMatch != 4 { 1044 t.Fatalf("want best-scoring line to be line 4, got %d", matches[0].ChunkMatches[0].BestLineMatch) 1045 } 1046 }) 1047 1048 t.Run("ChunkMatches with symbols", func(t *testing.T) { 1049 q := &query.Or{ 1050 Children: []query.Q{ 1051 &query.Symbol{Expr: &query.Substring{Pattern: "main"}}, 1052 &query.Substring{Pattern: "five"}, 1053 }, 1054 } 1055 1056 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1057 if err != nil { 1058 t.Fatal(err) 1059 } 1060 1061 matches := sres.Files 1062 if len(matches) != 2 { 1063 t.Fatalf("want 2 file index, got %d", len(matches)) 1064 } 1065 1066 foundSymbolInfo := false 1067 for _, m := range matches { 1068 for _, cm := range m.ChunkMatches { 1069 if len(cm.SymbolInfo) > 0 { 1070 foundSymbolInfo = true 1071 } 1072 } 1073 } 1074 1075 if !foundSymbolInfo { 1076 t.Fatalf("want symbol info, got none") 1077 } 1078 }) 1079} 1080 1081func TestFileRestriction(t *testing.T) { 1082 b := testShardBuilder(t, nil, 1083 Document{Name: "banana1", Content: []byte("x orange y")}, 1084 Document{Name: 
"banana2", Content: []byte("x apple y")}, 1085 Document{Name: "orange", Content: []byte("x apple z")}) 1086 1087 t.Run("LineMatches", func(t *testing.T) { 1088 sres := searchForTest(t, b, query.NewAnd( 1089 &query.Substring{ 1090 Pattern: "banana", 1091 FileName: true, 1092 }, 1093 &query.Substring{ 1094 Pattern: "apple", 1095 })) 1096 1097 matches := sres.Files 1098 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1099 t.Fatalf("got %v, want 1 match", matches) 1100 } 1101 1102 match := matches[0].LineMatches[0] 1103 got := string(match.Line) 1104 want := "x apple y" 1105 if got != want { 1106 t.Errorf("got match %#v, want line %q", match, want) 1107 } 1108 }) 1109 1110 t.Run("ChunkMatches", func(t *testing.T) { 1111 sres := searchForTest(t, b, query.NewAnd( 1112 &query.Substring{ 1113 Pattern: "banana", 1114 FileName: true, 1115 }, 1116 &query.Substring{ 1117 Pattern: "apple", 1118 }), chunkOpts) 1119 1120 matches := sres.Files 1121 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1122 t.Fatalf("got %v, want 1 match", matches) 1123 } 1124 1125 match := matches[0].ChunkMatches[0] 1126 got := string(match.Content) 1127 want := "x apple y" 1128 if got != want { 1129 t.Errorf("got match %#v, want line %q", match, want) 1130 } 1131 }) 1132} 1133 1134func TestFileNameBoundary(t *testing.T) { 1135 b := testShardBuilder(t, nil, 1136 Document{Name: "banana2", Content: []byte("x apple y")}, 1137 Document{Name: "helpers.go", Content: []byte("x apple y")}, 1138 Document{Name: "foo", Content: []byte("x apple y")}) 1139 1140 t.Run("LineMatches", func(t *testing.T) { 1141 sres := searchForTest(t, b, &query.Substring{ 1142 Pattern: "helpers.go", 1143 FileName: true, 1144 }) 1145 1146 matches := sres.Files 1147 if len(matches) != 1 || len(matches[0].LineMatches) != 1 { 1148 t.Fatalf("got %v, want 1 match", matches) 1149 } 1150 }) 1151 1152 t.Run("ChunkMatches", func(t *testing.T) { 1153 sres := searchForTest(t, b, &query.Substring{ 1154 Pattern: "helpers.go", 
1155 FileName: true, 1156 }, chunkOpts) 1157 1158 matches := sres.Files 1159 if len(matches) != 1 || len(matches[0].ChunkMatches) != 1 { 1160 t.Fatalf("got %v, want 1 match", matches) 1161 } 1162 }) 1163} 1164 1165func TestDocumentOrder(t *testing.T) { 1166 var docs []Document 1167 for i := range 3 { 1168 docs = append(docs, Document{Name: fmt.Sprintf("f%d", i), Content: []byte("needle")}) 1169 } 1170 1171 b := testShardBuilder(t, nil, docs...) 1172 1173 t.Run("LineMatches", func(t *testing.T) { 1174 sres := searchForTest(t, b, query.NewAnd( 1175 &query.Substring{ 1176 Pattern: "needle", 1177 })) 1178 1179 want := []string{"f0", "f1", "f2"} 1180 var got []string 1181 for _, f := range sres.Files { 1182 got = append(got, f.FileName) 1183 } 1184 if !reflect.DeepEqual(got, want) { 1185 t.Fatalf("got %v, want %v", got, want) 1186 } 1187 }) 1188 1189 t.Run("ChunkMatches", func(t *testing.T) { 1190 sres := searchForTest(t, b, 1191 query.NewAnd(&query.Substring{ 1192 Pattern: "needle", 1193 }), 1194 chunkOpts, 1195 ) 1196 1197 want := []string{"f0", "f1", "f2"} 1198 var got []string 1199 for _, f := range sres.Files { 1200 got = append(got, f.FileName) 1201 } 1202 if !reflect.DeepEqual(got, want) { 1203 t.Fatalf("got %v, want %v", got, want) 1204 } 1205 }) 1206} 1207 1208func TestBranchMask(t *testing.T) { 1209 b := testShardBuilder(t, &zoekt.Repository{ 1210 Branches: []zoekt.RepositoryBranch{ 1211 {"master", "v-master"}, 1212 {"stable", "v-stable"}, 1213 {"bonzai", "v-bonzai"}, 1214 }, 1215 }, Document{Name: "f1", Content: []byte("needle"), Branches: []string{"master"}}, 1216 Document{Name: "f2", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1217 Document{Name: "f3", Content: []byte("needle"), Branches: []string{"stable", "master"}}, 1218 Document{Name: "f4", Content: []byte("needle"), Branches: []string{"bonzai"}}, 1219 ) 1220 1221 t.Run("LineMatches", func(t *testing.T) { 1222 sres := searchForTest(t, b, query.NewAnd( 1223 &query.Substring{ 1224 
Pattern: "needle",
			},
			&query.Branch{
				Pattern: "table",
			}))

		if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
			t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
		}

		if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
			t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, query.NewAnd(
			&query.Substring{
				Pattern: "needle",
			},
			&query.Branch{
				Pattern: "table",
			}),
			chunkOpts,
		)

		if len(sres.Files) != 2 || sres.Files[0].FileName != "f2" || sres.Files[1].FileName != "f3" {
			t.Fatalf("got %v, want 2 result from [f2,f3]", sres.Files)
		}

		if len(sres.Files[0].Branches) != 1 || sres.Files[0].Branches[0] != "stable" {
			t.Fatalf("got %v, want 1 branch 'stable'", sres.Files[0].Branches)
		}
	})
}

// TestBranchLimit checks the branch-count boundary of NewShardBuilder: a
// repository with 64 branches must be accepted, one with 65 must be rejected.
func TestBranchLimit(t *testing.T) {
	for limit := 64; limit <= 65; limit++ {
		r := &zoekt.Repository{}
		for i := range limit {
			s := fmt.Sprintf("b%d", i)
			r.Branches = append(r.Branches, zoekt.RepositoryBranch{
				s, "v-" + s,
			})
		}
		_, err := NewShardBuilder(r)
		if limit == 64 && err != nil {
			t.Fatalf("NewShardBuilder: %v", err)
		} else if limit == 65 && err == nil {
			t.Fatalf("NewShardBuilder succeeded")
		}
	}
}

// TestBranchReport indexes one document that lives on both "stable" and
// "master" and checks that the file match reports exactly those branches,
// in both result formats.
func TestBranchReport(t *testing.T) {
	branches := []string{"stable", "master"}
	b := testShardBuilder(t, &zoekt.Repository{
		Branches: []zoekt.RepositoryBranch{
			{"stable", "vs"},
			{"master", "vm"},
		},
	},
		Document{Name: "f2", Content: []byte("needle"), Branches: branches})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{
			Pattern: "needle",
		})
		if len(sres.Files) != 1 {
t.Fatalf("got %v, want 1 result from f2", sres.Files)
		}

		f := sres.Files[0]
		if !reflect.DeepEqual(f.Branches, branches) {
			t.Fatalf("got branches %q, want %q", f.Branches, branches)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{
			Pattern: "needle",
		}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", sres.Files)
		}

		f := sres.Files[0]
		if !reflect.DeepEqual(f.Branches, branches) {
			t.Fatalf("got branches %q, want %q", f.Branches, branches)
		}
	})
}

// TestBranchVersions indexes a document that exists only on "master" and
// checks that the file match carries that branch's version, "v-master", in
// both result formats.
func TestBranchVersions(t *testing.T) {
	b := testShardBuilder(t, &zoekt.Repository{
		Branches: []zoekt.RepositoryBranch{
			{"stable", "v-stable"},
			{"master", "v-master"},
		},
	}, Document{Name: "f2", Content: []byte("needle"), Branches: []string{"master"}})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{
			Pattern: "needle",
		})
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", sres.Files)
		}

		f := sres.Files[0]
		if f.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", f)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{
			Pattern: "needle",
		}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 result from f2", sres.Files)
		}

		f := sres.Files[0]
		if f.Version != "v-master" {
			t.Fatalf("got file %#v, want version 'v-master'", f)
		}
	})
}

// mustParseRE parses s with Perl syntax flags and returns the parsed
// expression. It panics if s is not a valid regular expression, so it is
// only suitable for patterns hard-coded in tests.
func mustParseRE(s string) *syntax.Regexp {
	r, err := syntax.Parse(s, syntax.Perl)
	if err != nil {
		panic(err)
	}

	return r
}

// TestRegexp searches a single-line document with the regexp "dle.*bla" and
// compares the full match structure (offsets, line bounds, ranges) against
// the expected value in both result formats.
func TestRegexp(t *testing.T) {
	content := []byte("needle the bla")
	// ----------------01234567890123

	b := testShardBuilder(t,
nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		got := sres.Files[0].LineMatches[0]
		want := zoekt.LineMatch{
			LineFragments: []zoekt.LineFragmentMatch{{
				LineOffset:  3,
				Offset:      3,
				MatchLength: 11,
			}},
			Line:       content,
			FileName:   false,
			LineNumber: 1,
			LineStart:  0,
			LineEnd:    14,
		}

		if !reflect.DeepEqual(got, want) {
			t.Errorf("got %#v, want %#v", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		got := sres.Files[0].ChunkMatches[0]
		want := zoekt.ChunkMatch{
			Content:      content,
			ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1},
			Ranges: []zoekt.Range{{
				Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4},
				End:   zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15},
			}},
		}

		if diff := cmp.Diff(want, got); diff != "" {
			t.Fatal(diff)
		}
	})
}

// TestRegexpFile runs a file-name regexp ("play.*mussel") against two
// documents and checks that only the document whose name matches the whole
// expression is returned, in both result formats.
func TestRegexpFile(t *testing.T) {
	content := []byte("needle the bla")

	name := "let's play: find the mussel"
	b := testShardBuilder(t, nil,
		Document{Name: name, Content: content},
		Document{Name: "play.txt", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:   mustParseRE("play.*mussel"),
				FileName: true,
			})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Files[0].FileName != name {
			t.Errorf("got match %#v, want name %q", sres.Files[0], name)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp:   mustParseRE("play.*mussel"),
				FileName: true,
			}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match in 1 file", sres.Files)
		}

		if sres.Files[0].FileName != name {
			t.Errorf("got match %#v, want name %q", sres.Files[0], name)
		}
	})
}

// TestRegexpOrder checks that the regexp "dle.*bla" does not match content in
// which "bla" appears before "needle": the literal parts are present, but in
// the wrong order.
func TestRegexpOrder(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			})

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		// Pass chunkOpts like every other ChunkMatches subtest in this file;
		// without it this subtest repeats the LineMatches search above.
		sres := searchForTest(t, b,
			&query.Regexp{
				Regexp: mustParseRE("dle.*bla"),
			}, chunkOpts)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}
	})
}

// TestRepoName combines a content query with a repository-name filter. A
// filter that does not match the repo name ("foo") must yield no files and
// must not consider any file; a matching filter ("bla") must yield the file.
func TestRepoName(t *testing.T) {
	content := []byte("bla the needle")
	// ----------------01234567890123

	b := testShardBuilder(t, &zoekt.Repository{Name: "bla"},
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("foo")},
			))

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if
sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			))
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("foo")},
			),
			chunkOpts,
		)

		if len(sres.Files) != 0 {
			t.Fatalf("got %v, want 0 matches", sres.Files)
		}

		if sres.Stats.FilesConsidered > 0 {
			t.Fatalf("got FilesConsidered %d, should have short circuited", sres.Stats.FilesConsidered)
		}

		// The positive case must also run with chunkOpts; previously it fell
		// back to the default options and duplicated the LineMatches subtest.
		sres = searchForTest(t, b,
			query.NewAnd(
				&query.Substring{Pattern: "needle"},
				&query.Repo{Regexp: regexp.MustCompile("bla")},
			),
			chunkOpts,
		)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}
	})
}

// TestMergeMatches checks how several matches on one line are reported:
// adjacent matches ("bla" three times in "blablabla") stay separate, while
// overlapping matches ("hello" and "logood" in "hellogoodbye") are reduced to
// a single fragment/range covering "hello".
func TestMergeMatches(t *testing.T) {
	t.Run("LineMatches, adjacent matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("blablabla")})
		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		if len(sres.Files[0].LineMatches[0].LineFragments) != 3 {
			t.Fatalf("got %v, want 3 fragments", sres.Files[0].LineMatches[0].LineFragments)
		}
	})

	t.Run("LineMatches, overlapping matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("hellogoodbye")})
		sres := searchForTest(t, b,
			&query.And{Children: []query.Q{
				&query.Substring{Pattern: "hello"},
&query.Substring{Pattern: "logood"},
			}})

		if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		lineFragments := sres.Files[0].LineMatches[0].LineFragments
		if len(lineFragments) != 1 || lineFragments[0].MatchLength != len("hello") {
			t.Fatalf("got %v, want single line fragment 'hello'", lineFragments)
		}
	})

	t.Run("ChunkMatches, no overlap", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("blablabla")})

		sres := searchForTest(t, b,
			&query.Substring{Pattern: "bla"},
			chunkOpts,
		)
		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 match", sres.Files)
		}

		if len(sres.Files[0].ChunkMatches[0].Ranges) != 3 {
			t.Fatalf("got %v, want 3 ranges", sres.Files[0].ChunkMatches[0].Ranges)
		}
	})

	t.Run("ChunkMatches, overlapping matches", func(t *testing.T) {
		b := testShardBuilder(t, nil,
			Document{Name: "f1", Content: []byte("hellogoodbye")})
		sres := searchForTest(t, b,
			&query.And{Children: []query.Q{
				&query.Substring{Pattern: "hello"},
				&query.Substring{Pattern: "logood"},
			}}, chunkOpts)

		if len(sres.Files) != 1 || len(sres.Files[0].ChunkMatches) != 1 {
			t.Fatalf("got %v, want 1 chunk match", sres.Files)
		}

		ranges := sres.Files[0].ChunkMatches[0].Ranges
		if len(ranges) != 1 || ranges[0].Start.ByteOffset != 0 || ranges[0].End.ByteOffset != 5 {
			t.Fatalf("got %v, want single chunk range 'hello'", ranges)
		}
	})
}

// TestRepoURL checks that the repository's file URL template and line
// fragment template are surfaced in the search result's RepoURLs and
// LineFragments maps.
func TestRepoURL(t *testing.T) {
	content := []byte("blablabla")
	b := testShardBuilder(t, &zoekt.Repository{
		Name:                 "name",
		URL:                  "URL",
		CommitURLTemplate:    "commit",
		FileURLTemplate:      "file-url",
		LineFragmentTemplate: "fragment",
	}, Document{Name: "f1", Content: content})

1641 sres := searchForTest(t, b, &query.Substring{Pattern: "bla"}) 1642 1643 // RepoURLs/LineFragments are keyed by the repo's URL (its unique identity). 1644 if sres.RepoURLs["URL"] != "file-url" { 1645 t.Errorf("got RepoURLs %v, want {URL: file-url}", sres.RepoURLs) 1646 } 1647 if sres.LineFragments["URL"] != "fragment" { 1648 t.Errorf("got URLs %v, want {URL: fragment}", sres.LineFragments) 1649 } 1650} 1651 1652func TestRegexpCaseSensitive(t *testing.T) { 1653 content := []byte("bla\nfunc unmarshalGitiles\n") 1654 b := testShardBuilder(t, nil, Document{ 1655 Name: "f1", 1656 Content: content, 1657 }) 1658 1659 t.Run("LineMatches", func(t *testing.T) { 1660 res := searchForTest(t, b, 1661 &query.Regexp{ 1662 Regexp: mustParseRE("func.*Gitiles"), 1663 CaseSensitive: true, 1664 }) 1665 1666 if len(res.Files) != 1 { 1667 t.Fatalf("got %v, want one index", res.Files) 1668 } 1669 }) 1670 1671 t.Run("ChunkMatches", func(t *testing.T) { 1672 res := searchForTest(t, b, 1673 &query.Regexp{ 1674 Regexp: mustParseRE("func.*Gitiles"), 1675 CaseSensitive: true, 1676 }, 1677 chunkOpts, 1678 ) 1679 1680 if len(res.Files) != 1 { 1681 t.Fatalf("got %v, want one index", res.Files) 1682 } 1683 }) 1684} 1685 1686func TestRegexpCaseFolding(t *testing.T) { 1687 content := []byte("bla\nfunc unmarshalGitiles\n") 1688 1689 b := testShardBuilder(t, nil, 1690 Document{Name: "f1", Content: content}) 1691 res := searchForTest(t, b, 1692 &query.Regexp{ 1693 Regexp: mustParseRE("func.*GITILES"), 1694 CaseSensitive: false, 1695 }) 1696 1697 if len(res.Files) != 1 { 1698 t.Fatalf("got %v, want one index", res.Files) 1699 } 1700} 1701 1702func TestCaseRegexp(t *testing.T) { 1703 content := []byte("BLABLABLA") 1704 b := testShardBuilder(t, nil, 1705 Document{Name: "f1", Content: content}) 1706 1707 t.Run("LineMatches", func(t *testing.T) { 1708 res := searchForTest(t, b, 1709 &query.Regexp{ 1710 Regexp: mustParseRE("[xb][xl][xa]"), 1711 CaseSensitive: true, 1712 }) 1713 1714 if len(res.Files) > 0 
{ 1715 t.Fatalf("got %v, want no matches", res.Files) 1716 } 1717 }) 1718 1719 t.Run("ChunkMatches", func(t *testing.T) { 1720 res := searchForTest(t, b, 1721 &query.Regexp{ 1722 Regexp: mustParseRE("[xb][xl][xa]"), 1723 CaseSensitive: true, 1724 }, 1725 chunkOpts, 1726 ) 1727 1728 if len(res.Files) > 0 { 1729 t.Fatalf("got %v, want no matches", res.Files) 1730 } 1731 }) 1732} 1733 1734func TestNegativeRegexp(t *testing.T) { 1735 content := []byte("BLABLABLA needle bla") 1736 b := testShardBuilder(t, nil, 1737 Document{Name: "f1", Content: content}) 1738 1739 t.Run("LineMatches", func(t *testing.T) { 1740 res := searchForTest(t, b, 1741 query.NewAnd( 1742 &query.Substring{ 1743 Pattern: "needle", 1744 }, 1745 &query.Not{ 1746 Child: &query.Regexp{ 1747 Regexp: mustParseRE(".cs"), 1748 }, 1749 })) 1750 1751 if len(res.Files) != 1 { 1752 t.Fatalf("got %v, want 1 match", res.Files) 1753 } 1754 }) 1755 1756 t.Run("ChunkMatches", func(t *testing.T) { 1757 res := searchForTest(t, b, 1758 query.NewAnd( 1759 &query.Substring{ 1760 Pattern: "needle", 1761 }, 1762 &query.Not{ 1763 Child: &query.Regexp{ 1764 Regexp: mustParseRE(".cs"), 1765 }, 1766 }, 1767 ), 1768 chunkOpts) 1769 1770 if len(res.Files) != 1 { 1771 t.Fatalf("got %v, want 1 match", res.Files) 1772 } 1773 }) 1774} 1775 1776func TestSymbolRank(t *testing.T) { 1777 t.Skip() 1778 1779 content := []byte("func bla() blubxxxxx") 1780 // ----------------01234567890123456789 1781 b := testShardBuilder(t, nil, 1782 Document{ 1783 Name: "f1", 1784 Content: content, 1785 }, Document{ 1786 Name: "f2", 1787 Content: content, 1788 Symbols: []DocumentSection{{5, 8}}, 1789 }, Document{ 1790 Name: "f3", 1791 Content: content, 1792 }) 1793 1794 t.Run("LineMatches", func(t *testing.T) { 1795 res := searchForTest(t, b, 1796 &query.Substring{ 1797 CaseSensitive: false, 1798 Pattern: "bla", 1799 }) 1800 1801 if len(res.Files) != 3 { 1802 t.Fatalf("got %d files, want 3 files. 
Full data: %v", len(res.Files), res.Files) 1803 } 1804 if res.Files[0].FileName != "f2" { 1805 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1806 } 1807 }) 1808 1809 t.Run("ChunkMatches", func(t *testing.T) { 1810 res := searchForTest(t, b, 1811 &query.Substring{ 1812 CaseSensitive: false, 1813 Pattern: "bla", 1814 }, chunkOpts) 1815 1816 if len(res.Files) != 3 { 1817 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1818 } 1819 if res.Files[0].FileName != "f2" { 1820 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1821 } 1822 }) 1823} 1824 1825func TestSymbolRankRegexpUTF8(t *testing.T) { 1826 t.Skip() 1827 1828 prefix := strings.Repeat(string([]rune{kelvinCodePoint}), 100) + "\n" 1829 content := []byte(prefix + 1830 "func bla() blub") 1831 // ------012345678901234 1832 b := testShardBuilder(t, nil, 1833 Document{ 1834 Name: "f1", 1835 Content: content, 1836 }, Document{ 1837 Name: "f2", 1838 Content: content, 1839 Symbols: []DocumentSection{{uint32(len(prefix) + 5), uint32(len(prefix) + 8)}}, 1840 }, Document{ 1841 Name: "f3", 1842 Content: content, 1843 }) 1844 1845 t.Run("LineMatches", func(t *testing.T) { 1846 res := searchForTest(t, b, 1847 &query.Regexp{ 1848 Regexp: mustParseRE("b.a"), 1849 }) 1850 1851 if len(res.Files) != 3 { 1852 t.Fatalf("got %#v, want 3 files", res.Files) 1853 } 1854 if res.Files[0].FileName != "f2" { 1855 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1856 } 1857 }) 1858 1859 t.Run("ChunjkMatches", func(t *testing.T) { 1860 res := searchForTest(t, b, 1861 &query.Regexp{ 1862 Regexp: mustParseRE("b.a"), 1863 }, chunkOpts) 1864 1865 if len(res.Files) != 3 { 1866 t.Fatalf("got %#v, want 3 files", res.Files) 1867 } 1868 if res.Files[0].FileName != "f2" { 1869 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1870 } 1871 }) 1872} 1873 1874func TestPartialSymbolRank(t *testing.T) { 1875 t.Skip() 1876 1877 content := []byte("func bla() blub") 1878 // 
----------------012345678901234 1879 1880 b := testShardBuilder(t, nil, 1881 Document{ 1882 Name: "f1", 1883 Content: content, 1884 Symbols: []DocumentSection{{4, 9}}, 1885 }, Document{ 1886 Name: "f2", 1887 Content: content, 1888 Symbols: []DocumentSection{{4, 8}}, 1889 }, Document{ 1890 Name: "f3", 1891 Content: content, 1892 Symbols: []DocumentSection{{4, 9}}, 1893 }) 1894 1895 t.Run("LineMatches", func(t *testing.T) { 1896 res := searchForTest(t, b, 1897 &query.Substring{ 1898 Pattern: "bla", 1899 }) 1900 1901 if len(res.Files) != 3 { 1902 t.Fatalf("got %#v, want 3 files", res.Files) 1903 } 1904 if res.Files[0].FileName != "f2" { 1905 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1906 } 1907 }) 1908 1909 t.Run("ChunkMatches", func(t *testing.T) { 1910 res := searchForTest(t, b, 1911 &query.Substring{ 1912 Pattern: "bla", 1913 }, chunkOpts) 1914 1915 if len(res.Files) != 3 { 1916 t.Fatalf("got %#v, want 3 files", res.Files) 1917 } 1918 if res.Files[0].FileName != "f2" { 1919 t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1920 } 1921 }) 1922} 1923 1924func TestNegativeRepo(t *testing.T) { 1925 content := []byte("bla the needle") 1926 // ----------------01234567890123 1927 b := testShardBuilder(t, &zoekt.Repository{ 1928 Name: "bla", 1929 }, Document{Name: "f1", Content: content}) 1930 1931 t.Run("LineMatches", func(t *testing.T) { 1932 sres := searchForTest(t, b, 1933 query.NewAnd( 1934 &query.Substring{Pattern: "needle"}, 1935 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1936 )) 1937 1938 if len(sres.Files) != 0 { 1939 t.Fatalf("got %v, want 0 matches", sres.Files) 1940 } 1941 }) 1942 1943 t.Run("ChunkMatches", func(t *testing.T) { 1944 sres := searchForTest(t, b, 1945 query.NewAnd( 1946 &query.Substring{Pattern: "needle"}, 1947 &query.Not{Child: &query.Repo{Regexp: regexp.MustCompile("bla")}}, 1948 ), chunkOpts) 1949 1950 if len(sres.Files) != 0 { 1951 t.Fatalf("got %v, want 0 matches", sres.Files) 1952 } 1953 }) 
1954} 1955 1956func TestListRepos(t *testing.T) { 1957 content := []byte("bla the needle\n") 1958 // ----------------012345678901234- 1959 1960 t.Run("default and minimal fallback", func(t *testing.T) { 1961 repo := &zoekt.Repository{ 1962 Name: "reponame", 1963 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1964 } 1965 b := testShardBuilder(t, repo, 1966 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 1967 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 1968 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 1969 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 1970 1971 searcher := searcherForTest(t, b) 1972 1973 for _, opts := range []*zoekt.ListOptions{ 1974 nil, 1975 {}, 1976 {Field: zoekt.RepoListFieldRepos}, 1977 {Field: zoekt.RepoListFieldReposMap}, 1978 } { 1979 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1980 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 1981 1982 res, err := searcher.List(context.Background(), q, opts) 1983 if err != nil { 1984 t.Fatalf("List(%v): %v", q, err) 1985 } 1986 1987 want := &zoekt.RepoList{ 1988 Repos: []*zoekt.RepoListEntry{{ 1989 Repository: *repo, 1990 Stats: zoekt.RepoStats{ 1991 Documents: 4, 1992 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1993 Shards: 1, 1994 1995 NewLinesCount: 4, 1996 DefaultBranchNewLinesCount: 2, 1997 OtherBranchesNewLinesCount: 3, 1998 }, 1999 }}, 2000 Stats: zoekt.RepoStats{ 2001 Repos: 1, 2002 Documents: 4, 2003 ContentBytes: 68, 2004 Shards: 1, 2005 2006 NewLinesCount: 4, 2007 DefaultBranchNewLinesCount: 2, 2008 OtherBranchesNewLinesCount: 3, 2009 }, 2010 } 2011 ignored := []cmp.Option{ 2012 cmpopts.EquateEmpty(), 2013 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 2014 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 2015 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 2016 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 2017 } 
2018 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2019 t.Fatalf("mismatch (-want +got):\n%s", diff) 2020 } 2021 2022 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 2023 res, err = searcher.List(context.Background(), q, nil) 2024 if err != nil { 2025 t.Fatalf("List(%v): %v", q, err) 2026 } 2027 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2028 t.Fatalf("got %v, want 0 matches", res) 2029 } 2030 }) 2031 } 2032 }) 2033 2034 t.Run("minimal", func(t *testing.T) { 2035 repo := &zoekt.Repository{ 2036 ID: 1234, 2037 Name: "reponame", 2038 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2039 RawConfig: map[string]string{"repoid": "1234"}, 2040 } 2041 b := testShardBuilder(t, repo, 2042 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, 2043 Document{Name: "f2", Content: content, Branches: []string{"main"}}, 2044 Document{Name: "f2", Content: content, Branches: []string{"dev"}}, 2045 Document{Name: "f3", Content: content, Branches: []string{"dev"}}) 2046 2047 searcher := searcherForTest(t, b) 2048 2049 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 2050 res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2051 if err != nil { 2052 t.Fatalf("List(%v): %v", q, err) 2053 } 2054 2055 want := &zoekt.RepoList{ 2056 ReposMap: zoekt.ReposMap{ 2057 repo.ID: { 2058 HasSymbols: repo.HasSymbols, 2059 Branches: repo.Branches, 2060 }, 2061 }, 2062 Stats: zoekt.RepoStats{ 2063 Repos: 1, 2064 Shards: 1, 2065 Documents: 4, 2066 IndexBytes: 412, 2067 ContentBytes: 68, 2068 NewLinesCount: 4, 2069 DefaultBranchNewLinesCount: 2, 2070 OtherBranchesNewLinesCount: 3, 2071 }, 2072 } 2073 2074 ignored := []cmp.Option{ 2075 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 2076 } 2077 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2078 t.Fatalf("mismatch (-want +got):\n%s", diff) 2079 } 2080 2081 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 
2082 res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2083 if err != nil { 2084 t.Fatalf("List(%v): %v", q, err) 2085 } 2086 if len(res.Repos) != 0 || len(res.ReposMap) != 0 { 2087 t.Fatalf("got %v, want 0 matches", res) 2088 } 2089 }) 2090} 2091 2092func TestListReposByContent(t *testing.T) { 2093 content := []byte("bla the needle") 2094 2095 b := testShardBuilder(t, &zoekt.Repository{ 2096 Name: "reponame", 2097 }, 2098 Document{Name: "f1", Content: content}, 2099 Document{Name: "f2", Content: content}) 2100 2101 searcher := searcherForTest(t, b) 2102 q := &query.Substring{Pattern: "needle"} 2103 res, err := searcher.List(context.Background(), q, nil) 2104 if err != nil { 2105 t.Fatalf("List(%v): %v", q, err) 2106 } 2107 if len(res.Repos) != 1 || res.Repos[0].Repository.Name != "reponame" { 2108 t.Fatalf("got %v, want 1 matches", res) 2109 } 2110 if got := res.Repos[0].Stats.Shards; got != 1 { 2111 t.Fatalf("got %d, want 1 shard", got) 2112 } 2113 q = &query.Substring{Pattern: "foo"} 2114 res, err = searcher.List(context.Background(), q, nil) 2115 if err != nil { 2116 t.Fatalf("List(%v): %v", q, err) 2117 } 2118 if len(res.Repos) != 0 { 2119 t.Fatalf("got %v, want 0 matches", res) 2120 } 2121} 2122 2123func TestMetadata(t *testing.T) { 2124 content := []byte("bla the needle") 2125 2126 b := testShardBuilder(t, &zoekt.Repository{ 2127 Name: "reponame", 2128 }, Document{Name: "f1", Content: content}, 2129 Document{Name: "f2", Content: content}) 2130 2131 var buf bytes.Buffer 2132 if err := b.Write(&buf); err != nil { 2133 t.Fatal(err) 2134 } 2135 f := &memSeeker{buf.Bytes()} 2136 2137 rd, _, err := ReadMetadata(f) 2138 if err != nil { 2139 t.Fatalf("ReadMetadata: %v", err) 2140 } 2141 2142 if got, want := rd[0].Name, "reponame"; got != want { 2143 t.Fatalf("got %q want %q", got, want) 2144 } 2145} 2146 2147func TestRepoWithMetadata(t *testing.T) { 2148 sb := newShardBuilder(0) 2149 sb.repoList = 
[]zoekt.Repository{ 2150 { 2151 Name: "repo1", 2152 Metadata: map[string]string{"language": "go", "custom_key": "value"}, 2153 }, 2154 } 2155 2156 var buf bytes.Buffer 2157 if err := sb.Write(&buf); err != nil { 2158 t.Fatalf("failed to write shard: %v", err) 2159 } 2160 2161 // Simulate reading the shard back 2162 f := &memSeeker{buf.Bytes()} 2163 repoMetaData, _, err := ReadMetadata(f) 2164 if err != nil { 2165 t.Fatalf("failed to read metadata: %v", err) 2166 } 2167 2168 // Verify the metadata 2169 if len(repoMetaData) != 1 { 2170 t.Fatalf("expected 1 repository, got %d", len(repoMetaData)) 2171 } 2172 if got, want := repoMetaData[0].Metadata["language"], "go"; got != want { 2173 t.Errorf("expected metadata 'language' to be %q, got %q", want, got) 2174 } 2175 if got, want := repoMetaData[0].Metadata["custom_key"], "value"; got != want { 2176 t.Errorf("expected metadata 'custom_key' to be %q, got %q", want, got) 2177 } 2178} 2179 2180func TestOr(t *testing.T) { 2181 b := testShardBuilder(t, nil, 2182 Document{Name: "f1", Content: []byte("needle")}, 2183 Document{Name: "f2", Content: []byte("banana")}) 2184 t.Run("LineMatches", func(t *testing.T) { 2185 sres := searchForTest(t, b, query.NewOr( 2186 &query.Substring{Pattern: "needle"}, 2187 &query.Substring{Pattern: "banana"})) 2188 2189 if len(sres.Files) != 2 { 2190 t.Fatalf("got %v, want 2 files", sres.Files) 2191 } 2192 }) 2193 2194 t.Run("ChunkMatches", func(t *testing.T) { 2195 sres := searchForTest(t, b, query.NewOr( 2196 &query.Substring{Pattern: "needle"}, 2197 &query.Substring{Pattern: "banana"})) 2198 2199 if len(sres.Files) != 2 { 2200 t.Fatalf("got %v, want 2 files", sres.Files) 2201 } 2202 }) 2203} 2204 2205func TestFrequency(t *testing.T) { 2206 content := []byte("sla _Py_HashDouble(double v sla las las shd dot dot") 2207 2208 b := testShardBuilder(t, nil, 2209 Document{ 2210 Name: "f1", 2211 Content: content, 2212 }) 2213 2214 t.Run("LineMatches", func(t *testing.T) { 2215 sres := searchForTest(t, 
b, &query.Substring{Pattern: "slashdot"})
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "slashdot"}, chunkOpts)
		if len(sres.Files) != 0 {
			t.Errorf("got %v, wanted 0 matches", sres.Files)
		}
	})
}

// TestMatchNewline uses a regexp parsed with syntax.ClassNL, so the negated
// class in "[^a]a" may match the newline in "pqr\nalex". The LineMatches
// subtest expects the first reported line to be "alex"; the ChunkMatches
// subtest expects the chunk to contain the whole content.
func TestMatchNewline(t *testing.T) {
	re, err := syntax.Parse("[^a]a", syntax.ClassNL)
	if err != nil {
		t.Fatalf("syntax.Parse: %v", err)
	}

	content := []byte("pqr\nalex")

	b := testShardBuilder(t, nil,
		Document{
			Name:    "f1",
			Content: content,
		})

	t.Run("LineMatches", func(t *testing.T) {
		// The expected line is the part after "pqr\n"; report that value on
		// failure instead of the whole content.
		wantLine := content[len("pqr\n"):]

		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true})
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, wantLine) {
			t.Errorf("got match line %q, want %q", l, wantLine)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Errorf("got %v, wanted 1 matches", sres.Files)
		} else if c := sres.Files[0].ChunkMatches[0].Content; !bytes.Equal(c, content) {
			t.Errorf("got match line %q, want %q", c, content)
		}
	})
}

// TestSubRepo indexes a document that belongs to the sub-repository mounted
// at "sub" and checks that the file match reports the sub-repository path and
// name, and that the sub-repository's line fragment template is returned.
func TestSubRepo(t *testing.T) {
	subRepos := map[string]*zoekt.Repository{
		"sub": {
			Name:                 "sub-name",
			LineFragmentTemplate: "sub-line",
		},
	}

	content := []byte("pqr\nalex")

	b := testShardBuilder(t, &zoekt.Repository{
		SubRepoMap: subRepos,
	}, Document{
		Name:              "sub/f1",
		Content:           content,
		SubRepositoryPath: "sub",
	})

	sres := searchForTest(t, b, &query.Substring{Pattern: "alex"})
	if len(sres.Files) != 1 {
t.Fatalf("got %v, wanted 1 matches", sres.Files)
	}

	f := sres.Files[0]
	if f.SubRepositoryPath != "sub" || f.SubRepositoryName != "sub-name" {
		t.Errorf("got %#v, want SubRepository{Path,Name} = {'sub', 'sub-name'}", f)
	}

	if sres.LineFragments["sub-name"] != "sub-line" {
		// The lookup key is the sub-repository name, so the message names it.
		t.Errorf("got LineFragmentTemplate %v, want {'sub-name':'sub-line'}", sres.LineFragments)
	}
}

// TestSearchEither checks the default behaviour of a substring query with
// neither FileName nor Content set: it matches both file names and content
// (two files). With Content: true only the file whose content contains the
// pattern (f1) is returned.
func TestSearchEither(t *testing.T) {
	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: []byte("bla needle bla")},
		Document{Name: "needle-file-branch", Content: []byte("bla content")})

	t.Run("LineMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"})
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true})
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})

	t.Run("ChunkMatches", func(t *testing.T) {
		sres := searchForTest(t, b, &query.Substring{Pattern: "needle"}, chunkOpts)
		if len(sres.Files) != 2 {
			t.Fatalf("got %v, wanted 2 matches", sres.Files)
		}

		sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts)
		if len(sres.Files) != 1 {
			t.Fatalf("got %v, wanted 1 index", sres.Files)
		}

		if got, want := sres.Files[0].FileName, "f1"; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	})
}

// TestUnicodeExactMatch checks that a case-sensitive substring query finds a
// needle containing non-ASCII characters when the case agrees exactly.
func TestUnicodeExactMatch(t *testing.T) {
	needle := "néédlÉ"
	content := []byte("blá blá " + needle + " blâ")

	b := testShardBuilder(t, nil,
		Document{Name: "f1", Content: content})

	t.Run("LineMatches", func(t *testing.T) {
		if res :=
searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2342 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) 2343 } 2344 }) 2345 2346 t.Run("ChunkMatches", func(t *testing.T) { 2347 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2348 if len(res.Files) != 1 { 2349 t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) 2350 } 2351 }) 2352} 2353 2354func TestUnicodeCoverContent(t *testing.T) { 2355 needle := "néédlÉ" 2356 content := []byte("blá blá " + needle + " blâ") 2357 2358 b := testShardBuilder(t, nil, 2359 Document{Name: "f1", Content: content}) 2360 2361 t.Run("LineMatches", func(t *testing.T) { 2362 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2363 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) 2364 } 2365 2366 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2367 if len(res.Files) != 1 { 2368 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) 2369 } 2370 2371 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2372 t.Errorf("got %d want %d", got, want) 2373 } 2374 }) 2375 2376 t.Run("ChunkMatches", func(t *testing.T) { 2377 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2378 if len(res.Files) != 0 { 2379 t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) 2380 } 2381 2382 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2383 if len(res.Files) != 1 { 2384 t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) 2385 } 2386 2387 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2388 want := uint32(strings.Index(string(content), needle)) 2389 if got != want { 2390 t.Errorf("got %d want %d", got, want) 2391 } 2392 }) 2393} 2394 2395func TestUnicodeNonCoverContent(t 
*testing.T) { 2396 needle := "nééáádlÉ" 2397 content := []byte("blá blá " + needle + " blâ") 2398 2399 b := testShardBuilder(t, nil, 2400 Document{Name: "f1", Content: content}) 2401 2402 t.Run("LineMatches", func(t *testing.T) { 2403 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2404 if len(res.Files) != 1 { 2405 t.Fatalf("got %v, wanted 1 index", res.Files) 2406 } 2407 2408 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { 2409 t.Errorf("got %d want %d", got, want) 2410 } 2411 }) 2412 2413 t.Run("ChunkMatches", func(t *testing.T) { 2414 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2415 if len(res.Files) != 1 { 2416 t.Fatalf("got %v, wanted 1 index", res.Files) 2417 } 2418 2419 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset 2420 want := uint32(strings.Index(string(content), needle)) 2421 if got != want { 2422 t.Errorf("got %d want %d", got, want) 2423 } 2424 }) 2425} 2426 2427const kelvinCodePoint = 8490 2428 2429func TestUnicodeVariableLength(t *testing.T) { 2430 lower := 'k' 2431 upper := rune(kelvinCodePoint) 2432 2433 needle := "nee" + string([]rune{lower}) + "eed" 2434 corpus := []byte("nee" + string([]rune{upper}) + "eed" + 2435 " ee" + string([]rune{lower}) + "ee" + 2436 " ee" + string([]rune{upper}) + "ee") 2437 2438 t.Run("LineMatches", func(t *testing.T) { 2439 b := testShardBuilder(t, nil, 2440 Document{Name: "f1", Content: corpus}) 2441 2442 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2443 if len(res.Files) != 1 { 2444 t.Fatalf("got %v, wanted 1 index", res.Files) 2445 } 2446 }) 2447 2448 t.Run("ChunkMatches", func(t *testing.T) { 2449 b := testShardBuilder(t, nil, 2450 Document{Name: "f1", Content: corpus}) 2451 2452 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2453 if len(res.Files) != 1 { 2454 
t.Fatalf("got %v, wanted 1 index", res.Files) 2455 } 2456 }) 2457} 2458 2459func TestUnicodeFileStartOffsets(t *testing.T) { 2460 unicode := "世界" 2461 wat := "waaaaaat" 2462 b := testShardBuilder(t, nil, 2463 Document{ 2464 Name: "f1", 2465 Content: []byte(unicode), 2466 }, 2467 Document{ 2468 Name: "f2", 2469 Content: []byte(wat), 2470 }, 2471 ) 2472 q := &query.Substring{Pattern: wat, Content: true} 2473 res := searchForTest(t, b, q) 2474 if len(res.Files) != 1 { 2475 t.Fatalf("got %v, wanted 1 index", res.Files) 2476 } 2477} 2478 2479func TestLongFileUTF8(t *testing.T) { 2480 needle := "neeedle" 2481 2482 // 6 bytes. 2483 unicode := "世界" 2484 content := []byte(strings.Repeat(unicode, 100) + needle) 2485 b := testShardBuilder(t, nil, 2486 Document{ 2487 Name: "f1", 2488 Content: []byte(strings.Repeat("a", 50)), 2489 }, 2490 Document{ 2491 Name: "f2", 2492 Content: content, 2493 }) 2494 2495 t.Run("LineMatches", func(t *testing.T) { 2496 q := &query.Substring{Pattern: needle, Content: true} 2497 res := searchForTest(t, b, q) 2498 if len(res.Files) != 1 { 2499 t.Errorf("got %v, want 1 result", res) 2500 } 2501 }) 2502 2503 t.Run("ChunkMatches", func(t *testing.T) { 2504 q := &query.Substring{Pattern: needle, Content: true} 2505 res := searchForTest(t, b, q, chunkOpts) 2506 if len(res.Files) != 1 { 2507 t.Errorf("got %v, want 1 result", res) 2508 } 2509 }) 2510} 2511 2512func TestEstimateDocCount(t *testing.T) { 2513 content := []byte("bla needle bla") 2514 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2515 Document{Name: "f1", Content: content}, 2516 Document{Name: "f2", Content: content}, 2517 ) 2518 2519 t.Run("LineMatches", func(t *testing.T) { 2520 if sres := searchForTest(t, b, 2521 query.NewAnd( 2522 &query.Substring{Pattern: "needle"}, 2523 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2524 ), zoekt.SearchOptions{ 2525 EstimateDocCount: true, 2526 }); sres.Stats.ShardFilesConsidered != 2 { 2527 t.Errorf("got FilesConsidered = %d, want 
2", sres.Stats.FilesConsidered) 2528 } 2529 if sres := searchForTest(t, b, 2530 query.NewAnd( 2531 &query.Substring{Pattern: "needle"}, 2532 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2533 ), zoekt.SearchOptions{ 2534 EstimateDocCount: true, 2535 }); sres.Stats.ShardFilesConsidered != 0 { 2536 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2537 } 2538 }) 2539 2540 t.Run("ChunkMatches", func(t *testing.T) { 2541 if sres := searchForTest(t, b, 2542 query.NewAnd( 2543 &query.Substring{Pattern: "needle"}, 2544 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2545 ), zoekt.SearchOptions{ 2546 EstimateDocCount: true, 2547 ChunkMatches: true, 2548 }); sres.Stats.ShardFilesConsidered != 2 { 2549 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) 2550 } 2551 if sres := searchForTest(t, b, 2552 query.NewAnd( 2553 &query.Substring{Pattern: "needle"}, 2554 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2555 ), zoekt.SearchOptions{ 2556 EstimateDocCount: true, 2557 ChunkMatches: true, 2558 }); sres.Stats.ShardFilesConsidered != 0 { 2559 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) 2560 } 2561 }) 2562} 2563 2564func TestUTF8CorrectCorpus(t *testing.T) { 2565 needle := "neeedle" 2566 2567 // 6 bytes. 
2568 unicode := "世界" 2569 b := testShardBuilder(t, nil, 2570 Document{ 2571 Name: "f1", 2572 Content: []byte(strings.Repeat(unicode, 100)), 2573 }, 2574 Document{ 2575 Name: "xxxxxneeedle", 2576 Content: []byte("hello"), 2577 }) 2578 2579 t.Run("LineMatches", func(t *testing.T) { 2580 q := &query.Substring{Pattern: needle, FileName: true} 2581 res := searchForTest(t, b, q) 2582 if len(res.Files) != 1 { 2583 t.Errorf("got %v, want 1 result", res) 2584 } 2585 }) 2586 2587 t.Run("ChunkMatches", func(t *testing.T) { 2588 q := &query.Substring{Pattern: needle, FileName: true} 2589 res := searchForTest(t, b, q, chunkOpts) 2590 if len(res.Files) != 1 { 2591 t.Errorf("got %v, want 1 result", res) 2592 } 2593 }) 2594} 2595 2596func TestBuilderStats(t *testing.T) { 2597 b := testShardBuilder(t, nil, 2598 Document{ 2599 Name: "f1", 2600 Content: []byte(strings.Repeat("abcd", 1024)), 2601 }) 2602 var buf bytes.Buffer 2603 if err := b.Write(&buf); err != nil { 2604 t.Fatal(err) 2605 } 2606 2607 if got, want := b.ContentSize(), uint32(2+4*1024); got != want { 2608 t.Errorf("got %d, want %d", got, want) 2609 } 2610} 2611 2612func TestIOStats(t *testing.T) { 2613 b := testShardBuilder(t, nil, 2614 Document{ 2615 Name: "f1", 2616 Content: []byte(strings.Repeat("abcd", 1024)), 2617 }) 2618 2619 t.Run("LineMatches", func(t *testing.T) { 2620 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2621 res := searchForTest(t, b, q) 2622 2623 // 4096 (content) + 2 (overhead: newlines or doc sections) 2624 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2625 t.Errorf("got content I/O %d, want %d", got, want) 2626 } 2627 2628 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2629 // delta encoded. 
2630 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2631 t.Errorf("got index I/O %d, want %d", got, want) 2632 } 2633 }) 2634 2635 t.Run("ChunkMatches", func(t *testing.T) { 2636 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2637 res := searchForTest(t, b, q, chunkOpts) 2638 2639 // 4096 (content) + 2 (overhead: newlines or doc sections) 2640 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2641 t.Errorf("got content I/O %d, want %d", got, want) 2642 } 2643 2644 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2645 // delta encoded. 2646 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2647 t.Errorf("got index I/O %d, want %d", got, want) 2648 } 2649 }) 2650 2651 t.Run("LineMatches with BM25", func(t *testing.T) { 2652 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2653 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) 2654 2655 // 4096 (content) + 2 (overhead: newlines or doc sections) 2656 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2657 t.Errorf("got content I/O %d, want %d", got, want) 2658 } 2659 2660 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2661 // delta encoded. 2662 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2663 t.Errorf("got index I/O %d, want %d", got, want) 2664 } 2665 }) 2666 2667 t.Run("ChunkMatches with BM25", func(t *testing.T) { 2668 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2669 res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) 2670 2671 // 4096 (content) + 2 (overhead: newlines or doc sections) 2672 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { 2673 t.Errorf("got content I/O %d, want %d", got, want) 2674 } 2675 2676 // 1024 entries, each 4 bytes apart. 4 fits into single byte 2677 // delta encoded. 
2678 if got, want := res.Stats.IndexBytesLoaded, int64(1024); got != want { 2679 t.Errorf("got index I/O %d, want %d", got, want) 2680 } 2681 }) 2682} 2683 2684func TestStartLineAnchor(t *testing.T) { 2685 b := testShardBuilder(t, nil, 2686 Document{ 2687 Name: "f1", 2688 Content: []byte( 2689 `hello 2690start of middle of line 2691`), 2692 }) 2693 2694 t.Run("LineMatches", func(t *testing.T) { 2695 q, err := query.Parse("^start") 2696 if err != nil { 2697 t.Errorf("parse: %v", err) 2698 } 2699 2700 res := searchForTest(t, b, q) 2701 if len(res.Files) != 1 { 2702 t.Errorf("got %v, want 1 file", res.Files) 2703 } 2704 2705 q, err = query.Parse("^middle") 2706 if err != nil { 2707 t.Errorf("parse: %v", err) 2708 } 2709 res = searchForTest(t, b, q) 2710 if len(res.Files) != 0 { 2711 t.Errorf("got %v, want 0 files", res.Files) 2712 } 2713 }) 2714 2715 t.Run("ChunkMatches", func(t *testing.T) { 2716 q, err := query.Parse("^start") 2717 if err != nil { 2718 t.Errorf("parse: %v", err) 2719 } 2720 2721 res := searchForTest(t, b, q, chunkOpts) 2722 if len(res.Files) != 1 { 2723 t.Errorf("got %v, want 1 file", res.Files) 2724 } 2725 2726 q, err = query.Parse("^middle") 2727 if err != nil { 2728 t.Errorf("parse: %v", err) 2729 } 2730 res = searchForTest(t, b, q, chunkOpts) 2731 if len(res.Files) != 0 { 2732 t.Errorf("got %v, want 0 files", res.Files) 2733 } 2734 }) 2735} 2736 2737func TestAndOrUnicode(t *testing.T) { 2738 q, err := query.Parse("orange.*apple") 2739 if err != nil { 2740 t.Errorf("parse: %v", err) 2741 } 2742 finalQ := query.NewAnd(q, 2743 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2744 query.NewOr(&query.Branch{Pattern: "master"})))) 2745 2746 b := testShardBuilder(t, &zoekt.Repository{ 2747 Name: "name", 2748 Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, 2749 }, Document{ 2750 Name: "f2", 2751 Content: []byte("orange\u2318apple"), 2752 // --------------0123456 78901 2753 Branches: []string{"master"}, 2754 }) 
2755 2756 t.Run("LineMatches", func(t *testing.T) { 2757 res := searchForTest(t, b, finalQ) 2758 if len(res.Files) != 1 { 2759 t.Errorf("got %v, want 1 result", res.Files) 2760 } 2761 }) 2762 2763 t.Run("ChunkMatches", func(t *testing.T) { 2764 res := searchForTest(t, b, finalQ, chunkOpts) 2765 if len(res.Files) != 1 { 2766 t.Errorf("got %v, want 1 result", res.Files) 2767 } 2768 }) 2769} 2770 2771func TestAndShort(t *testing.T) { 2772 content := []byte("bla needle at orange bla") 2773 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2774 Document{Name: "f1", Content: content}, 2775 Document{Name: "f2", Content: []byte("xx at xx")}, 2776 Document{Name: "f3", Content: []byte("yy orange xx")}, 2777 ) 2778 2779 q := query.NewAnd(&query.Substring{Pattern: "at"}, 2780 &query.Substring{Pattern: "orange"}) 2781 2782 t.Run("LineMatches", func(t *testing.T) { 2783 res := searchForTest(t, b, q) 2784 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2785 t.Errorf("got %v, want 1 result", res.Files) 2786 } 2787 }) 2788 2789 t.Run("ChunkMatches", func(t *testing.T) { 2790 res := searchForTest(t, b, q, chunkOpts) 2791 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 2792 t.Errorf("got %v, want 1 result", res.Files) 2793 } 2794 }) 2795} 2796 2797func TestNoCollectRegexpSubstring(t *testing.T) { 2798 content := []byte("bla final bla\nfoo final, foo") 2799 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2800 Document{Name: "f1", Content: content}, 2801 ) 2802 2803 q := &query.Regexp{ 2804 Regexp: mustParseRE("final[,.]"), 2805 } 2806 2807 t.Run("LineMatches", func(t *testing.T) { 2808 res := searchForTest(t, b, q) 2809 if len(res.Files) != 1 { 2810 t.Fatalf("got %v, want 1 result", res.Files) 2811 } 2812 if f := res.Files[0]; len(f.LineMatches) != 1 { 2813 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2814 } 2815 }) 2816 2817 t.Run("ChunkMatches", func(t *testing.T) { 2818 res := searchForTest(t, b, 
q, chunkOpts) 2819 if len(res.Files) != 1 { 2820 t.Fatalf("got %v, want 1 result", res.Files) 2821 } 2822 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2823 t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2824 } 2825 }) 2826} 2827 2828func printLineMatches(ms []zoekt.LineMatch) string { 2829 var ss []string 2830 for _, m := range ms { 2831 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) 2832 } 2833 2834 return strings.Join(ss, ", ") 2835} 2836 2837func TestLang(t *testing.T) { 2838 content := []byte("bla needle bla") 2839 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2840 Document{Name: "f1", Content: content}, 2841 Document{Name: "f2", Language: "java", Content: content}, 2842 Document{Name: "f3", Language: "cpp", Content: content}, 2843 ) 2844 2845 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2846 &query.Language{Language: "cpp"}) 2847 2848 t.Run("LineMatches", func(t *testing.T) { 2849 res := searchForTest(t, b, q) 2850 if len(res.Files) != 1 { 2851 t.Fatalf("got %v, want 1 result in f3", res.Files) 2852 } 2853 f := res.Files[0] 2854 if f.FileName != "f3" || f.Language != "cpp" { 2855 t.Fatalf("got %v, want 1 match with language cpp", f) 2856 } 2857 }) 2858 2859 t.Run("ChunkMatches", func(t *testing.T) { 2860 res := searchForTest(t, b, q, chunkOpts) 2861 if len(res.Files) != 1 { 2862 t.Fatalf("got %v, want 1 result in f3", res.Files) 2863 } 2864 f := res.Files[0] 2865 if f.FileName != "f3" || f.Language != "cpp" { 2866 t.Fatalf("got %v, want 1 match with language cpp", f) 2867 } 2868 }) 2869} 2870 2871func TestLangShortcut(t *testing.T) { 2872 content := []byte("bla needle bla") 2873 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2874 Document{Name: "f2", Language: "java", Content: content}, 2875 Document{Name: "f3", Language: "cpp", Content: content}, 2876 ) 2877 2878 q := query.NewAnd(&query.Substring{Pattern: "needle"}, 2879 
&query.Language{Language: "fortran"}) 2880 2881 t.Run("LineMatches", func(t *testing.T) { 2882 res := searchForTest(t, b, q) 2883 if len(res.Files) != 0 { 2884 t.Fatalf("got %v, want 0 results", res.Files) 2885 } 2886 if res.Stats.IndexBytesLoaded > 0 { 2887 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2888 } 2889 }) 2890 2891 t.Run("ChunkMatches", func(t *testing.T) { 2892 res := searchForTest(t, b, q, chunkOpts) 2893 if len(res.Files) != 0 { 2894 t.Fatalf("got %v, want 0 results", res.Files) 2895 } 2896 if res.Stats.IndexBytesLoaded > 0 { 2897 t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2898 } 2899 }) 2900} 2901 2902func TestNoTextMatchAtoms(t *testing.T) { 2903 content := []byte("bla needle bla") 2904 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2905 Document{Name: "f1", Content: content}, 2906 Document{Name: "f2", Language: "java", Content: content}, 2907 Document{Name: "f3", Language: "cpp", Content: content}, 2908 ) 2909 q := query.NewAnd(&query.Language{Language: "java"}) 2910 t.Run("LineMatches", func(t *testing.T) { 2911 res := searchForTest(t, b, q) 2912 if len(res.Files) != 1 { 2913 t.Fatalf("got %v, want 1 result in f3", res.Files) 2914 } 2915 }) 2916 2917 t.Run("ChunkMatches", func(t *testing.T) { 2918 res := searchForTest(t, b, q, chunkOpts) 2919 if len(res.Files) != 1 { 2920 t.Fatalf("got %v, want 1 result in f3", res.Files) 2921 } 2922 }) 2923} 2924 2925func TestNoPositiveAtoms(t *testing.T) { 2926 content := []byte("bla needle bla") 2927 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2928 Document{Name: "f1", Content: content}, 2929 Document{Name: "f2", Content: content}, 2930 ) 2931 2932 q := query.NewAnd( 2933 &query.Not{Child: &query.Substring{Pattern: "xyz"}}, 2934 &query.Repo{Regexp: regexp.MustCompile("reponame")}) 2935 t.Run("LineMatches", func(t *testing.T) { 2936 res := searchForTest(t, b, q) 2937 if len(res.Files) != 2 { 2938 t.Fatalf("got %v, want 2 
results in f3", res.Files) 2939 } 2940 }) 2941 t.Run("ChunkMatches", func(t *testing.T) { 2942 res := searchForTest(t, b, q, chunkOpts) 2943 if len(res.Files) != 2 { 2944 t.Fatalf("got %v, want 2 results in f3", res.Files) 2945 } 2946 }) 2947} 2948 2949func TestSymbolBoundaryStart(t *testing.T) { 2950 content := []byte("start\nbla bla\nend") 2951 // ----------------012345-67890123-456 2952 2953 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2954 Document{ 2955 Name: "f1", 2956 Content: content, 2957 Symbols: []DocumentSection{{0, 5}, {14, 17}}, 2958 }, 2959 ) 2960 q := &query.Symbol{ 2961 Expr: &query.Substring{Pattern: "start"}, 2962 } 2963 t.Run("LineMatches", func(t *testing.T) { 2964 res := searchForTest(t, b, q) 2965 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 2966 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2967 } 2968 m := res.Files[0].LineMatches[0].LineFragments[0] 2969 if m.Offset != 0 { 2970 t.Fatalf("got offset %d want 0", m.Offset) 2971 } 2972 }) 2973 2974 t.Run("ChunkMatches", func(t *testing.T) { 2975 res := searchForTest(t, b, q, chunkOpts) 2976 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 2977 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 2978 } 2979 m := res.Files[0].ChunkMatches[0].Ranges[0] 2980 if m.Start.ByteOffset != 0 { 2981 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 2982 } 2983 }) 2984} 2985 2986func TestSymbolBoundaryEnd(t *testing.T) { 2987 content := []byte("start\nbla bla\nend") 2988 // ----------------012345-67890123-456 2989 2990 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 2991 Document{ 2992 Name: "f1", 2993 Content: content, 2994 Symbols: []DocumentSection{{14, 17}}, 2995 }, 2996 ) 2997 q := &query.Symbol{ 2998 Expr: &query.Substring{Pattern: "end"}, 2999 } 3000 t.Run("LineMatches", func(t *testing.T) { 3001 res := searchForTest(t, b, q) 3002 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3003 t.Fatalf("got %v, want 1 
line in 1 file", res.Files) 3004 } 3005 m := res.Files[0].LineMatches[0].LineFragments[0] 3006 if m.Offset != 14 { 3007 t.Fatalf("got offset %d want 0", m.Offset) 3008 } 3009 }) 3010 3011 t.Run("ChunkMatches", func(t *testing.T) { 3012 res := searchForTest(t, b, q, chunkOpts) 3013 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3014 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3015 } 3016 m := res.Files[0].ChunkMatches[0].Ranges[0] 3017 if m.Start.ByteOffset != 14 { 3018 t.Fatalf("got offset %d want 0", m.Start.ByteOffset) 3019 } 3020 }) 3021} 3022 3023func TestSymbolSubstring(t *testing.T) { 3024 content := []byte("bla\nsymblabla\nbla") 3025 // ----------------0123-4567890123-456 3026 3027 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3028 Document{ 3029 Name: "f1", 3030 Content: content, 3031 Symbols: []DocumentSection{{4, 12}}, 3032 }, 3033 ) 3034 q := &query.Symbol{ 3035 Expr: &query.Substring{Pattern: "bla"}, 3036 } 3037 t.Run("LineMatches", func(t *testing.T) { 3038 res := searchForTest(t, b, q) 3039 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3040 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3041 } 3042 m := res.Files[0].LineMatches[0].LineFragments[0] 3043 if m.Offset != 7 || m.MatchLength != 3 { 3044 t.Fatalf("got offset %d, size %d want 7 size 3", m.Offset, m.MatchLength) 3045 } 3046 }) 3047 3048 t.Run("ChunkMatches", func(t *testing.T) { 3049 res := searchForTest(t, b, q, chunkOpts) 3050 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3051 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3052 } 3053 m := res.Files[0].ChunkMatches[0].Ranges[0] 3054 if m.Start.ByteOffset != 7 || m.End.ByteOffset != 10 { 3055 t.Fatalf("got offset %d, end %d want 7, 10", m.Start.ByteOffset, m.End.ByteOffset) 3056 } 3057 }) 3058} 3059 3060func TestSymbolSubstringExact(t *testing.T) { 3061 content := []byte("bla\nsym\nbla\nsym\nasymb") 3062 // ----------------0123-4567-890123456-78901 3063 
3064 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3065 Document{ 3066 Name: "f1", 3067 Content: content, 3068 Symbols: []DocumentSection{{4, 7}}, 3069 }, 3070 ) 3071 q := &query.Symbol{ 3072 Expr: &query.Substring{Pattern: "sym"}, 3073 } 3074 t.Run("LineMatches", func(t *testing.T) { 3075 res := searchForTest(t, b, q) 3076 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3077 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3078 } 3079 m := res.Files[0].LineMatches[0].LineFragments[0] 3080 if m.Offset != 4 { 3081 t.Fatalf("got offset %d, want 7", m.Offset) 3082 } 3083 }) 3084 3085 t.Run("ChunkMatches", func(t *testing.T) { 3086 res := searchForTest(t, b, q, chunkOpts) 3087 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3088 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3089 } 3090 m := res.Files[0].ChunkMatches[0].Ranges[0] 3091 if m.Start.ByteOffset != 4 { 3092 t.Fatalf("got offset %d, want 7", m.Start.ByteOffset) 3093 } 3094 }) 3095} 3096 3097func TestSymbolRegexpExact(t *testing.T) { 3098 content := []byte("blah\nbla\nbl") 3099 // ----------------01234-5678-90 3100 3101 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3102 Document{ 3103 Name: "f1", 3104 Content: content, 3105 Symbols: []DocumentSection{{0, 4}, {5, 8}, {9, 11}}, 3106 }, 3107 ) 3108 q := &query.Symbol{ 3109 Expr: &query.Regexp{Regexp: mustParseRE("^bla$")}, 3110 } 3111 t.Run("LineMatches", func(t *testing.T) { 3112 res := searchForTest(t, b, q) 3113 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3114 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3115 } 3116 m := res.Files[0].LineMatches[0].LineFragments[0] 3117 if m.Offset != 5 { 3118 t.Fatalf("got offset %d, want 5", m.Offset) 3119 } 3120 }) 3121 3122 t.Run("ChunkMatches", func(t *testing.T) { 3123 res := searchForTest(t, b, q, chunkOpts) 3124 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3125 t.Fatalf("got %v, want 1 line in 1 file", 
res.Files) 3126 } 3127 m := res.Files[0].ChunkMatches[0].Ranges[0] 3128 if m.Start.ByteOffset != 5 { 3129 t.Fatalf("got offset %d, want 5", m.Start.ByteOffset) 3130 } 3131 }) 3132} 3133 3134func TestSymbolRegexpPartial(t *testing.T) { 3135 content := []byte("abcdef") 3136 // ----------------012345 3137 3138 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3139 Document{ 3140 Name: "f1", 3141 Content: content, 3142 Symbols: []DocumentSection{{0, 6}}, 3143 }, 3144 ) 3145 q := &query.Symbol{ 3146 Expr: &query.Regexp{Regexp: mustParseRE("(b|d)c(d|b)")}, 3147 } 3148 t.Run("LineMatches", func(t *testing.T) { 3149 res := searchForTest(t, b, q) 3150 if len(res.Files) != 1 || len(res.Files[0].LineMatches) != 1 { 3151 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3152 } 3153 m := res.Files[0].LineMatches[0].LineFragments[0] 3154 if m.Offset != 1 { 3155 t.Fatalf("got offset %d, want 1", m.Offset) 3156 } 3157 if m.MatchLength != 3 { 3158 t.Fatalf("got match length %d, want 3", m.MatchLength) 3159 } 3160 }) 3161 3162 t.Run("ChunkMatches", func(t *testing.T) { 3163 res := searchForTest(t, b, q, chunkOpts) 3164 if len(res.Files) != 1 || len(res.Files[0].ChunkMatches) != 1 { 3165 t.Fatalf("got %v, want 1 line in 1 file", res.Files) 3166 } 3167 m := res.Files[0].ChunkMatches[0].Ranges[0] 3168 if m.Start.ByteOffset != 1 { 3169 t.Fatalf("got offset %d, want 1", m.Start.ByteOffset) 3170 } 3171 if m.End.ByteOffset != 4 { 3172 t.Fatalf("got match end %d, want 4", m.End.ByteOffset) 3173 } 3174 }) 3175} 3176 3177func TestSymbolRegexpAll(t *testing.T) { 3178 docs := []Document{ 3179 { 3180 Name: "f1", 3181 Content: []byte("Hello Zoekt"), 3182 // --------------01234567890 3183 Symbols: []DocumentSection{{0, 5}, {6, 11}}, 3184 }, 3185 { 3186 Name: "f2", 3187 Content: []byte("Second Zoekt Third"), 3188 // --------------012345678901234567 3189 Symbols: []DocumentSection{{0, 6}, {7, 12}, {13, 18}}, 3190 }, 3191 } 3192 3193 b := testShardBuilder(t, &zoekt.Repository{Name: 
"reponame"}, docs...) 3194 q := &query.Symbol{ 3195 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 3196 } 3197 t.Run("LineMatches", func(t *testing.T) { 3198 res := searchForTest(t, b, q) 3199 if len(res.Files) != len(docs) { 3200 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3201 } 3202 for i, want := range docs { 3203 got := res.Files[i].LineMatches[0].LineFragments 3204 if len(got) != len(want.Symbols) { 3205 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3206 } 3207 3208 for j, sec := range want.Symbols { 3209 if sec.Start != got[j].Offset { 3210 t.Fatalf("got offset %d, want %d in doc %s", got[j].Offset, sec.Start, want.Name) 3211 } 3212 } 3213 } 3214 }) 3215 3216 t.Run("ChunkMatches", func(t *testing.T) { 3217 res := searchForTest(t, b, q, chunkOpts) 3218 if len(res.Files) != len(docs) { 3219 t.Fatalf("got %v, want %d file", res.Files, len(docs)) 3220 } 3221 for i, want := range docs { 3222 got := res.Files[i].ChunkMatches[0].Ranges 3223 if len(got) != len(want.Symbols) { 3224 t.Fatalf("got %d symbols, want %d symbols in doc %s", len(got), len(want.Symbols), want.Name) 3225 } 3226 3227 for j, sec := range want.Symbols { 3228 if sec.Start != got[j].Start.ByteOffset { 3229 t.Fatalf("got offset %d, want %d in doc %s", got[j].Start.ByteOffset, sec.Start, want.Name) 3230 } 3231 } 3232 } 3233 }) 3234} 3235 3236func TestHitIterTerminate(t *testing.T) { 3237 // contrived input: trigram frequencies forces selecting abc + 3238 // def for the distance iteration. There is no index, so this 3239 // will advance the compressedPostingIterator to beyond the 3240 // end. 
3241 content := []byte("abc bcdbcd cdecde abcabc def efg") 3242 b := testShardBuilder(t, nil, 3243 Document{ 3244 Name: "f1", 3245 Content: content, 3246 }, 3247 ) 3248 3249 t.Run("LineMatches", func(t *testing.T) { 3250 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}) 3251 }) 3252 3253 t.Run("ChunkMatches", func(t *testing.T) { 3254 searchForTest(t, b, &query.Substring{Pattern: "abcdef"}, chunkOpts) 3255 }) 3256} 3257 3258func TestDistanceHitIterBailLast(t *testing.T) { 3259 content := []byte("AST AST AST UASH") 3260 b := testShardBuilder(t, nil, 3261 Document{ 3262 Name: "f1", 3263 Content: content, 3264 }, 3265 ) 3266 t.Run("LineMatches", func(t *testing.T) { 3267 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}) 3268 if len(res.Files) != 0 { 3269 t.Fatalf("got %v, want no results", res.Files) 3270 } 3271 }) 3272 3273 t.Run("LineMatches", func(t *testing.T) { 3274 res := searchForTest(t, b, &query.Substring{Pattern: "UAST"}, chunkOpts) 3275 if len(res.Files) != 0 { 3276 t.Fatalf("got %v, want no results", res.Files) 3277 } 3278 }) 3279} 3280 3281func TestDocumentSectionRuneBoundary(t *testing.T) { 3282 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3283 b, err := NewShardBuilder(nil) 3284 if err != nil { 3285 t.Fatalf("NewShardBuilder: %v", err) 3286 } 3287 3288 for i, sec := range []DocumentSection{ 3289 {2, 6}, 3290 {3, 7}, 3291 } { 3292 if err := b.Add(Document{ 3293 Name: "f1", 3294 Content: []byte(content), 3295 Symbols: []DocumentSection{sec}, 3296 }); err == nil { 3297 t.Errorf("%d: Add succeeded", i) 3298 } 3299 } 3300} 3301 3302func TestUnicodeQuery(t *testing.T) { 3303 content := string([]rune{kelvinCodePoint, kelvinCodePoint, kelvinCodePoint}) 3304 b := testShardBuilder(t, nil, 3305 Document{ 3306 Name: "f1", 3307 Content: []byte(content), 3308 }, 3309 ) 3310 3311 q := &query.Substring{Pattern: content} 3312 3313 t.Run("LineMatches", func(t *testing.T) { 3314 res := searchForTest(t, b, q) 3315 if 
len(res.Files) != 1 { 3316 t.Fatalf("want 1 match, got %v", res.Files) 3317 } 3318 3319 f := res.Files[0] 3320 if len(f.LineMatches) != 1 { 3321 t.Fatalf("want 1 line, got %v", f.LineMatches) 3322 } 3323 l := f.LineMatches[0] 3324 3325 if len(l.LineFragments) != 1 { 3326 t.Fatalf("want 1 line fragment, got %v", l.LineFragments) 3327 } 3328 fr := l.LineFragments[0] 3329 if fr.MatchLength != len(content) { 3330 t.Fatalf("got MatchLength %d want %d", fr.MatchLength, len(content)) 3331 } 3332 }) 3333 3334 t.Run("ChunkMatches", func(t *testing.T) { 3335 res := searchForTest(t, b, q, chunkOpts) 3336 if len(res.Files) != 1 { 3337 t.Fatalf("want 1 match, got %v", res.Files) 3338 } 3339 3340 f := res.Files[0] 3341 if len(f.ChunkMatches) != 1 { 3342 t.Fatalf("want 1 line, got %v", f.LineMatches) 3343 } 3344 cm := f.ChunkMatches[0] 3345 3346 if len(cm.Ranges) != 1 { 3347 t.Fatalf("want 1 line fragment, got %v", cm.Ranges) 3348 } 3349 rr := cm.Ranges[0] 3350 if matchLen := rr.End.ByteOffset - rr.Start.ByteOffset; int(matchLen) != len(content) { 3351 t.Fatalf("got MatchLength %d want %d", matchLen, len(content)) 3352 } 3353 }) 3354} 3355 3356func TestSkipInvalidContent(t *testing.T) { 3357 for _, content := range []string{ 3358 // Binary 3359 "abc def \x00 abc", 3360 } { 3361 3362 b, err := NewShardBuilder(nil) 3363 if err != nil { 3364 t.Fatalf("NewShardBuilder: %v", err) 3365 } 3366 3367 if err := b.Add(Document{ 3368 Name: "f1", 3369 Content: []byte(content), 3370 }); err != nil { 3371 t.Fatal(err) 3372 } 3373 3374 t.Run("LineMatches", func(t *testing.T) { 3375 q := &query.Substring{Pattern: "abc def"} 3376 res := searchForTest(t, b, q) 3377 if len(res.Files) != 0 { 3378 t.Fatalf("got %v, want no results", res.Files) 3379 } 3380 3381 q = &query.Substring{Pattern: "NOT-INDEXED"} 3382 res = searchForTest(t, b, q) 3383 if len(res.Files) != 1 { 3384 t.Fatalf("got %v, want 1 result", res.Files) 3385 } 3386 }) 3387 3388 t.Run("ChunkMatches", func(t *testing.T) { 3389 q := 
&query.Substring{Pattern: "abc def"} 3390 res := searchForTest(t, b, q, chunkOpts) 3391 if len(res.Files) != 0 { 3392 t.Fatalf("got %v, want no results", res.Files) 3393 } 3394 3395 q = &query.Substring{Pattern: "NOT-INDEXED"} 3396 res = searchForTest(t, b, q, chunkOpts) 3397 if len(res.Files) != 1 { 3398 t.Fatalf("got %v, want 1 result", res.Files) 3399 } 3400 }) 3401 } 3402} 3403 3404func TestDocChecker(t *testing.T) { 3405 docChecker := DocChecker{} 3406 3407 // Test valid and invalid text 3408 for _, text := range []string{"", "simple ascii", "símplé unicödé", "\uFEFFwith utf8 'bom'", "with \uFFFD unicode replacement char"} { 3409 if skip := docChecker.Check([]byte(text), 20000, false); skip != SkipReasonNone { 3410 t.Errorf("Check(%q): %v", text, skip) 3411 } 3412 } 3413 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx", "0123456789abcdefghi"} { 3414 if skip := docChecker.Check([]byte(text), 15, false); skip == SkipReasonNone { 3415 t.Errorf("Check(%q) succeeded", text) 3416 } 3417 } 3418 3419 // Test valid and invalid text with an allowed large file 3420 for _, text := range []string{"0123456789abcdefghi", "qwertyuiopasdfghjklzxcvbnm"} { 3421 if skip := docChecker.Check([]byte(text), 15, true); skip != SkipReasonNone { 3422 t.Errorf("Check(%q): %v", text, skip) 3423 } 3424 } 3425 for _, text := range []string{"zero\x00byte", "\x00starts with null byte", "xx"} { 3426 if skip := docChecker.Check([]byte(text), 15, true); skip == SkipReasonNone { 3427 t.Errorf("Check(%q) succeeded", text) 3428 } 3429 } 3430} 3431 3432func TestLineAnd(t *testing.T) { 3433 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3434 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3435 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3436 Document{Name: "f3", Content: []byte("banana grape")}, 3437 ) 3438 pattern := "(apple)(?-s:.)*?(banana)" 3439 r, _ := 
syntax.Parse(pattern, syntax.Perl) 3440 3441 q := query.Regexp{ 3442 Regexp: r, 3443 Content: true, 3444 } 3445 t.Run("LineMatches", func(t *testing.T) { 3446 res := searchForTest(t, b, &q) 3447 wantRegexpCount := 1 3448 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3449 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3450 } 3451 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3452 t.Errorf("got %v, want 1 result", res.Files) 3453 } 3454 }) 3455 3456 t.Run("ChunkMatches", func(t *testing.T) { 3457 res := searchForTest(t, b, &q, chunkOpts) 3458 wantRegexpCount := 1 3459 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3460 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3461 } 3462 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3463 t.Errorf("got %v, want 1 result", res.Files) 3464 } 3465 }) 3466} 3467 3468func TestLineAndFileName(t *testing.T) { 3469 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3470 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3471 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3472 Document{Name: "apple banana", Content: []byte("banana grape")}, 3473 ) 3474 pattern := "(apple)(?-s:.)*?(banana)" 3475 r, _ := syntax.Parse(pattern, syntax.Perl) 3476 3477 q := query.Regexp{ 3478 Regexp: r, 3479 FileName: true, 3480 } 3481 t.Run("LineMatches", func(t *testing.T) { 3482 res := searchForTest(t, b, &q) 3483 wantRegexpCount := 1 3484 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3485 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3486 } 3487 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3488 t.Errorf("got %v, want 1 result", res.Files) 3489 } 3490 }) 3491 3492 t.Run("ChunkMatches", func(t *testing.T) { 3493 res := searchForTest(t, b, &q, chunkOpts) 3494 wantRegexpCount := 1 3495 if gotRegexpCount := res.RegexpsConsidered; 
gotRegexpCount != wantRegexpCount { 3496 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3497 } 3498 if len(res.Files) != 1 || res.Files[0].FileName != "apple banana" { 3499 t.Errorf("got %v, want 1 result", res.Files) 3500 } 3501 }) 3502} 3503 3504func TestMultiLineRegex(t *testing.T) { 3505 b := testShardBuilder(t, &zoekt.Repository{Name: "reponame"}, 3506 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3507 Document{Name: "f2", Content: []byte("apple orange")}, 3508 Document{Name: "f3", Content: []byte("grape apple")}, 3509 ) 3510 pattern := "(apple).*?[[:space:]].*?(grape)" 3511 r, _ := syntax.Parse(pattern, syntax.Perl) 3512 3513 q := query.Regexp{ 3514 Regexp: r, 3515 } 3516 t.Run("LineMatches", func(t *testing.T) { 3517 res := searchForTest(t, b, &q) 3518 wantRegexpCount := 2 3519 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3520 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3521 } 3522 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3523 t.Errorf("got %v, want 1 result", res.Files) 3524 } 3525 if l := len(res.Files[0].LineMatches); l != 2 { 3526 t.Errorf("got %v, want 2 line matches", l) 3527 } 3528 }) 3529 3530 t.Run("ChunkMatches", func(t *testing.T) { 3531 res := searchForTest(t, b, &q, chunkOpts) 3532 wantRegexpCount := 2 3533 if gotRegexpCount := res.RegexpsConsidered; gotRegexpCount != wantRegexpCount { 3534 t.Errorf("got %d, wanted %d", gotRegexpCount, wantRegexpCount) 3535 } 3536 if len(res.Files) != 1 || res.Files[0].FileName != "f1" { 3537 t.Errorf("got %v, want 1 result", res.Files) 3538 } 3539 if l := len(res.Files[0].ChunkMatches); l != 1 { 3540 t.Errorf("got %v, want 1 chunk matches", l) 3541 } 3542 if l := len(res.Files[0].ChunkMatches[0].Ranges); l != 1 { 3543 t.Errorf("got %v, want 1 chunk ranges", l) 3544 } 3545 }) 3546} 3547 3548func TestSearchTypeFileName(t *testing.T) { 3549 b := testShardBuilder(t, &zoekt.Repository{ 3550 Name: "reponame", 3551 
}, 3552 Document{Name: "f1", Content: []byte("bla the needle")}, 3553 Document{Name: "f2", Content: []byte("another file another\nneedle")}, 3554 // -----------------------------------012345678901234567890-123456 3555 ) 3556 3557 t.Run("LineMatches", func(t *testing.T) { 3558 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3559 t.Helper() 3560 fmatches := res.Files 3561 if len(fmatches) != 1 { 3562 t.Errorf("got %v, want 1 matches", len(fmatches)) 3563 return 3564 } 3565 if len(fmatches[0].LineMatches) != 1 { 3566 t.Errorf("got %d line matches", len(fmatches[0].LineMatches)) 3567 return 3568 } 3569 var got string 3570 if fmatches[0].LineMatches[0].FileName { 3571 got = fmatches[0].FileName 3572 } else { 3573 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3574 } 3575 3576 if got != want { 3577 t.Errorf("got %s, want %s", got, want) 3578 } 3579 } 3580 3581 // Only return the later match in the second file 3582 res := searchForTest(t, b, query.NewAnd( 3583 &query.Type{ 3584 Type: query.TypeFileName, 3585 Child: &query.Substring{Pattern: "needle"}, 3586 }, 3587 &query.Substring{Pattern: "file"})) 3588 wantSingleMatch(res, "f2:8") 3589 3590 // Only return a filename result 3591 res = searchForTest(t, b, 3592 &query.Type{ 3593 Type: query.TypeFileName, 3594 Child: &query.Substring{Pattern: "file"}, 3595 }) 3596 wantSingleMatch(res, "f2") 3597 }) 3598 3599 t.Run("ChunkMatches", func(t *testing.T) { 3600 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3601 t.Helper() 3602 fmatches := res.Files 3603 if len(fmatches) != 1 { 3604 t.Errorf("got %v, want 1 matches", len(fmatches)) 3605 return 3606 } 3607 if len(fmatches[0].ChunkMatches) != 1 { 3608 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3609 return 3610 } 3611 var got string 3612 if fmatches[0].ChunkMatches[0].FileName { 3613 got = fmatches[0].FileName 3614 } else { 3615 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, 
fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3616 } 3617 3618 if got != want { 3619 t.Errorf("got %s, want %s", got, want) 3620 } 3621 } 3622 3623 // Only return the later match in the second file 3624 res := searchForTest(t, b, query.NewAnd( 3625 &query.Type{ 3626 Type: query.TypeFileName, 3627 Child: &query.Substring{Pattern: "needle"}, 3628 }, 3629 &query.Substring{Pattern: "file"}), 3630 chunkOpts, 3631 ) 3632 wantSingleMatch(res, "f2:8") 3633 3634 // Only return a filename result 3635 res = searchForTest(t, b, 3636 &query.Type{ 3637 Type: query.TypeFileName, 3638 Child: &query.Substring{Pattern: "file"}, 3639 }, 3640 chunkOpts, 3641 ) 3642 wantSingleMatch(res, "f2") 3643 }) 3644 3645 // type:filematch is the default result granularity. The wrapper must be 3646 // treated as a passthrough to the child instead of crashing the shard. 3647 t.Run("TypeFileMatch", func(t *testing.T) { 3648 res := searchForTest(t, b, 3649 &query.Type{ 3650 Type: query.TypeFileMatch, 3651 Child: &query.Substring{Pattern: "needle"}, 3652 }) 3653 if len(res.Files) != 2 { 3654 t.Fatalf("got %d file matches, want 2", len(res.Files)) 3655 } 3656 }) 3657} 3658 3659func TestSearchTypeLanguage(t *testing.T) { 3660 b := testShardBuilder(t, &zoekt.Repository{ 3661 Name: "reponame", 3662 }, 3663 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, 3664 Document{Name: "tex.cls", Content: []byte(`\DeclareOption*{`)}, 3665 Document{Name: "hello.h", Content: []byte(`#include <stdio.h>`)}, 3666 Document{Name: "be.magik", Content: []byte(`_package unicorn`)}, 3667 ) 3668 3669 t.Log(b.languageMap) 3670 3671 t.Run("LineMatches", func(t *testing.T) { 3672 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3673 t.Helper() 3674 fmatches := res.Files 3675 if len(fmatches) != 1 { 3676 t.Errorf("got %v, want 1 matches", len(fmatches)) 3677 return 3678 } 3679 if len(fmatches[0].LineMatches) != 1 { 3680 t.Errorf("got %d line matches", 
len(fmatches[0].LineMatches)) 3681 return 3682 } 3683 var got string 3684 if fmatches[0].LineMatches[0].FileName { 3685 got = fmatches[0].FileName 3686 } else { 3687 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].LineMatches[0].LineFragments[0].Offset) 3688 } 3689 3690 if got != want { 3691 t.Errorf("got %s, want %s", got, want) 3692 } 3693 } 3694 3695 res := searchForTest(t, b, &query.Language{Language: "Apex"}) 3696 wantSingleMatch(res, "apex.cls") 3697 3698 res = searchForTest(t, b, &query.Language{Language: "TeX"}) 3699 wantSingleMatch(res, "tex.cls") 3700 3701 res = searchForTest(t, b, &query.Language{Language: "C"}) 3702 wantSingleMatch(res, "hello.h") 3703 3704 res = searchForTest(t, b, &query.Language{Language: "Magik"}) 3705 wantSingleMatch(res, "be.magik") 3706 3707 // test fallback language search by pretending it's an older index version 3708 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3709 if len(res.Files) != 0 { 3710 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3711 } 3712 3713 b.featureVersion = 11 // force fallback 3714 res = searchForTest(t, b, &query.Language{Language: "C++"}) 3715 wantSingleMatch(res, "hello.h") 3716 }) 3717 3718 t.Run("ChunkMatches", func(t *testing.T) { 3719 wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3720 t.Helper() 3721 fmatches := res.Files 3722 if len(fmatches) != 1 { 3723 t.Errorf("got %v, want 1 matches", len(fmatches)) 3724 return 3725 } 3726 if len(fmatches[0].ChunkMatches) != 1 { 3727 t.Errorf("got %d line matches", len(fmatches[0].ChunkMatches)) 3728 return 3729 } 3730 var got string 3731 if fmatches[0].ChunkMatches[0].FileName { 3732 got = fmatches[0].FileName 3733 } else { 3734 got = fmt.Sprintf("%s:%d", fmatches[0].FileName, fmatches[0].ChunkMatches[0].Ranges[0].Start.ByteOffset) 3735 } 3736 3737 if got != want { 3738 t.Errorf("got %s, want %s", got, want) 3739 } 3740 } 3741 3742 b.featureVersion = FeatureVersion // reset feature version 3743 res := 
searchForTest(t, b, &query.Language{Language: "Apex"}, chunkOpts) 3744 wantSingleMatch(res, "apex.cls") 3745 3746 res = searchForTest(t, b, &query.Language{Language: "TeX"}, chunkOpts) 3747 wantSingleMatch(res, "tex.cls") 3748 3749 res = searchForTest(t, b, &query.Language{Language: "C"}, chunkOpts) 3750 wantSingleMatch(res, "hello.h") 3751 3752 // test fallback language search by pretending it's an older index version 3753 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3754 if len(res.Files) != 0 { 3755 t.Errorf("got %d results for C++, want 0", len(res.Files)) 3756 } 3757 3758 b.featureVersion = 11 // force fallback 3759 res = searchForTest(t, b, &query.Language{Language: "C++"}, chunkOpts) 3760 wantSingleMatch(res, "hello.h") 3761 }) 3762} 3763 3764func TestStats(t *testing.T) { 3765 ignored := []cmp.Option{ 3766 cmpopts.EquateEmpty(), 3767 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), 3768 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 3769 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 3770 } 3771 3772 repoListEntries := func(b *ShardBuilder) []zoekt.RepoListEntry { 3773 searcher := searcherForTest(t, b) 3774 indexdata := searcher.(*indexData) 3775 return indexdata.repoListEntry 3776 } 3777 3778 t.Run("one empty repo", func(t *testing.T) { 3779 b := testShardBuilder(t, nil) 3780 got := repoListEntries(b) 3781 want := []zoekt.RepoListEntry{ 3782 { 3783 Stats: zoekt.RepoStats{ 3784 Repos: 0, 3785 Shards: 1, 3786 Documents: 0, 3787 IndexBytes: 20, 3788 ContentBytes: 0, 3789 NewLinesCount: 0, 3790 DefaultBranchNewLinesCount: 0, 3791 OtherBranchesNewLinesCount: 0, 3792 }, 3793 }, 3794 } 3795 3796 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3797 t.Fatalf("mismatch (-want +got):\n%s", diff) 3798 } 3799 }) 3800 3801 t.Run("one simple shard", func(t *testing.T) { 3802 b := testShardBuilder(t, nil, 3803 Document{Name: "doc 0", Content: []byte("content 0")}, 3804 Document{Name: "doc 1", Content: 
[]byte("content 1")}, 3805 ) 3806 got := repoListEntries(b) 3807 want := []zoekt.RepoListEntry{ 3808 { 3809 Stats: zoekt.RepoStats{ 3810 Repos: 0, 3811 Shards: 1, 3812 Documents: 2, 3813 IndexBytes: 224, 3814 ContentBytes: 28, 3815 NewLinesCount: 0, 3816 DefaultBranchNewLinesCount: 0, 3817 OtherBranchesNewLinesCount: 0, 3818 }, 3819 }, 3820 } 3821 3822 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3823 t.Fatalf("mismatch (-want +got):\n%s", diff) 3824 } 3825 }) 3826 3827 t.Run("one compound shard", func(t *testing.T) { 3828 b := testShardBuilderCompound(t, 3829 []*zoekt.Repository{ 3830 {Name: "repo 0"}, 3831 {Name: "repo 1"}, 3832 }, 3833 [][]Document{ 3834 { 3835 {Name: "doc 0", Content: []byte("content 0")}, 3836 {Name: "doc 1", Content: []byte("content 1")}, 3837 }, 3838 { 3839 {Name: "doc 2", Content: []byte("content 2")}, 3840 {Name: "doc 3", Content: []byte("content 3")}, 3841 }, 3842 }, 3843 ) 3844 got := repoListEntries(b) 3845 want := []zoekt.RepoListEntry{ 3846 { 3847 Stats: zoekt.RepoStats{ 3848 Repos: 0, 3849 Shards: 1, 3850 Documents: 2, 3851 IndexBytes: 180, 3852 ContentBytes: 28, 3853 NewLinesCount: 0, 3854 DefaultBranchNewLinesCount: 0, 3855 OtherBranchesNewLinesCount: 0, 3856 }, 3857 }, 3858 { 3859 Stats: zoekt.RepoStats{ 3860 Repos: 0, 3861 Shards: 1, 3862 Documents: 2, 3863 IndexBytes: 180, 3864 ContentBytes: 28, 3865 NewLinesCount: 0, 3866 DefaultBranchNewLinesCount: 0, 3867 OtherBranchesNewLinesCount: 0, 3868 }, 3869 }, 3870 } 3871 3872 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3873 t.Fatalf("mismatch (-want +got):\n%s", diff) 3874 } 3875 }) 3876 3877 t.Run("compound shard with empty repos", func(t *testing.T) { 3878 b := testShardBuilderCompound(t, 3879 []*zoekt.Repository{ 3880 {Name: "repo 0"}, 3881 {Name: "repo 1"}, 3882 {Name: "repo 2"}, 3883 {Name: "repo 3"}, 3884 {Name: "repo 4"}, 3885 }, 3886 [][]Document{ 3887 {{Name: "doc 0", Content: []byte("content 0")}}, 3888 nil, 3889 {{Name: "doc 1", Content: 
[]byte("content 1")}}, 3890 nil, 3891 nil, 3892 }, 3893 ) 3894 got := repoListEntries(b) 3895 3896 entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3897 Shards: 1, 3898 Documents: 0, 3899 ContentBytes: 0, 3900 }} 3901 entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3902 Shards: 1, 3903 Documents: 1, 3904 ContentBytes: 14, 3905 }} 3906 3907 want := []zoekt.RepoListEntry{ 3908 entryNonEmpty, 3909 entryEmpty, 3910 entryNonEmpty, 3911 entryEmpty, 3912 entryEmpty, 3913 } 3914 3915 if diff := cmp.Diff(want, got, ignored...); diff != "" { 3916 t.Fatalf("mismatch (-want +got):\n%s", diff) 3917 } 3918 }) 3919} 3920 3921// This tests the frequent pattern "\bLITERAL\b". 3922func TestWordSearch(t *testing.T) { 3923 content := []byte("needle the bla") 3924 // ----------------01234567890123 3925 3926 b := testShardBuilder(t, nil, 3927 Document{ 3928 Name: "f1", 3929 Content: content, 3930 }) 3931 3932 t.Run("LineMatches", func(t *testing.T) { 3933 sres := searchForTest(t, b, 3934 &query.Regexp{ 3935 Regexp: mustParseRE("\\bthe\\b"), 3936 CaseSensitive: true, 3937 Content: true, 3938 }) 3939 3940 if len(sres.Files) != 1 || len(sres.Files[0].LineMatches) != 1 { 3941 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3942 } 3943 3944 if sres.Stats.RegexpsConsidered != 0 { 3945 t.Fatal("expected regexp to be skipped") 3946 } 3947 3948 got := sres.Files[0].LineMatches[0] 3949 want := zoekt.LineMatch{ 3950 LineFragments: []zoekt.LineFragmentMatch{{ 3951 LineOffset: 7, 3952 Offset: 7, 3953 MatchLength: 3, 3954 }}, 3955 Line: content, 3956 FileName: false, 3957 LineNumber: 1, 3958 LineStart: 0, 3959 LineEnd: 14, 3960 } 3961 3962 if !reflect.DeepEqual(got, want) { 3963 t.Errorf("got %#v, want %#v", got, want) 3964 } 3965 }) 3966 3967 t.Run("ChunkMatches", func(t *testing.T) { 3968 sres := searchForTest(t, b, 3969 &query.Regexp{ 3970 Regexp: mustParseRE("\\bthe\\b"), 3971 CaseSensitive: true, 3972 }, chunkOpts) 3973 3974 if len(sres.Files) != 1 || 
len(sres.Files[0].ChunkMatches) != 1 { 3975 t.Fatalf("got %v, want 1 match in 1 file", sres.Files) 3976 } 3977 3978 if sres.Stats.RegexpsConsidered != 0 { 3979 t.Fatal("expected regexp to be skipped") 3980 } 3981 3982 got := sres.Files[0].ChunkMatches[0] 3983 want := zoekt.ChunkMatch{ 3984 Content: content, 3985 ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3986 Ranges: []zoekt.Range{{ 3987 Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3988 End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3989 }}, 3990 } 3991 3992 if diff := cmp.Diff(want, got); diff != "" { 3993 t.Fatal(diff) 3994 } 3995 }) 3996} 3997 3998// Simple benchmark focused on chunk match scoring. It creates a single file that will have a 1000-line chunk match. 3999// The benchmark time is expected to be strongly correlated with time spent assembling and scoring this chunk. 4000func BenchmarkScoreChunkMatches(b *testing.B) { 4001 ctx := context.Background() 4002 var builder strings.Builder 4003 for i := range 1000 { 4004 builder.WriteString(fmt.Sprintf("line-%d one one one two two two three three three four four four five five\n", i)) 4005 } 4006 4007 searcher := searcherForTest(b, testShardBuilder(b, nil, 4008 Document{Name: "f1", Content: []byte(builder.String())}, 4009 )) 4010 4011 q := &query.Or{ 4012 Children: []query.Q{ 4013 &query.Substring{Pattern: "f"}, 4014 &query.Substring{Pattern: "t"}, 4015 }} 4016 4017 b.Run("score large ChunkMatch", func(b *testing.B) { 4018 b.ReportAllocs() 4019 b.ResetTimer() 4020 4021 for i := 0; i < b.N; i++ { 4022 sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) 4023 if err != nil { 4024 b.Fatal(err) 4025 } 4026 4027 matches := sres.Files 4028 if len(matches) == 0 { 4029 b.Fatalf("want file index, got none") 4030 } 4031 } 4032 }) 4033}