fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Add shard filtering for meta queries (#986)

Co-authored-by: John Mason <jmason@gitlab.com>

author
John Mason
co-author
John Mason
committer
GitHub
date (Oct 24, 2025, 3:57 PM +0200) commit eab7bf42 parent 90faf6de
+193 -1
+17 -1
search/shards.go
··· 436 436 } 437 437 return false 438 438 }) 439 + case *query.Meta: 440 + // Meta queries filter repositories based on metadata fields. 441 + // By checking this at the shard level, we can skip entire shards 442 + // that don't contain any matching repositories, avoiding expensive 443 + // I/O operations. 444 + setSize = 0 // Unknown size, we'll filter based on metadata 445 + hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool { 446 + if repo.Metadata == nil { 447 + return false 448 + } 449 + v, ok := repo.Metadata[setQuery.Field] 450 + if !ok { 451 + return false 452 + } 453 + return setQuery.Value.MatchString(v) 454 + }) 439 455 default: 440 456 continue 441 457 } ··· 486 502 // shard indexData.simplify will simplify to (and true (content baz)) -> 487 503 // (content baz). This work can be done now once, rather than per shard. 488 504 switch c := c.(type) { 489 - case *query.RepoSet, *query.RepoIDs, *query.Repo: 505 + case *query.RepoSet, *query.RepoIDs, *query.Repo, *query.Meta: 490 506 and.Children[i] = &query.Const{Value: true} 491 507 return filtered, query.Simplify(and) 492 508
+176
search/shards_test.go
··· 387 387 } 388 388 } 389 389 390 + func TestFilteringShardsByMeta(t *testing.T) { 391 + ss := newShardedSearcher(1) 392 + 393 + // Create repos with different metadata values 394 + // We'll create 30 repos total: 395 + // - 10 with nickname="project-A" 396 + // - 10 with nickname="project-B" 397 + // - 10 with no metadata 398 + n := 30 399 + projectARepos := []string{} 400 + projectBRepos := []string{} 401 + 402 + // Common document that will be in all repos 403 + doc := index.Document{ 404 + Name: "common.go", 405 + Content: []byte("needle haystack"), 406 + } 407 + 408 + for i := range n { 409 + shardName := fmt.Sprintf("shard%d", i) 410 + repoName := fmt.Sprintf("repository%.3d", i) 411 + 412 + var metadata map[string]string 413 + if i < 10 { 414 + // First 10 repos have project-A 415 + metadata = map[string]string{"nickname": "project-A", "visibility": "public"} 416 + projectARepos = append(projectARepos, repoName) 417 + } else if i < 20 { 418 + // Next 10 repos have project-B 419 + metadata = map[string]string{"nickname": "project-B", "visibility": "private"} 420 + projectBRepos = append(projectBRepos, repoName) 421 + } 422 + // Last 10 repos have no metadata 423 + 424 + repo := &zoekt.Repository{ 425 + ID: uint32(i + 1), 426 + Name: repoName, 427 + Metadata: metadata, 428 + } 429 + 430 + ss.replace(map[string]zoekt.Searcher{ 431 + shardName: searcherForTest(t, testShardBuilder(t, repo, doc)), 432 + }) 433 + } 434 + 435 + // Test 1: Search without Meta filter - should search all shards 436 + res, err := ss.Search(context.Background(), &query.Substring{Pattern: "needle"}, &zoekt.SearchOptions{}) 437 + if err != nil { 438 + t.Fatalf("Search without filter: %v", err) 439 + } 440 + if len(res.Files) != n { 441 + t.Fatalf("no meta filter: got %d results, want %d", len(res.Files), n) 442 + } 443 + 444 + sub := &query.Substring{Pattern: "needle"} 445 + 446 + // Helper function to extract unique repo names from search results 447 + getRepoNames := func(files []zoekt.FileMatch) []string { 448 + repoSet := make(map[string]struct{}) 449 + for _, f := range files { 450 + repoSet[f.Repository] = struct{}{} 451 + } 452 + repos := make([]string, 0, len(repoSet)) 453 + for repo := range repoSet { 454 + repos = append(repos, repo) 455 + } 456 + sort.Strings(repos) 457 + return repos 458 + } 459 + 460 + // Test 2: Filter by nickname="project-A" - should only search 10 shards 461 + metaQueryA := &query.Meta{ 462 + Field: "nickname", 463 + Value: regexp.MustCompile("^project-A$"), 464 + } 465 + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryA, sub), &zoekt.SearchOptions{}) 466 + if err != nil { 467 + t.Fatalf("Search with Meta filter A: %v", err) 468 + } 469 + gotRepos := getRepoNames(res.Files) 470 + wantRepos := append([]string{}, projectARepos...) 471 + sort.Strings(wantRepos) 472 + if !reflect.DeepEqual(gotRepos, wantRepos) { 473 + t.Fatalf("Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) 474 + } 475 + 476 + // Test 3: Filter by nickname="project-B" - should only search 10 shards 477 + metaQueryB := &query.Meta{ 478 + Field: "nickname", 479 + Value: regexp.MustCompile("^project-B$"), 480 + } 481 + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryB, sub), &zoekt.SearchOptions{}) 482 + if err != nil { 483 + t.Fatalf("Search with Meta filter B: %v", err) 484 + } 485 + gotRepos = getRepoNames(res.Files) 486 + wantRepos = append([]string{}, projectBRepos...) 487 + sort.Strings(wantRepos) 488 + if !reflect.DeepEqual(gotRepos, wantRepos) { 489 + t.Fatalf("Meta(nickname=project-B):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) 490 + } 491 + 492 + // Test 4: Filter by visibility="public" - should only search 10 shards (project-A repos) 493 + metaQueryPublic := &query.Meta{ 494 + Field: "visibility", 495 + Value: regexp.MustCompile("^public$"), 496 + } 497 + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryPublic, sub), &zoekt.SearchOptions{}) 498 + if err != nil { 499 + t.Fatalf("Search with Meta filter public: %v", err) 500 + } 501 + gotRepos = getRepoNames(res.Files) 502 + wantRepos = append([]string{}, projectARepos...) 503 + sort.Strings(wantRepos) 504 + if !reflect.DeepEqual(gotRepos, wantRepos) { 505 + t.Fatalf("Meta(visibility=public):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) 506 + } 507 + 508 + // Test 5: Filter by non-existent field - should return 0 results 509 + metaQueryNonExistent := &query.Meta{ 510 + Field: "nonexistent_field", 511 + Value: regexp.MustCompile(".*"), 512 + } 513 + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryNonExistent, sub), &zoekt.SearchOptions{}) 514 + if err != nil { 515 + t.Fatalf("Search with Meta filter non-existent: %v", err) 516 + } 517 + if len(res.Files) != 0 { 518 + t.Fatalf("Meta(nonexistent_field): got %d results, want 0", len(res.Files)) 519 + } 520 + 521 + // Test 6: Filter by regex pattern matching multiple values 522 + metaQueryRegex := &query.Meta{ 523 + Field: "nickname", 524 + Value: regexp.MustCompile("project-.*"), // Matches both project-A and project-B 525 + } 526 + res, err = ss.Search(context.Background(), query.NewAnd(metaQueryRegex, sub), &zoekt.SearchOptions{}) 527 + if err != nil { 528 + t.Fatalf("Search with Meta regex filter: %v", err) 529 + } 530 + gotRepos = getRepoNames(res.Files) 531 + wantRepos = append(append([]string{}, projectARepos...), projectBRepos...) 532 + sort.Strings(wantRepos) 533 + if !reflect.DeepEqual(gotRepos, wantRepos) { 534 + t.Fatalf("Meta(nickname=project-.*):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) 535 + } 536 + 537 + // Test 7: Test that Meta query alone (without content search) works 538 + res, err = ss.Search(context.Background(), metaQueryA, &zoekt.SearchOptions{}) 539 + if err != nil { 540 + t.Fatalf("Search with Meta query alone: %v", err) 541 + } 542 + gotRepos = getRepoNames(res.Files) 543 + wantRepos = append([]string{}, projectARepos...) 544 + sort.Strings(wantRepos) 545 + if !reflect.DeepEqual(gotRepos, wantRepos) { 546 + t.Fatalf("Meta query alone:\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos) 547 + } 548 + 549 + // Test 8: Test with List operation (not just Search) 550 + listRes, err := ss.List(context.Background(), metaQueryA, nil) 551 + if err != nil { 552 + t.Fatalf("List with Meta filter: %v", err) 553 + } 554 + gotListRepos := make([]string, len(listRes.Repos)) 555 + for i, r := range listRes.Repos { 556 + gotListRepos[i] = r.Repository.Name 557 + } 558 + sort.Strings(gotListRepos) 559 + wantRepos = append([]string{}, projectARepos...) 560 + sort.Strings(wantRepos) 561 + if !reflect.DeepEqual(gotListRepos, wantRepos) { 562 + t.Fatalf("List with Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotListRepos, wantRepos) 563 + } 564 + } 565 + 390 566 func hash(name string) uint32 { 391 567 h := fnv.New32() 392 568 h.Write([]byte(name))