fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

indexserver: set blob:limit based on FileLimit (#1027)

Previously we hardcoded this value to 1mb (which aligns with what is set
in Sourcegraph). But we might as well set it based on the limit that is
passed in.

Additionally we set the special HTTP headers sourcegraph needs in the
config so that when git double checks if it needs to hydrate an object
it can do it successfully.

+32 -23
+15 -8
cmd/zoekt-sourcegraph-indexserver/index.go
··· 85 85 // Parallelism is the number of shards to compute in parallel. 86 86 Parallelism int 87 87 88 - // FileLimit is the maximum size of a file 89 - FileLimit int 90 - 91 88 // UseDelta is true if we want to use the new delta indexer. This should 92 89 // only be true for repositories we explicitly enable. 93 90 UseDelta bool ··· 126 123 }, 127 124 IndexDir: o.IndexDir, 128 125 Parallelism: o.Parallelism, 129 - SizeMax: o.FileLimit, 126 + SizeMax: MaxFileSize, 130 127 LargeFiles: o.LargeFiles, 131 128 CTagsMustSucceed: o.Symbols, 132 129 DisableCTags: !o.Symbols, ··· 232 229 return err 233 230 } 234 231 232 + for _, header := range []string{ 233 + "X-Sourcegraph-Actor-UID: internal", 234 + "X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), 235 + } { 236 + cmd = exec.CommandContext(ctx, "git", "-C", gitDir, "config", "--add", "http.extraHeader", header) 237 + cmd.Stdin = &bytes.Buffer{} 238 + if err := c.runCmd(cmd); err != nil { 239 + return err 240 + } 241 + } 242 + 235 243 var fetchDuration time.Duration 236 244 successfullyFetchedCommitsCount := 0 237 245 allFetchesSucceeded := true ··· 249 257 fetchArgs := []string{ 250 258 "-C", gitDir, 251 259 "-c", "protocol.version=2", 252 - "-c", "http.extraHeader=X-Sourcegraph-Actor-UID: internal", 253 - "-c", "http.extraHeader=X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID), 254 260 "fetch", "--depth=1", "--no-tags", 255 261 } 256 262 257 - // If there are no exceptions to MaxFileSize (1MB), we can avoid fetching these large files. 263 + // Git's blob:limit filter excludes blobs whose size is >= the given limit, 264 + // while zoekt indexes files up to and including FileLimit bytes. 258 265 if len(o.LargeFiles) == 0 { 259 - fetchArgs = append(fetchArgs, "--filter=blob:limit=1m") 266 + fetchArgs = append(fetchArgs, fmt.Sprintf("--filter=blob:limit=%d", int64(MaxFileSize)+1)) 260 267 } 261 268 262 269 fetchArgs = append(fetchArgs, o.CloneURL)
+16 -10
cmd/zoekt-sourcegraph-indexserver/index_test.go
··· 487 487 }, 488 488 want: []string{ 489 489 "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", 490 - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", 490 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", 491 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 42", 492 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", 491 493 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 492 494 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 493 495 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", ··· 497 499 "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", 498 500 "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", 499 501 "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42", 500 - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", 502 + "zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", 501 503 }, 502 504 }, { 503 505 name: "minimal-id", ··· 512 514 }, 513 515 want: []string{ 514 516 "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", 515 - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef", 517 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", 518 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", 519 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags --filter=blob:limit=1048577 http://api.test/.internal/git/test/repo deadbeef", 516 520 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 517 521 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 518 522 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", ··· 522 526 "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", 523 527 "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 123", 524 528 "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", 525 - "zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git", 529 + "zoekt-git-index -submodules=false -branches HEAD -file_limit 1048576 -disable_ctags $TMPDIR/test%2Frepo.git", 526 530 }, 527 531 }, { 528 532 name: "all", ··· 530 534 Incremental: true, 531 535 IndexDir: "/data/index", 532 536 Parallelism: 4, 533 - FileLimit: 123, 534 537 IndexOptions: IndexOptions{ 535 538 Name: "test/repo", 536 539 CloneURL: "http://api.test/.internal/git/test/repo", ··· 545 548 }, 546 549 want: []string{ 547 550 "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", 548 - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", 551 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", 552 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", 553 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed", 549 554 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 550 555 "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", 551 556 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", ··· 557 562 "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", 558 563 "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", 559 564 "zoekt-git-index -submodules=false -incremental -branches HEAD,dev " + 560 - "-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 565 + "-file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 561 566 "$TMPDIR/test%2Frepo.git", 562 567 }, 563 568 }, { ··· 566 571 Incremental: true, 567 572 IndexDir: "/data/index", 568 573 Parallelism: 4, 569 - FileLimit: 123, 570 574 UseDelta: true, 571 575 IndexOptions: IndexOptions{ 572 576 RepoID: 0, ··· 594 598 }, 595 599 want: []string{ 596 600 "git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git", 597 - "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", 601 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Actor-UID: internal", 602 + "git -C $TMPDIR/test%2Frepo.git config --add http.extraHeader X-Sourcegraph-Tenant-ID: 1", 603 + "git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 fetch --depth=1 --no-tags http://api.test/.internal/git/test/repo deadbeef feebdaed 12345678 oldhead olddev oldrelease", 598 604 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 599 605 "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", 600 606 "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678", ··· 607 613 "git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0", 608 614 "git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1", 609 615 "zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " + 610 - "-delta -delta_threshold 22 -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 616 + "-delta -delta_threshold 22 -file_limit 1048576 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " + 611 617 "$TMPDIR/test%2Frepo.git", 612 618 }, 613 619 }}
+1 -3
cmd/zoekt-sourcegraph-indexserver/main.go
··· 169 169 }) 170 170 ) 171 171 172 - // 1 MB; match https://sourcegraph.sgdev.org/github.com/sourcegraph/sourcegraph/-/blob/cmd/symbols/internal/symbols/search.go#L22 173 - // NOTE: if you change this, you must also update gitIndex to use the same value when fetching the repo. 172 + // 1 MB; match https://sourcegraph.sourcegraph.com/r/github.com/sourcegraph/sourcegraph/-/blob/cmd/searcher/internal/search/store.go?L32 174 173 const MaxFileSize = 1 << 20 175 174 176 175 // set of repositories that we want to capture separate indexing metrics for ··· 732 731 IndexDir: s.IndexDir, 733 732 Parallelism: parallelism, 734 733 Incremental: true, 735 - FileLimit: MaxFileSize, 736 734 ShardMerging: s.shardMerging, 737 735 } 738 736 }
-2
cmd/zoekt-sourcegraph-indexserver/main_test.go
··· 44 44 IndexDir: "/testdata/index", 45 45 Parallelism: 6, 46 46 Incremental: true, 47 - FileLimit: 1 << 20, 48 47 } 49 48 got := s.indexArgs(IndexOptions{Name: "testName"}) 50 49 if !cmp.Equal(got, want) { ··· 218 217 Incremental: true, 219 218 IndexDir: dir, 220 219 Parallelism: 1, 221 - FileLimit: 1, 222 220 } 223 221 224 222 if err := createEmptyShard(args); err != nil {