fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Add support for indexing submodules without repocache (#985)

* Fix git.Open() fs argument for .git dir instead of worktree

The filesystem argument submitted to git.Open() is of the .git
directory, it should be for the worktree, as in go-git this logic is copied from:

https://github.com/go-git/go-git/blob/145daf2492dd86b1d7e7787555ebd9837b93bff8/repository.go#L373-L375

One side effect of this bug was that go-git would refuse to fetch repo
submodules

* Fix missing RepoURLs and LineFragments for result subrepositories

RepoURLs for subrepositories are currently filtered out when results
are streamed per-repo from shards.

* Add submodule indexing without repo cache

When not using repo cache, index git submodules recursively into

subrepos using the go-git API

* Adjust subrepo naming conventions

- Revert repo-cache behavior to unchanged
- Add support for naming repos and modules with no remote (use the dirname for the
former and subrepopath for the latter)
- Add test for submodule no-repo-cache behavior

* Fix overallocation in repoURL and lineFragment filtering

* Revert basename treatment from 08f67597

Missed the fact that it is already done in zoekt-git-index
command, the test was improperly configured

https://github.com/sourcegraph/zoekt/blob/4e4a529c3b63c7d4c7897ba736f1cd52cc163134/cmd/zoekt-git-index/main.go#L93-L100

+207 -10
+108 -5
gitindex/index.go
··· 282 282 return nil 283 283 } 284 284 285 + // This attempts to get a repo URL similar to the main repository template processing as in setTemplatesFromConfig() 286 + func normalizeSubmoduleRemoteURL(cfg *config.Config) (string, error) { 287 + sec := cfg.Raw.Section("zoekt") 288 + remoteURL := sec.Options.Get("web-url") 289 + if remoteURL == "" { 290 + // fall back to "origin" remote 291 + remoteURL = configLookupRemoteURL(cfg, "origin") 292 + if remoteURL == "" { 293 + return "", nil 294 + } 295 + } 296 + 297 + if sm := sshRelativeURLRegexp.FindStringSubmatch(remoteURL); sm != nil { 298 + user := sm[1] 299 + host := sm[2] 300 + path := sm[3] 301 + 302 + remoteURL = fmt.Sprintf("ssh+git://%s@%s/%s", user, host, path) 303 + } 304 + 305 + u, err := url.Parse(remoteURL) 306 + if err != nil { 307 + return "", fmt.Errorf("unable to parse remote URL %q: %w", remoteURL, err) 308 + } 309 + 310 + if u.Scheme == "ssh+git" { 311 + u.Scheme = "https" 312 + u.User = nil 313 + } 314 + 315 + // Assume we cannot build templates for this URL, leave it empty 316 + if u.Scheme == "" { 317 + return "", nil 318 + } 319 + 320 + return u.String(), nil 321 + } 322 + 285 323 // SetTemplatesFromOrigin fills in templates based on the origin URL. 286 324 func SetTemplatesFromOrigin(desc *zoekt.Repository, u *url.URL) error { 287 325 desc.Name = filepath.Join(u.Host, strings.TrimSuffix(u.Path, ".git")) ··· 495 533 tpl := opts.BuildOptions.RepositoryDescription 496 534 if path != "" { 497 535 tpl = zoekt.Repository{URL: info.URL.String()} 498 - if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil { 499 - log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err) 536 + if info.URL.String() != "" { 537 + if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil { 538 + log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err) 539 + } 540 + } 541 + if tpl.Name == "" { 542 + tpl.Name = path 500 543 } 501 544 } 502 545 opts.BuildOptions.SubRepositories[path] = &tpl ··· 569 612 // It copies the relevant logic from git.PlainOpen, and tweaks certain filesystem options. 570 613 func openRepo(repoDir string) (*git.Repository, io.Closer, error) { 571 614 fs := osfs.New(repoDir) 615 + wt := fs 572 616 573 617 // Check if the root directory exists. 574 618 if _, err := fs.Stat(""); err != nil { ··· 591 635 }) 592 636 593 637 // Because we're keeping descriptors open, we need to close the storage object when we're done. 594 - repo, err := git.Open(s, fs) 638 + repo, err := git.Open(s, wt) 595 639 return repo, s, err 596 640 } 597 641 ··· 830 874 831 875 func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) { 832 876 var repoCache *RepoCache 833 - if options.Submodules { 877 + if options.Submodules && options.RepoCacheDir != "" { 834 878 repoCache = NewRepoCache(options.RepoCacheDir) 835 879 } 880 + return prepareNormalBuildRecurse(options, repository, repoCache, false) 881 + } 836 882 883 + func prepareNormalBuildRecurse(options Options, repository *git.Repository, repoCache *RepoCache, isSubrepo bool) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) { 837 884 // Branch => Repo => SHA1 838 885 branchVersions = map[string]map[string]plumbing.Hash{} 839 886 ··· 842 889 return nil, nil, fmt.Errorf("expandBranches: %w", err) 843 890 } 844 891 845 - rw := NewRepoWalker(repository, options.BuildOptions.RepositoryDescription.URL, repoCache) 892 + repoURL := options.BuildOptions.RepositoryDescription.URL 893 + 894 + if isSubrepo { 895 + cfg, err := repository.Config() 896 + if err != nil { 897 + return nil, nil, fmt.Errorf("unable to get repository config: %w", err) 898 + } 899 + 900 + u, err := normalizeSubmoduleRemoteURL(cfg) 901 + if err != nil { 902 + return nil, nil, fmt.Errorf("failed to identify subrepository URL: %w", err) 903 + } 904 + repoURL = u 905 + } 906 + 907 + rw := NewRepoWalker(repository, repoURL, repoCache) 846 908 for _, b := range branches { 847 909 commit, err := getCommit(repository, options.BranchPrefix, b) 848 910 if err != nil { ··· 869 931 } 870 932 871 933 branchVersions[b] = subVersions 934 + } 935 + 936 + // Index submodules using go-git if we didn't do so using the repo cache 937 + if options.Submodules && options.RepoCacheDir == "" { 938 + worktree, err := repository.Worktree() 939 + if err != nil { 940 + return nil, nil, fmt.Errorf("failed to get repository worktree: %w", err) 941 + } 942 + 943 + submodules, err := worktree.Submodules() 944 + if err != nil { 945 + return nil, nil, fmt.Errorf("failed to get submodules: %w", err) 946 + } 947 + 948 + for _, submodule := range submodules { 949 + subRepository, err := submodule.Repository() 950 + if err != nil { 951 + log.Printf("failed to open submodule repository: %s, %s", submodule.Config().Name, err) 952 + continue 953 + } 954 + 955 + sw, subVersions, err := prepareNormalBuildRecurse(options, subRepository, repoCache, true) 956 + if err != nil { 957 + log.Printf("failed to index submodule repository: %s, %s", submodule.Config().Name, err) 958 + continue 959 + } 960 + 961 + log.Printf("adding subrepository files from: %s", submodule.Config().Name) 962 + 963 + for k, repo := range sw { 964 + rw.Files[fileKey{ 965 + SubRepoPath: filepath.Join(submodule.Config().Path, k.SubRepoPath), 966 + Path: k.Path, 967 + ID: k.ID, 968 + }] = repo 969 + } 970 + 971 + for k, v := range subVersions { 972 + branchVersions[filepath.Join(submodule.Config().Path, k)] = v 973 + } 974 + } 872 975 } 873 976 874 977 return rw.Files, branchVersions, nil
+10 -3
gitindex/tree.go
··· 178 178 } 179 179 180 180 func (rw *RepoWalker) handleEntry(p string, e *object.TreeEntry, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error { 181 - if e.Mode == filemode.Submodule && rw.repoCache != nil { 182 - if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil { 183 - return fmt.Errorf("submodule %s: %v", p, err) 181 + if e.Mode == filemode.Submodule { 182 + if rw.repoCache != nil { 183 + // Index the submodule using repo cache 184 + if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil { 185 + return fmt.Errorf("submodule %s: %v", p, err) 186 + } 187 + } else { 188 + // Record the commit ID for the submodule path 189 + // This will be the submodule's commit hash, not the parent's 190 + subRepoVersions[p] = e.Hash 184 191 } 185 192 } 186 193
+64
gitindex/tree_test.go
··· 258 258 } 259 259 } 260 260 261 + func TestSubmoduleIndexWithoutRepocache(t *testing.T) { 262 + dir := t.TempDir() 263 + 264 + if err := createSubmoduleRepo(dir); err != nil { 265 + t.Fatalf("createSubmoduleRepo: %v", err) 266 + } 267 + 268 + indexDir := t.TempDir() 269 + 270 + buildOpts := index.Options{ 271 + RepositoryDescription: zoekt.Repository{Name: "adir"}, 272 + IndexDir: indexDir, 273 + } 274 + opts := Options{ 275 + RepoDir: filepath.Join(dir, "adir"), 276 + BuildOptions: buildOpts, 277 + BranchPrefix: "refs/heads/", 278 + Branches: []string{"master"}, 279 + Submodules: true, 280 + Incremental: true, 281 + } 282 + if _, err := IndexGitRepo(opts); err != nil { 283 + t.Fatalf("IndexGitRepo: %v", err) 284 + } 285 + 286 + searcher, err := search.NewDirectorySearcher(indexDir) 287 + if err != nil { 288 + t.Fatal("NewDirectorySearcher", err) 289 + } 290 + defer searcher.Close() 291 + 292 + results, err := searcher.Search(context.Background(), 293 + &query.Substring{Pattern: "bcont"}, 294 + &zoekt.SearchOptions{}) 295 + if err != nil { 296 + t.Fatal("Search", err) 297 + } 298 + 299 + if len(results.Files) != 1 { 300 + t.Fatalf("got search result %v, want 1 file", results.Files) 301 + } 302 + 303 + file := results.Files[0] 304 + if got, want := file.SubRepositoryName, "bname"; got != want { 305 + t.Errorf("got subrepo name %q, want %q", got, want) 306 + } 307 + if got, want := file.SubRepositoryPath, "bname"; got != want { 308 + t.Errorf("got subrepo path %q, want %q", got, want) 309 + } 310 + 311 + subVersion := file.Version 312 + if len(subVersion) != 40 { 313 + t.Fatalf("got %q, want hex sha1", subVersion) 314 + } 315 + 316 + if results, err := searcher.Search(context.Background(), &query.Substring{Pattern: "acont"}, &zoekt.SearchOptions{}); err != nil { 317 + t.Fatalf("Search('acont'): %v", err) 318 + } else if len(results.Files) != 1 { 319 + t.Errorf("got %v, want 1 result", results.Files) 320 + } else if f := results.Files[0]; f.Version == subVersion { 321 + t.Errorf("version in super repo matched version is subrepo.") 322 + } 323 + } 324 + 261 325 func createSymlinkRepo(dir string) error { 262 326 if err := os.MkdirAll(dir, 0o755); err != nil { 263 327 return err
+25 -2
search/shards.go
··· 816 816 817 817 send := func(repoName string, a, b int, stats zoekt.Stats) { 818 818 index.SortFiles(result.Files[a:b]) 819 + 820 + filteredRepoURLs := map[string]string{repoName: result.RepoURLs[repoName]} 821 + filteredLineFragments := map[string]string{repoName: result.LineFragments[repoName]} 822 + 823 + // Filter RepoURLs and LineFragments to only those of repoName and its 824 + // subRepositories if there are subRepositories 825 + for _, file := range result.Files[a:b] { 826 + name := file.SubRepositoryName 827 + if name == "" { 828 + continue 829 + } 830 + _, repoSet := filteredRepoURLs[name] 831 + url, ok := result.RepoURLs[name] 832 + if !repoSet && ok { 833 + filteredRepoURLs[name] = url 834 + } 835 + _, fragSet := filteredLineFragments[name] 836 + frag, ok := result.LineFragments[name] 837 + if !fragSet && ok { 838 + filteredLineFragments[name] = frag 839 + } 840 + } 841 + 819 842 sender.Send(&zoekt.SearchResult{ 820 843 Stats: stats, 821 844 Progress: zoekt.Progress{ ··· 823 846 MaxPendingPriority: result.MaxPendingPriority, 824 847 }, 825 848 Files: result.Files[a:b], 826 - RepoURLs: map[string]string{repoName: result.RepoURLs[repoName]}, 827 - LineFragments: map[string]string{repoName: result.LineFragments[repoName]}, 849 + RepoURLs: filteredRepoURLs, 850 + LineFragments: filteredLineFragments, 828 851 }) 829 852 } 830 853