fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 6.5 kB View raw
1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package gitindex 16 17import ( 18 "fmt" 19 "io" 20 "log" 21 "net/url" 22 "path" 23 "path/filepath" 24 "strings" 25 26 "github.com/go-git/go-git/v5" 27 "github.com/go-git/go-git/v5/plumbing" 28 "github.com/go-git/go-git/v5/plumbing/filemode" 29 "github.com/go-git/go-git/v5/plumbing/object" 30 31 "github.com/sourcegraph/zoekt/ignore" 32) 33 34// RepoWalker walks one or more commit trees, collecting the files to index in its Files map. 35// 36// It also recurses into submodules if Options.Submodules is enabled. 37type RepoWalker struct { 38 Files map[fileKey]BlobLocation 39 40 repo *git.Repository 41 repoURL *url.URL 42 43 // Path => SubmoduleEntry 44 submodules map[string]*SubmoduleEntry 45 repoCache *RepoCache 46} 47 48// subURL returns the URL for a submodule. 49func (rw *RepoWalker) subURL(relURL string) (*url.URL, error) { 50 if rw.repoURL == nil { 51 return nil, fmt.Errorf("no URL for base repo") 52 } 53 if strings.HasPrefix(relURL, "../") { 54 u := *rw.repoURL 55 u.Path = path.Join(u.Path, relURL) 56 return &u, nil 57 } 58 59 return url.Parse(relURL) 60} 61 62// NewRepoWalker creates a new RepoWalker. 63func NewRepoWalker(r *git.Repository, repoURL string, repoCache *RepoCache) *RepoWalker { 64 u, _ := url.Parse(repoURL) 65 return &RepoWalker{ 66 repo: r, 67 repoURL: u, 68 Files: map[fileKey]BlobLocation{}, 69 repoCache: repoCache, 70 } 71} 72 73// parseModuleMap initializes rw.submodules. 74func (rw *RepoWalker) parseModuleMap(t *object.Tree) error { 75 if rw.repoCache == nil { 76 return nil 77 } 78 modEntry, _ := t.File(".gitmodules") 79 if modEntry != nil { 80 c, err := blobContents(&modEntry.Blob) 81 if err != nil { 82 return fmt.Errorf("blobContents: %w", err) 83 } 84 mods, err := ParseGitModules(c) 85 if err != nil { 86 return fmt.Errorf("ParseGitModules: %w", err) 87 } 88 rw.submodules = map[string]*SubmoduleEntry{} 89 for _, entry := range mods { 90 rw.submodules[entry.Path] = entry 91 } 92 } 93 return nil 94} 95 96// CollectFiles fetches the blob SHA1s for the tree. If repoCache is 97// non-nil, recurse into submodules. In addition, it returns a mapping 98// that indicates in which repo each SHA1 can be found. 99// 100// The collected files are available through the RepoWalker.Files map. 101func (rw *RepoWalker) CollectFiles(t *object.Tree, branch string, ig *ignore.Matcher) (map[string]plumbing.Hash, error) { 102 if err := rw.parseModuleMap(t); err != nil { 103 return nil, fmt.Errorf("parseModuleMap: %w", err) 104 } 105 106 ig, err := newIgnoreMatcher(t) 107 if err != nil { 108 return nil, fmt.Errorf("newIgnoreMatcher: %w", err) 109 } 110 111 tw := object.NewTreeWalker(t, true, make(map[plumbing.Hash]bool)) 112 defer tw.Close() 113 114 // Path => commit SHA1 115 subRepoVersions := make(map[string]plumbing.Hash) 116 for { 117 name, entry, err := tw.Next() 118 if err == io.EOF { 119 break 120 } 121 if err := rw.handleEntry(name, &entry, branch, subRepoVersions, ig); err != nil { 122 return nil, fmt.Errorf("handleEntry: %w", err) 123 } 124 } 125 return subRepoVersions, nil 126} 127 128func (rw *RepoWalker) tryHandleSubmodule(p string, id *plumbing.Hash, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error { 129 if err := rw.handleSubmodule(p, id, branch, subRepoVersions, ig); err != nil { 130 log.Printf("submodule %s: ignoring error %v", p, err) 131 } 132 return nil 133} 134 135func (rw *RepoWalker) handleSubmodule(p string, id *plumbing.Hash, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error { 136 submod := rw.submodules[p] 137 if submod == nil { 138 return fmt.Errorf("no entry for submodule path %q", rw.repoURL) 139 } 140 141 subURL, err := rw.subURL(submod.URL) 142 if err != nil { 143 return err 144 } 145 146 subRepo, err := rw.repoCache.Open(subURL) 147 if err != nil { 148 return err 149 } 150 151 obj, err := subRepo.CommitObject(*id) 152 if err != nil { 153 return err 154 } 155 tree, err := subRepo.TreeObject(obj.TreeHash) 156 if err != nil { 157 return err 158 } 159 160 subRepoVersions[p] = *id 161 162 sw := NewRepoWalker(subRepo, subURL.String(), rw.repoCache) 163 subVersions, err := sw.CollectFiles(tree, branch, ig) 164 if err != nil { 165 return err 166 } 167 for k, repo := range sw.Files { 168 rw.Files[fileKey{ 169 SubRepoPath: filepath.Join(p, k.SubRepoPath), 170 Path: k.Path, 171 ID: k.ID, 172 }] = repo 173 } 174 for k, v := range subVersions { 175 subRepoVersions[filepath.Join(p, k)] = v 176 } 177 return nil 178} 179 180func (rw *RepoWalker) handleEntry(p string, e *object.TreeEntry, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error { 181 if e.Mode == filemode.Submodule { 182 if rw.repoCache != nil { 183 // Index the submodule using repo cache 184 if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil { 185 return fmt.Errorf("submodule %s: %v", p, err) 186 } 187 } else { 188 // Record the commit ID for the submodule path 189 // This will be the submodule's commit hash, not the parent's 190 subRepoVersions[p] = e.Hash 191 } 192 } 193 194 switch e.Mode { 195 case filemode.Regular, filemode.Executable, filemode.Symlink: 196 default: 197 return nil 198 } 199 200 // Skip ignored files 201 if ig.Match(p) { 202 return nil 203 } 204 205 key := fileKey{Path: p, ID: e.Hash} 206 if existing, ok := rw.Files[key]; ok { 207 existing.Branches = append(existing.Branches, branch) 208 rw.Files[key] = existing 209 } else { 210 rw.Files[key] = BlobLocation{GitRepo: rw.repo, URL: rw.repoURL, Branches: []string{branch}} 211 } 212 213 return nil 214} 215 216// fileKey describes a blob at a location in the final tree. We also 217// record the subrepository from where it came. 218type fileKey struct { 219 SubRepoPath string 220 Path string 221 ID plumbing.Hash 222} 223 224func (k *fileKey) FullPath() string { 225 return filepath.Join(k.SubRepoPath, k.Path) 226} 227 228// BlobLocation holds the repo where the blob can be found, plus other information 229// needed for indexing like its branches. 230type BlobLocation struct { 231 GitRepo *git.Repository 232 URL *url.URL 233 234 // Branches is the list of branches that contain the blob. 235 Branches []string 236} 237 238func (l *BlobLocation) Blob(id *plumbing.Hash) ([]byte, error) { 239 blob, err := l.GitRepo.BlobObject(*id) 240 if err != nil { 241 return nil, err 242 } 243 return blobContents(blob) 244}