fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package gitindex
16
17import (
18 "fmt"
19 "io"
20 "log"
21 "net/url"
22 "path"
23 "path/filepath"
24 "strings"
25
26 "github.com/go-git/go-git/v5/plumbing"
27 "github.com/go-git/go-git/v5/plumbing/filemode"
28 "github.com/go-git/go-git/v5/plumbing/object"
29
30 git "github.com/go-git/go-git/v5"
31)
32
33// repoWalker walks a tree, recursing into submodules.
34type repoWalker struct {
35 repo *git.Repository
36
37 repoURL *url.URL
38 tree map[fileKey]BlobRepo
39
40 // Path => SubmoduleEntry
41 submodules map[string]*SubmoduleEntry
42
43 // Path => commit SHA1
44 subRepoVersions map[string]plumbing.Hash
45 repoCache *RepoCache
46}
47
48// subURL returns the URL for a submodule.
49func (w *repoWalker) subURL(relURL string) (*url.URL, error) {
50 if w.repoURL == nil {
51 return nil, fmt.Errorf("no URL for base repo")
52 }
53 if strings.HasPrefix(relURL, "../") {
54 u := *w.repoURL
55 u.Path = path.Join(u.Path, relURL)
56 return &u, nil
57 }
58
59 return url.Parse(relURL)
60}
61
62// newRepoWalker creates a new repoWalker.
63func newRepoWalker(r *git.Repository, repoURL string, repoCache *RepoCache) *repoWalker {
64 u, _ := url.Parse(repoURL)
65 return &repoWalker{
66 repo: r,
67 repoURL: u,
68 tree: map[fileKey]BlobRepo{},
69 repoCache: repoCache,
70 subRepoVersions: map[string]plumbing.Hash{},
71 }
72}
73
74// parseModuleMap initializes rw.submodules.
75func (rw *repoWalker) parseModuleMap(t *object.Tree) error {
76 if rw.repoCache == nil {
77 return nil
78 }
79 modEntry, _ := t.File(".gitmodules")
80 if modEntry != nil {
81 c, err := blobContents(&modEntry.Blob)
82 if err != nil {
83 return fmt.Errorf("blobContents: %w", err)
84 }
85 mods, err := ParseGitModules(c)
86 if err != nil {
87 return fmt.Errorf("ParseGitModules: %w", err)
88 }
89 rw.submodules = map[string]*SubmoduleEntry{}
90 for _, entry := range mods {
91 rw.submodules[entry.Path] = entry
92 }
93 }
94 return nil
95}
96
97// TreeToFiles fetches the blob SHA1s for a tree. If repoCache is
98// non-nil, recurse into submodules. In addition, it returns a mapping
99// that indicates in which repo each SHA1 can be found.
100func TreeToFiles(r *git.Repository, t *object.Tree, repoURL string, repoCache *RepoCache) (map[fileKey]BlobRepo, map[string]plumbing.Hash, error) {
101 rw := newRepoWalker(r, repoURL, repoCache)
102
103 if err := rw.parseModuleMap(t); err != nil {
104 return nil, nil, fmt.Errorf("parseModuleMap: %w", err)
105 }
106
107 tw := object.NewTreeWalker(t, true, make(map[plumbing.Hash]bool))
108 defer tw.Close()
109 for {
110 name, entry, err := tw.Next()
111 if err == io.EOF {
112 break
113 }
114 if err := rw.handleEntry(name, &entry); err != nil {
115 return nil, nil, fmt.Errorf("handleEntry: %w", err)
116 }
117 }
118 return rw.tree, rw.subRepoVersions, nil
119}
120
121func (r *repoWalker) tryHandleSubmodule(p string, id *plumbing.Hash) error {
122 if err := r.handleSubmodule(p, id); err != nil {
123 log.Printf("submodule %s: ignoring error %v", p, err)
124 }
125 return nil
126}
127
128func (r *repoWalker) handleSubmodule(p string, id *plumbing.Hash) error {
129 submod := r.submodules[p]
130 if submod == nil {
131 return fmt.Errorf("no entry for submodule path %q", r.repoURL)
132 }
133
134 subURL, err := r.subURL(submod.URL)
135 if err != nil {
136 return err
137 }
138
139 subRepo, err := r.repoCache.Open(subURL)
140 if err != nil {
141 return err
142 }
143
144 obj, err := subRepo.CommitObject(*id)
145 if err != nil {
146 return err
147 }
148 tree, err := subRepo.TreeObject(obj.TreeHash)
149 if err != nil {
150 return err
151 }
152
153 r.subRepoVersions[p] = *id
154
155 subTree, subVersions, err := TreeToFiles(subRepo, tree, subURL.String(), r.repoCache)
156 if err != nil {
157 return err
158 }
159 for k, repo := range subTree {
160 r.tree[fileKey{
161 SubRepoPath: filepath.Join(p, k.SubRepoPath),
162 Path: k.Path,
163 ID: k.ID,
164 }] = repo
165 }
166 for k, v := range subVersions {
167 r.subRepoVersions[filepath.Join(p, k)] = v
168 }
169 return nil
170}
171
172func (r *repoWalker) handleEntry(p string, e *object.TreeEntry) error {
173 if e.Mode == filemode.Submodule && r.repoCache != nil {
174 if err := r.tryHandleSubmodule(p, &e.Hash); err != nil {
175 return fmt.Errorf("submodule %s: %v", p, err)
176 }
177 }
178
179 switch e.Mode {
180 case filemode.Regular, filemode.Executable, filemode.Symlink:
181 default:
182 return nil
183 }
184
185 r.tree[fileKey{Path: p, ID: e.Hash}] = BlobRepo{
186 GitRepo: r.repo,
187 URL: r.repoURL,
188 }
189 return nil
190}
191
192// fileKey describes a blob at a location in the final tree. We also
193// record the subrepository from where it came.
194type fileKey struct {
195 SubRepoPath string
196 Path string
197 ID plumbing.Hash
198}
199
200func (k *fileKey) FullPath() string {
201 return filepath.Join(k.SubRepoPath, k.Path)
202}
203
204// BlobIndexInfo contains information about the blob that's needed for indexing.
205type BlobIndexInfo struct {
206 Repo BlobRepo
207 // Branches is the list of branches that contain the blob.
208 Branches []string
209}
210
211// BlobRepo holds the repo where the blob can be found.
212type BlobRepo struct {
213 GitRepo *git.Repository
214 URL *url.URL
215}
216
217func (l *BlobRepo) Blob(id *plumbing.Hash) ([]byte, error) {
218 blob, err := l.GitRepo.BlobObject(*id)
219 if err != nil {
220 return nil, err
221 }
222 return blobContents(blob)
223}