fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Command zoekt-repo-index indexes repository that uses the Android 'repo' 16// tool (https://android.googlesource.com/tools/repo). The constituent git 17// repositories should already have been downloaded to the --repo_cache 18// directory, for example: 19// 20// go install github.com/sourcegraph/zoekt/cmd/zoekt-repo-index && 21// 22// zoekt-repo-index -base_url https://gfiber.googlesource.com/ \ 23// -manifest_repo_url https://gfiber.googlesource.com/manifests \ 24// -manifest_rev_prefix=refs/heads/ \ 25// -rev_prefix="refs/remotes/" \ 26// -repo_cache ~/zoekt-serving/repos/ \ 27// -shard_limit 50000000 \ 28// master:default_unrestricted.xml 29package main 30 31import ( 32 "crypto/sha1" 33 "flag" 34 "fmt" 35 "io" 36 "log" 37 "net/url" 38 "path" 39 "path/filepath" 40 "sort" 41 "strings" 42 43 git "github.com/go-git/go-git/v5" 44 "github.com/go-git/go-git/v5/plumbing" 45 "github.com/google/slothfs/manifest" 46 "go.uber.org/automaxprocs/maxprocs" 47 48 "github.com/sourcegraph/zoekt" 49 "github.com/sourcegraph/zoekt/ignore" 50 "github.com/sourcegraph/zoekt/index" 51 "github.com/sourcegraph/zoekt/internal/gitindex" 52) 53 54var _ = log.Println 55 56type fileKey struct { 57 SubRepoPath string 58 Path string 59 ID plumbing.Hash 60} 61 62func (k *fileKey) FullPath() string { 63 return filepath.Join(k.SubRepoPath, k.Path) 64} 65 66type branchFile struct { 67 branch, file string 68 mf *manifest.Manifest 69 manifestPath string 70} 71 72func parseBranches(manifestRepoURL, revPrefix string, cache *gitindex.RepoCache, args []string) ([]branchFile, error) { 73 var branches []branchFile 74 if manifestRepoURL != "" { 75 u, err := url.Parse(manifestRepoURL) 76 if err != nil { 77 return nil, err 78 } 79 80 repo, err := cache.Open(u) 81 if err != nil { 82 return nil, err 83 } 84 85 for _, f := range args { 86 fs := strings.SplitN(f, ":", 2) 87 if len(fs) != 2 { 88 return nil, fmt.Errorf("cannot parse %q as BRANCH:FILE", f) 89 } 90 mf, err := getManifest(repo, revPrefix+fs[0], fs[1]) 91 if err != nil { 92 return nil, fmt.Errorf("manifest %s:%s: %v", fs[0], fs[1], err) 93 } 94 95 branches = append(branches, branchFile{ 96 branch: fs[0], 97 file: fs[1], 98 mf: mf, 99 manifestPath: cache.Path(u), 100 }) 101 } 102 } else { 103 if len(args) == 0 { 104 return nil, fmt.Errorf("must give XML file argument") 105 } 106 for _, f := range args { 107 mf, err := manifest.ParseFile(f) 108 if err != nil { 109 return nil, err 110 } 111 112 branches = append(branches, branchFile{ 113 branch: "HEAD", 114 file: filepath.Base(f), 115 mf: mf, 116 manifestPath: f, 117 }) 118 } 119 } 120 return branches, nil 121} 122 123func main() { 124 sizeMax := flag.Int("file_limit", 128<<10, "maximum file size") 125 shardLimit := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard") 126 parallelism := flag.Int("parallelism", 1, "maximum number of parallel indexing processes") 127 128 revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references") 129 baseURLStr := flag.String("base_url", "", "base url to interpret repository names") 130 repoCacheDir := flag.String("repo_cache", "", "root for repository cache") 131 indexDir := flag.String("index", index.DefaultDir, "index directory for *.zoekt files") 132 manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments") 133 manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository") 134 repoName := flag.String("name", "", "set repository name") 135 repoURL := flag.String("url", "", "set repository URL") 136 maxSubProjects := flag.Int("max_sub_projects", 0, "trim number of projects in manifest, for debugging.") 137 incremental := flag.Bool("incremental", true, "only index if the repository has changed.") 138 flag.Parse() 139 140 // Tune GOMAXPROCS to match Linux container CPU quota. 141 _, _ = maxprocs.Set() 142 143 if *repoCacheDir == "" { 144 log.Fatal("must set --repo_cache") 145 } 146 repoCache := gitindex.NewRepoCache(*repoCacheDir) 147 148 if u, err := url.Parse(*baseURLStr); err != nil { 149 log.Fatalf("Parse(%q): %v", u, err) 150 } else if *repoName == "" { 151 *repoName = filepath.Join(u.Host, u.Path) 152 } 153 154 opts := index.Options{ 155 Parallelism: *parallelism, 156 SizeMax: *sizeMax, 157 ShardMax: *shardLimit, 158 IndexDir: *indexDir, 159 RepositoryDescription: zoekt.Repository{ 160 Name: *repoName, 161 URL: *repoURL, 162 }, 163 } 164 opts.SetDefaults() 165 baseURL, err := url.Parse(*baseURLStr) 166 if err != nil { 167 log.Fatalf("Parse baseURL %q: %v", *baseURLStr, err) 168 } 169 170 branches, err := parseBranches(*manifestRepoURL, *manifestRevPrefix, repoCache, flag.Args()) 171 if err != nil { 172 log.Fatalf("parseBranches(%s, %s): %v", *manifestRepoURL, *manifestRevPrefix, err) 173 } 174 if len(branches) == 0 { 175 log.Fatal("must specify at least one branch") 176 } 177 if *maxSubProjects > 0 { 178 for _, b := range branches { 179 if *maxSubProjects < len(b.mf.Project) { 180 b.mf.Project = b.mf.Project[:*maxSubProjects] 181 } 182 } 183 } 184 185 perBranch := map[string]map[fileKey]gitindex.BlobLocation{} 186 opts.SubRepositories = map[string]*zoekt.Repository{} 187 188 // branch => repo => version 189 versionMap := map[string]map[string]plumbing.Hash{} 190 for _, br := range branches { 191 br.mf.Filter() 192 files, versions, err := iterateManifest(br.mf, *baseURL, *revPrefix, repoCache) 193 if err != nil { 194 log.Fatalf("iterateManifest: %v", err) 195 } 196 197 perBranch[br.branch] = files 198 for key, repo := range files { 199 _, ok := opts.SubRepositories[key.SubRepoPath] 200 if ok { 201 // This can be incorrect: if the layout of manifests 202 // changes across branches, then the same file could 203 // be in different subRepos. We'll pretend this is not 204 // a problem. 205 continue 206 } 207 208 desc := &zoekt.Repository{} 209 if err := gitindex.SetTemplatesFromOrigin(desc, repo.URL); err != nil { 210 log.Fatalf("SetTemplatesFromOrigin(%s): %v", repo.URL, err) 211 } 212 213 opts.SubRepositories[key.SubRepoPath] = desc 214 } 215 versionMap[br.branch] = versions 216 } 217 218 for _, br := range branches { 219 var paths []string 220 for p := range opts.SubRepositories { 221 paths = append(paths, p) 222 } 223 sort.Strings(paths) 224 225 // Compute a version of the aggregate. This version 226 // has nothing to do with git, but will let us do 227 // incrementality correctly. 228 hasher := sha1.New() 229 for _, p := range paths { 230 repo := opts.SubRepositories[p] 231 id := versionMap[br.branch][p] 232 233 // it is possible that 'id' is zero, if this 234 // branch of the manifest doesn't have this 235 // particular subrepository. 236 hasher.Write([]byte(p)) 237 hasher.Write([]byte(id.String())) 238 repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{ 239 Name: br.branch, 240 Version: id.String(), 241 }) 242 } 243 244 opts.RepositoryDescription.Branches = append(opts.RepositoryDescription.Branches, zoekt.RepositoryBranch{ 245 Name: br.branch, 246 Version: fmt.Sprintf("%x", hasher.Sum(nil)), 247 }) 248 } 249 250 // key => branch 251 all := map[fileKey][]string{} 252 for br, files := range perBranch { 253 for k := range files { 254 all[k] = append(all[k], br) 255 } 256 } 257 258 if *incremental && opts.IncrementalSkipIndexing() { 259 return 260 } 261 262 builder, err := index.NewBuilder(opts) 263 if err != nil { 264 log.Fatal(err) 265 } 266 for k, branches := range all { 267 loc := perBranch[branches[0]][k] 268 data, err := loc.Blob(&k.ID) 269 if err != nil { 270 log.Fatal(err) 271 } 272 273 doc := index.Document{ 274 Name: k.FullPath(), 275 Content: data, 276 SubRepositoryPath: k.SubRepoPath, 277 } 278 279 doc.Branches = append(doc.Branches, branches...) 280 if err := builder.Add(doc); err != nil { 281 log.Printf("Add(%s): %v", doc.Name, err) 282 break 283 } 284 } 285 if err := builder.Finish(); err != nil { 286 log.Fatalf("Finish: %v", err) 287 } 288} 289 290// getManifest parses the manifest XML at the given branch/path inside a Git repository. 291func getManifest(repo *git.Repository, branch, path string) (*manifest.Manifest, error) { 292 ref, err := repo.Reference(plumbing.ReferenceName("refs/heads/"+branch), true) 293 if err != nil { 294 return nil, err 295 } 296 297 commit, err := repo.CommitObject(ref.Hash()) 298 if err != nil { 299 return nil, err 300 } 301 302 tree, err := repo.TreeObject(commit.TreeHash) 303 if err != nil { 304 return nil, err 305 } 306 307 entry, err := tree.FindEntry(path) 308 if err != nil { 309 return nil, err 310 } 311 312 blob, err := repo.BlobObject(entry.Hash) 313 if err != nil { 314 return nil, err 315 } 316 r, err := blob.Reader() 317 if err != nil { 318 return nil, err 319 } 320 defer r.Close() 321 322 content, _ := io.ReadAll(r) 323 return manifest.Parse(content) 324} 325 326// iterateManifest constructs a complete tree from the given Manifest. 327func iterateManifest(mf *manifest.Manifest, 328 baseURL url.URL, revPrefix string, 329 cache *gitindex.RepoCache, 330) (map[fileKey]gitindex.BlobLocation, map[string]plumbing.Hash, error) { 331 allFiles := map[fileKey]gitindex.BlobLocation{} 332 allVersions := map[string]plumbing.Hash{} 333 for _, p := range mf.Project { 334 rev := mf.ProjectRevision(&p) 335 336 projURL := baseURL 337 projURL.Path = path.Join(projURL.Path, p.Name) 338 339 topRepo, err := cache.Open(&projURL) 340 if err != nil { 341 return nil, nil, err 342 } 343 344 ref, err := topRepo.Reference(plumbing.ReferenceName(revPrefix+rev), true) 345 if err != nil { 346 return nil, nil, err 347 } 348 349 commit, err := topRepo.CommitObject(ref.Hash()) 350 if err != nil { 351 return nil, nil, err 352 } 353 if err != nil { 354 return nil, nil, err 355 } 356 357 allVersions[p.GetPath()] = commit.Hash 358 359 tree, err := commit.Tree() 360 if err != nil { 361 return nil, nil, err 362 } 363 364 rw := gitindex.NewRepoWalker(topRepo, projURL.String(), cache) 365 subVersions, err := rw.CollectFiles(tree, rev, &ignore.Matcher{}) 366 if err != nil { 367 return nil, nil, err 368 } 369 370 for key, repo := range rw.Files { 371 allFiles[fileKey{ 372 SubRepoPath: filepath.Join(p.GetPath(), key.SubRepoPath), 373 Path: key.Path, 374 ID: key.ID, 375 }] = repo 376 } 377 378 for path, version := range subVersions { 379 allVersions[filepath.Join(p.GetPath(), path)] = version 380 } 381 } 382 383 return allFiles, allVersions, nil 384}