fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Command zoekt-mirror-gitiles fetches all repos of a Gitiles host.
16// It does double duty for other "simple" web hosts.
17package main
18
19import (
20 "flag"
21 "fmt"
22 "log"
23 "net/url"
24 "os"
25 "path/filepath"
26
27 "github.com/sourcegraph/zoekt/internal/gitindex"
28)
29
// crawlTarget describes a single repository discovered on the crawled host.
type crawlTarget struct {
	// cloneURL is handed to gitindex.CloneRepo as the URL to clone from.
	cloneURL string

	// webURL is recorded in the clone's config under "zoekt.web-url".
	webURL string

	// webURLType is recorded in the clone's config under "zoekt.web-url-type".
	webURLType string
}
35
36type hostCrawler func(*url.URL, func(string) bool) (map[string]*crawlTarget, error)
37
38func main() {
39 dest := flag.String("dest", "", "destination directory")
40 namePattern := flag.String("name", "", "only clone repos whose name matches the regexp.")
41 excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
42 hostType := flag.String("type", "gitiles", "which webserver to crawl. Choices: gitiles, cgit")
43 flag.Parse()
44
45 if len(flag.Args()) < 1 {
46 log.Fatal("must provide URL argument.")
47 }
48
49 var crawler hostCrawler
50 switch *hostType {
51 case "gitiles":
52 crawler = getGitilesRepos
53 case "cgit":
54 crawler = getCGitRepos
55 default:
56 log.Fatalf("unknown host type %q", *hostType)
57 }
58
59 rootURL, err := url.Parse(flag.Arg(0))
60 if err != nil {
61 log.Fatalf("url.Parse(): %v", err)
62 }
63
64 if *dest == "" {
65 log.Fatal("must set --dest")
66 }
67
68 if err := os.MkdirAll(filepath.Join(*dest, rootURL.Host, rootURL.Path), 0o755); err != nil {
69 log.Fatal(err)
70 }
71
72 filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
73 if err != nil {
74 log.Fatal(err)
75 }
76
77 repos, err := crawler(rootURL, filter.Include)
78 if err != nil {
79 log.Fatal(err)
80 }
81
82 for nm, target := range repos {
83 // For git.savannah.gnu.org, this puts an ugly "CGit"
84 // path component into the name. However, it's
85 // possible that there are multiple, different CGit pages
86 // on the host, so we have to keep it.
87 fullName := filepath.Join(rootURL.Host, rootURL.Path, nm)
88 config := map[string]string{
89 "zoekt.web-url": target.webURL,
90 "zoekt.web-url-type": target.webURLType,
91 "zoekt.name": fullName,
92 }
93
94 dest, err := gitindex.CloneRepo(*dest, fullName, target.cloneURL, config)
95 if err != nil {
96 log.Fatal(err)
97 }
98 if dest != "" {
99 fmt.Println(dest)
100 }
101 }
102}