fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
// Command zoekt-mirror-github fetches all repositories of a GitHub user or
// organization and clones them. It is strongly recommended to get a personal
// API token from https://github.com/settings/tokens, save the token in a file,
// and point the --token option to it.
19package main
20
21import (
22 "context"
23 "flag"
24 "fmt"
25 "log"
26 "net/http"
27 "net/url"
28 "os"
29 "path/filepath"
30 "strconv"
31 "strings"
32
33 "github.com/google/go-github/v78/github"
34 "golang.org/x/oauth2"
35
36 "github.com/sourcegraph/zoekt/gitindex"
37)
38
// topicsFlag collects every occurrence of a repeatable string flag, in the
// order the values were given. It is registered through flag.Var.
type topicsFlag []string

// String renders the collected values as one comma-separated list.
func (f *topicsFlag) String() string {
	values := []string(*f)
	return strings.Join(values, ",")
}

// Set records one more value for the flag. It never returns an error.
func (f *topicsFlag) Set(value string) error {
	collected := append(*f, value)
	*f = collected
	return nil
}
49
// reposFilters bundles the repository filters taken from the command-line
// flags. getOrgRepos and getUserRepos hand its fields to filterRepositories
// for every page of listed repositories.
type reposFilters struct {
	// topics, when non-empty, keeps only repos carrying at least one of them.
	topics []string
	// excludeTopics drops repos carrying any of them.
	excludeTopics []string
	// noArchived, when it points to true, drops archived repos. It is a pointer
	// because main stores the result of flag.Bool here directly.
	noArchived *bool
	// visibility, when non-empty, keeps only repos whose visibility is listed.
	visibility []string
}
56
57func main() {
58 dest := flag.String("dest", "", "destination directory")
59 githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.")
60 org := flag.String("org", "", "organization to mirror")
61 user := flag.String("user", "", "user to mirror")
62 token := flag.String("token", "", "file holding API token. If not set defaults to $HOME/.github-token if present, else uses unauthenticated GitHub client.")
63 forks := flag.Bool("forks", false, "also mirror forks.")
64 deleteRepos := flag.Bool("delete", false, "delete missing repos")
65 namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
66 excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
67 topics := topicsFlag{}
68 flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
69 excludeTopics := topicsFlag{}
70 flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
71 noArchived := flag.Bool("no_archived", false, "mirror only projects that are not archived")
72 visibility := topicsFlag{}
73 flag.Var(&visibility, "visibility", "filter repos by visibility (public, private, internal). You can add multiple values by setting this more than once.")
74
75 flag.Parse()
76
77 if *dest == "" {
78 log.Fatal("must set --dest")
79 }
80 if *githubURL == "" && *org == "" && *user == "" {
81 log.Fatal("must set either --org or --user when github.com is used as host")
82 }
83
84 var host string
85 var client *github.Client
86 tc := newOAuthClient(token)
87 if *githubURL != "" {
88 rootURL, err := url.Parse(*githubURL)
89 if err != nil {
90 log.Fatal(err)
91 }
92 host = rootURL.Host
93 apiPath, err := url.Parse("/api/v3/")
94 if err != nil {
95 log.Fatal(err)
96 }
97 apiBaseURL := rootURL.ResolveReference(apiPath).String()
98 client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc)
99 if err != nil {
100 log.Fatal(err)
101 }
102 } else {
103 host = "github.com"
104 client = github.NewClient(tc)
105 }
106 destDir := filepath.Join(*dest, host)
107 if err := os.MkdirAll(destDir, 0o755); err != nil {
108 log.Fatal(err)
109 }
110
111 reposFilters := reposFilters{
112 topics: topics,
113 excludeTopics: excludeTopics,
114 noArchived: noArchived,
115 visibility: visibility,
116 }
117 var repos []*github.Repository
118 var err error
119 if *org != "" {
120 repos, err = getOrgRepos(client, *org, reposFilters)
121 } else if *user != "" {
122 repos, err = getUserRepos(client, *user, reposFilters)
123 } else {
124 log.Printf("no user or org specified, cloning all repos.")
125 repos, err = getUserRepos(client, "", reposFilters)
126 }
127
128 if err != nil {
129 log.Fatal(err)
130 }
131
132 if !*forks {
133 trimmed := repos[:0]
134 for _, r := range repos {
135 if r.Fork == nil || !*r.Fork {
136 trimmed = append(trimmed, r)
137 }
138 }
139 repos = trimmed
140 }
141
142 filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
143 if err != nil {
144 log.Fatal(err)
145 }
146
147 {
148 trimmed := repos[:0]
149 for _, r := range repos {
150 if filter.Include(*r.Name) {
151 trimmed = append(trimmed, r)
152 }
153 }
154 repos = trimmed
155 }
156
157 if err := cloneRepos(destDir, repos); err != nil {
158 log.Fatalf("cloneRepos: %v", err)
159 }
160
161 if *deleteRepos {
162 if err := deleteStaleRepos(*dest, filter, repos, *org+*user); err != nil {
163 log.Fatalf("deleteStaleRepos: %v", err)
164 }
165 }
166}
167
168func newOAuthClient(token *string) *http.Client {
169 var content []byte
170 var err error
171
172 if *token != "" { // user explicitly provided a token which must exist
173 content, err = os.ReadFile(*token)
174 if err != nil {
175 log.Fatal(err)
176 }
177 } else {
178 defaultToken := filepath.Join(os.Getenv("HOME"), ".github-token")
179 content, err = os.ReadFile(defaultToken)
180 if err != nil && os.IsNotExist(err) { // use unauthenticated client
181 return nil
182 } else if err != nil {
183 log.Fatal(err)
184 }
185 }
186
187 ts := oauth2.StaticTokenSource(
188 &oauth2.Token{
189 AccessToken: strings.TrimSpace(string(content)),
190 })
191 return oauth2.NewClient(context.Background(), ts)
192}
193
194func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*github.Repository, user string) error {
195 var baseURL string
196 if len(repos) > 0 {
197 baseURL = *repos[0].HTMLURL
198 } else {
199 return nil
200 }
201 u, err := url.Parse(baseURL)
202 if err != nil {
203 return err
204 }
205 u.Path = user
206
207 names := map[string]struct{}{}
208 for _, r := range repos {
209 u, err := url.Parse(*r.HTMLURL)
210 if err != nil {
211 return err
212 }
213
214 names[filepath.Join(u.Host, u.Path+".git")] = struct{}{}
215 }
216 if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
217 log.Fatalf("deleteRepos: %v", err)
218 }
219 return nil
220}
221
// hasIntersection reports whether s1 and s2 have at least one string in
// common. It is false whenever either slice is empty.
func hasIntersection(s1, s2 []string) bool {
	seen := make(map[string]struct{}, len(s1))
	for _, item := range s1 {
		seen[item] = struct{}{}
	}
	for _, candidate := range s2 {
		if _, found := seen[candidate]; found {
			return true
		}
	}
	return false
}
234
235func filterRepositories(repos []*github.Repository, include []string, exclude []string, noArchived bool, visibility []string) (filteredRepos []*github.Repository) {
236 for _, repo := range repos {
237 if noArchived && *repo.Archived {
238 continue
239 }
240 if len(visibility) > 0 && !hasIntersection(visibility, []string{repo.GetVisibility()}) {
241 continue
242 }
243 if (len(include) == 0 || hasIntersection(include, repo.Topics)) &&
244 !hasIntersection(exclude, repo.Topics) {
245 filteredRepos = append(filteredRepos, repo)
246 }
247 }
248 return
249}
250
251func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ([]*github.Repository, error) {
252 var allRepos []*github.Repository
253 opt := &github.RepositoryListByOrgOptions{}
254 for {
255 repos, resp, err := client.Repositories.ListByOrg(context.Background(), org, opt)
256 if err != nil {
257 return nil, err
258 }
259 if len(repos) == 0 {
260 break
261 }
262
263 opt.Page = resp.NextPage
264 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived, reposFilters.visibility)
265 allRepos = append(allRepos, repos...)
266 if resp.NextPage == 0 {
267 break
268 }
269 }
270 return allRepos, nil
271}
272
273func getUserRepos(client *github.Client, user string, reposFilters reposFilters) ([]*github.Repository, error) {
274 var allRepos []*github.Repository
275 opt := &github.RepositoryListOptions{}
276 for {
277 repos, resp, err := client.Repositories.List(context.Background(), user, opt)
278 if err != nil {
279 return nil, err
280 }
281 if len(repos) == 0 {
282 break
283 }
284
285 opt.Page = resp.NextPage
286 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived, reposFilters.visibility)
287 allRepos = append(allRepos, repos...)
288 if resp.NextPage == 0 {
289 break
290 }
291 }
292 return allRepos, nil
293}
294
// itoa formats the integer p points to in base 10. A nil p yields the empty
// string.
func itoa(p *int) string {
	if p == nil {
		return ""
	}
	return strconv.Itoa(*p)
}
301
302func cloneRepos(destDir string, repos []*github.Repository) error {
303 for _, r := range repos {
304 host, err := url.Parse(*r.HTMLURL)
305 if err != nil {
306 return err
307 }
308
309 config := map[string]string{
310 "zoekt.web-url-type": "github",
311 "zoekt.web-url": *r.HTMLURL,
312 "zoekt.name": filepath.Join(host.Hostname(), *r.FullName),
313
314 "zoekt.github-stars": itoa(r.StargazersCount),
315 "zoekt.github-watchers": itoa(r.WatchersCount),
316 "zoekt.github-subscribers": itoa(r.SubscribersCount),
317 "zoekt.github-forks": itoa(r.ForksCount),
318
319 "zoekt.archived": marshalBool(r.Archived != nil && *r.Archived),
320 "zoekt.fork": marshalBool(r.Fork != nil && *r.Fork),
321 "zoekt.public": marshalBool(r.Private == nil || !*r.Private),
322 }
323 dest, err := gitindex.CloneRepo(destDir, *r.FullName, *r.CloneURL, config)
324 if err != nil {
325 return err
326 }
327 if dest != "" {
328 fmt.Println(dest)
329 }
330
331 }
332
333 return nil
334}
335
// marshalBool encodes b as the string "1" for true and "0" for false.
func marshalBool(b bool) string {
	if !b {
		return "0"
	}
	return "1"
}