fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Command zoekt-mirror-github fetches all repos of a github user or organization
16// and clones them. It is strongly recommended to get a personal API token from
17// https://github.com/settings/tokens, save the token in a file, and point the
18// --token option to it.
19package main
20
21import (
22 "context"
23 "flag"
24 "fmt"
25 "log"
26 "net/http"
27 "net/url"
28 "os"
29 "path/filepath"
30 "strconv"
31 "strings"
32
33 "github.com/google/go-github/v27/github"
34 "golang.org/x/oauth2"
35
36 "github.com/sourcegraph/zoekt/internal/gitindex"
37)
38
// topicsFlag is a repeatable command-line flag: every occurrence of the flag
// adds one more topic to the list. It is registered through flag.Var.
type topicsFlag []string

// String renders the collected topics as a single comma-separated string.
func (f *topicsFlag) String() string {
	collected := []string(*f)
	return strings.Join(collected, ",")
}

// Set records one more topic value. It never fails.
func (f *topicsFlag) Set(value string) error {
	extended := append(*f, value)
	*f = extended
	return nil
}
49
// reposFilters bundles the per-repository filters that are applied to every
// page of repositories returned by the GitHub API.
type reposFilters struct {
	// topics: when non-empty, a repository is kept only if it carries at
	// least one of these topics.
	// excludeTopics: a repository carrying any of these topics is dropped.
	topics, excludeTopics []string

	// noArchived points at the --no_archived flag value; when true, archived
	// repositories are dropped.
	noArchived *bool
}
55
56func main() {
57 dest := flag.String("dest", "", "destination directory")
58 githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.")
59 org := flag.String("org", "", "organization to mirror")
60 user := flag.String("user", "", "user to mirror")
61 token := flag.String("token", "", "file holding API token. If not set defaults to $HOME/.github-token if present, else uses unauthenticated GitHub client.")
62 forks := flag.Bool("forks", false, "also mirror forks.")
63 deleteRepos := flag.Bool("delete", false, "delete missing repos")
64 namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
65 excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
66 topics := topicsFlag{}
67 flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
68 excludeTopics := topicsFlag{}
69 flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
70 noArchived := flag.Bool("no_archived", false, "mirror only projects that are not archived")
71
72 flag.Parse()
73
74 if *dest == "" {
75 log.Fatal("must set --dest")
76 }
77 if *githubURL == "" && *org == "" && *user == "" {
78 log.Fatal("must set either --org or --user when github.com is used as host")
79 }
80
81 var host string
82 var client *github.Client
83 tc := newOAuthClient(token)
84 if *githubURL != "" {
85 rootURL, err := url.Parse(*githubURL)
86 if err != nil {
87 log.Fatal(err)
88 }
89 host = rootURL.Host
90 apiPath, err := url.Parse("/api/v3/")
91 if err != nil {
92 log.Fatal(err)
93 }
94 apiBaseURL := rootURL.ResolveReference(apiPath).String()
95 client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc)
96 if err != nil {
97 log.Fatal(err)
98 }
99 } else {
100 host = "github.com"
101 client = github.NewClient(tc)
102 }
103 destDir := filepath.Join(*dest, host)
104 if err := os.MkdirAll(destDir, 0o755); err != nil {
105 log.Fatal(err)
106 }
107
108 reposFilters := reposFilters{
109 topics: topics,
110 excludeTopics: excludeTopics,
111 noArchived: noArchived,
112 }
113 var repos []*github.Repository
114 var err error
115 if *org != "" {
116 repos, err = getOrgRepos(client, *org, reposFilters)
117 } else if *user != "" {
118 repos, err = getUserRepos(client, *user, reposFilters)
119 } else {
120 log.Printf("no user or org specified, cloning all repos.")
121 repos, err = getUserRepos(client, "", reposFilters)
122 }
123
124 if err != nil {
125 log.Fatal(err)
126 }
127
128 if !*forks {
129 trimmed := repos[:0]
130 for _, r := range repos {
131 if r.Fork == nil || !*r.Fork {
132 trimmed = append(trimmed, r)
133 }
134 }
135 repos = trimmed
136 }
137
138 filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
139 if err != nil {
140 log.Fatal(err)
141 }
142
143 {
144 trimmed := repos[:0]
145 for _, r := range repos {
146 if filter.Include(*r.Name) {
147 trimmed = append(trimmed, r)
148 }
149 }
150 repos = trimmed
151 }
152
153 if err := cloneRepos(destDir, repos); err != nil {
154 log.Fatalf("cloneRepos: %v", err)
155 }
156
157 if *deleteRepos {
158 if err := deleteStaleRepos(*dest, filter, repos, *org+*user); err != nil {
159 log.Fatalf("deleteStaleRepos: %v", err)
160 }
161 }
162}
163
164func newOAuthClient(token *string) *http.Client {
165 var content []byte
166 var err error
167
168 if *token != "" { // user explicitly provided a token which must exist
169 content, err = os.ReadFile(*token)
170 if err != nil {
171 log.Fatal(err)
172 }
173 } else {
174 defaultToken := filepath.Join(os.Getenv("HOME"), ".github-token")
175 content, err = os.ReadFile(defaultToken)
176 if err != nil && os.IsNotExist(err) { // use unauthenticated client
177 return nil
178 } else if err != nil {
179 log.Fatal(err)
180 }
181 }
182
183 ts := oauth2.StaticTokenSource(
184 &oauth2.Token{
185 AccessToken: strings.TrimSpace(string(content)),
186 })
187 return oauth2.NewClient(context.Background(), ts)
188}
189
190func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*github.Repository, user string) error {
191 var baseURL string
192 if len(repos) > 0 {
193 baseURL = *repos[0].HTMLURL
194 } else {
195 return nil
196 }
197 u, err := url.Parse(baseURL)
198 if err != nil {
199 return err
200 }
201 u.Path = user
202
203 names := map[string]struct{}{}
204 for _, r := range repos {
205 u, err := url.Parse(*r.HTMLURL)
206 if err != nil {
207 return err
208 }
209
210 names[filepath.Join(u.Host, u.Path+".git")] = struct{}{}
211 }
212 if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
213 log.Fatalf("deleteRepos: %v", err)
214 }
215 return nil
216}
217
// hasIntersection reports whether at least one string occurs in both s1 and
// s2. It returns false when either slice is empty or nil.
func hasIntersection(s1, s2 []string) bool {
	members := make(map[string]struct{}, len(s1))
	for i := range s1 {
		members[s1[i]] = struct{}{}
	}
	for i := range s2 {
		if _, shared := members[s2[i]]; shared {
			return true
		}
	}
	return false
}
230
231func filterRepositories(repos []*github.Repository, include []string, exclude []string, noArchived bool) (filteredRepos []*github.Repository) {
232 for _, repo := range repos {
233 if noArchived && *repo.Archived {
234 continue
235 }
236 if (len(include) == 0 || hasIntersection(include, repo.Topics)) &&
237 !hasIntersection(exclude, repo.Topics) {
238 filteredRepos = append(filteredRepos, repo)
239 }
240 }
241 return
242}
243
244func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ([]*github.Repository, error) {
245 var allRepos []*github.Repository
246 opt := &github.RepositoryListByOrgOptions{}
247 for {
248 repos, resp, err := client.Repositories.ListByOrg(context.Background(), org, opt)
249 if err != nil {
250 return nil, err
251 }
252 if len(repos) == 0 {
253 break
254 }
255
256 opt.Page = resp.NextPage
257 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived)
258 allRepos = append(allRepos, repos...)
259 if resp.NextPage == 0 {
260 break
261 }
262 }
263 return allRepos, nil
264}
265
266func getUserRepos(client *github.Client, user string, reposFilters reposFilters) ([]*github.Repository, error) {
267 var allRepos []*github.Repository
268 opt := &github.RepositoryListOptions{}
269 for {
270 repos, resp, err := client.Repositories.List(context.Background(), user, opt)
271 if err != nil {
272 return nil, err
273 }
274 if len(repos) == 0 {
275 break
276 }
277
278 opt.Page = resp.NextPage
279 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived)
280 allRepos = append(allRepos, repos...)
281 if resp.NextPage == 0 {
282 break
283 }
284 }
285 return allRepos, nil
286}
287
// itoa formats the integer p points to in base 10. A nil pointer yields the
// empty string.
func itoa(p *int) string {
	if p == nil {
		return ""
	}
	value := *p
	return strconv.Itoa(value)
}
294
295func cloneRepos(destDir string, repos []*github.Repository) error {
296 for _, r := range repos {
297 host, err := url.Parse(*r.HTMLURL)
298 if err != nil {
299 return err
300 }
301
302 config := map[string]string{
303 "zoekt.web-url-type": "github",
304 "zoekt.web-url": *r.HTMLURL,
305 "zoekt.name": filepath.Join(host.Hostname(), *r.FullName),
306
307 "zoekt.github-stars": itoa(r.StargazersCount),
308 "zoekt.github-watchers": itoa(r.WatchersCount),
309 "zoekt.github-subscribers": itoa(r.SubscribersCount),
310 "zoekt.github-forks": itoa(r.ForksCount),
311
312 "zoekt.archived": marshalBool(r.Archived != nil && *r.Archived),
313 "zoekt.fork": marshalBool(r.Fork != nil && *r.Fork),
314 "zoekt.public": marshalBool(r.Private == nil || !*r.Private),
315 }
316 dest, err := gitindex.CloneRepo(destDir, *r.FullName, *r.CloneURL, config)
317 if err != nil {
318 return err
319 }
320 if dest != "" {
321 fmt.Println(dest)
322 }
323
324 }
325
326 return nil
327}
328
// marshalBool encodes a boolean as the string "1" (true) or "0" (false).
func marshalBool(b bool) string {
	if !b {
		return "0"
	}
	return "1"
}