fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
// Command zoekt-mirror-github fetches all repos of a GitHub user or organization
// and clones them. It is strongly recommended to get a personal API token from
// https://github.com/settings/tokens, save the token in a file, and point the
// --token option to it.
19package main
20
21import (
22 "context"
23 "flag"
24 "fmt"
25 "log"
26 "net/url"
27 "os"
28 "path/filepath"
29 "strconv"
30 "strings"
31
32 "github.com/google/go-github/v27/github"
33 "golang.org/x/oauth2"
34
35 "github.com/sourcegraph/zoekt/internal/gitindex"
36)
37
// topicsFlag is a flag value that accumulates every occurrence of a
// repeatable string flag, in the order the values are set.
type topicsFlag []string

// String returns the accumulated values joined by commas.
//
// A nil receiver yields the empty string instead of panicking: the flag
// package documents that it may call String on a zero-valued receiver, such
// as a nil pointer.
func (f *topicsFlag) String() string {
	if f == nil {
		return ""
	}
	return strings.Join(*f, ",")
}

// Set appends value to the accumulated list. It never fails.
func (f *topicsFlag) Set(value string) error {
	*f = append(*f, value)
	return nil
}
48
// reposFilters bundles the topic and archive filters that are applied to each
// page of repositories returned by the GitHub API.
type reposFilters struct {
	// topics, when non-empty, keeps only repositories that carry at least one
	// of the listed topics. excludeTopics drops repositories that carry any of
	// the listed topics.
	topics, excludeTopics []string

	// noArchived, when it points at true, drops archived repositories. It is
	// a pointer because main stores the *bool returned by flag.Bool directly.
	noArchived *bool
}
54
55func main() {
56 dest := flag.String("dest", "", "destination directory")
57 githubURL := flag.String("url", "", "GitHub Enterprise url. If not set github.com will be used as the host.")
58 org := flag.String("org", "", "organization to mirror")
59 user := flag.String("user", "", "user to mirror")
60 token := flag.String("token",
61 filepath.Join(os.Getenv("HOME"), ".github-token"),
62 "file holding API token.")
63 forks := flag.Bool("forks", false, "also mirror forks.")
64 deleteRepos := flag.Bool("delete", false, "delete missing repos")
65 namePattern := flag.String("name", "", "only clone repos whose name matches the given regexp.")
66 excludePattern := flag.String("exclude", "", "don't mirror repos whose names match this regexp.")
67 topics := topicsFlag{}
68 flag.Var(&topics, "topic", "only clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
69 excludeTopics := topicsFlag{}
70 flag.Var(&excludeTopics, "exclude_topic", "don't clone repos whose have one of given topics. You can add multiple topics by setting this more than once.")
71 noArchived := flag.Bool("no_archived", false, "mirror only projects that are not archived")
72
73 flag.Parse()
74
75 if *dest == "" {
76 log.Fatal("must set --dest")
77 }
78 if *githubURL == "" && *org == "" && *user == "" {
79 log.Fatal("must set either --org or --user when github.com is used as host")
80 }
81
82 var host string
83 var apiBaseURL string
84 var client *github.Client
85 if *githubURL != "" {
86 rootURL, err := url.Parse(*githubURL)
87 if err != nil {
88 log.Fatal(err)
89 }
90 host = rootURL.Host
91 apiPath, err := url.Parse("/api/v3/")
92 if err != nil {
93 log.Fatal(err)
94 }
95 apiBaseURL = rootURL.ResolveReference(apiPath).String()
96 client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, nil)
97 if err != nil {
98 log.Fatal(err)
99 }
100 } else {
101 host = "github.com"
102 apiBaseURL = "https://github.com/"
103 client = github.NewClient(nil)
104 }
105 destDir := filepath.Join(*dest, host)
106 if err := os.MkdirAll(destDir, 0o755); err != nil {
107 log.Fatal(err)
108 }
109
110 if *token != "" {
111 content, err := os.ReadFile(*token)
112 if err != nil {
113 log.Fatal(err)
114 }
115
116 ts := oauth2.StaticTokenSource(
117 &oauth2.Token{
118 AccessToken: strings.TrimSpace(string(content)),
119 })
120 tc := oauth2.NewClient(context.Background(), ts)
121 if *githubURL != "" {
122 client, err = github.NewEnterpriseClient(apiBaseURL, apiBaseURL, tc)
123 if err != nil {
124 log.Fatal(err)
125 }
126 } else {
127 client = github.NewClient(tc)
128 }
129 }
130
131 reposFilters := reposFilters{
132 topics: topics,
133 excludeTopics: excludeTopics,
134 noArchived: noArchived,
135 }
136 var repos []*github.Repository
137 var err error
138 if *org != "" {
139 repos, err = getOrgRepos(client, *org, reposFilters)
140 } else if *user != "" {
141 repos, err = getUserRepos(client, *user, reposFilters)
142 } else {
143 log.Printf("no user or org specified, cloning all repos.")
144 repos, err = getUserRepos(client, "", reposFilters)
145 }
146
147 if err != nil {
148 log.Fatal(err)
149 }
150
151 if !*forks {
152 trimmed := repos[:0]
153 for _, r := range repos {
154 if r.Fork == nil || !*r.Fork {
155 trimmed = append(trimmed, r)
156 }
157 }
158 repos = trimmed
159 }
160
161 filter, err := gitindex.NewFilter(*namePattern, *excludePattern)
162 if err != nil {
163 log.Fatal(err)
164 }
165
166 {
167 trimmed := repos[:0]
168 for _, r := range repos {
169 if filter.Include(*r.Name) {
170 trimmed = append(trimmed, r)
171 }
172 }
173 repos = trimmed
174 }
175
176 if err := cloneRepos(destDir, repos); err != nil {
177 log.Fatalf("cloneRepos: %v", err)
178 }
179
180 if *deleteRepos {
181 if err := deleteStaleRepos(*dest, filter, repos, *org+*user); err != nil {
182 log.Fatalf("deleteStaleRepos: %v", err)
183 }
184 }
185}
186
187func deleteStaleRepos(destDir string, filter *gitindex.Filter, repos []*github.Repository, user string) error {
188 var baseURL string
189 if len(repos) > 0 {
190 baseURL = *repos[0].HTMLURL
191 } else {
192 return nil
193 }
194 u, err := url.Parse(baseURL)
195 if err != nil {
196 return err
197 }
198 u.Path = user
199
200 names := map[string]struct{}{}
201 for _, r := range repos {
202 u, err := url.Parse(*r.HTMLURL)
203 if err != nil {
204 return err
205 }
206
207 names[filepath.Join(u.Host, u.Path+".git")] = struct{}{}
208 }
209 if err := gitindex.DeleteRepos(destDir, u, names, filter); err != nil {
210 log.Fatalf("deleteRepos: %v", err)
211 }
212 return nil
213}
214
// hasIntersection reports whether at least one string occurs in both s1 and
// s2. It is false when either slice is empty.
func hasIntersection(s1, s2 []string) bool {
	seen := make(map[string]struct{}, len(s1))
	for i := range s1 {
		seen[s1[i]] = struct{}{}
	}
	for i := range s2 {
		if _, found := seen[s2[i]]; found {
			return true
		}
	}
	return false
}
227
228func filterRepositories(repos []*github.Repository, include []string, exclude []string, noArchived bool) (filteredRepos []*github.Repository) {
229 for _, repo := range repos {
230 if noArchived && *repo.Archived {
231 continue
232 }
233 if (len(include) == 0 || hasIntersection(include, repo.Topics)) &&
234 !hasIntersection(exclude, repo.Topics) {
235 filteredRepos = append(filteredRepos, repo)
236 }
237 }
238 return
239}
240
241func getOrgRepos(client *github.Client, org string, reposFilters reposFilters) ([]*github.Repository, error) {
242 var allRepos []*github.Repository
243 opt := &github.RepositoryListByOrgOptions{}
244 for {
245 repos, resp, err := client.Repositories.ListByOrg(context.Background(), org, opt)
246 if err != nil {
247 return nil, err
248 }
249 if len(repos) == 0 {
250 break
251 }
252
253 opt.Page = resp.NextPage
254 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived)
255 allRepos = append(allRepos, repos...)
256 if resp.NextPage == 0 {
257 break
258 }
259 }
260 return allRepos, nil
261}
262
263func getUserRepos(client *github.Client, user string, reposFilters reposFilters) ([]*github.Repository, error) {
264 var allRepos []*github.Repository
265 opt := &github.RepositoryListOptions{}
266 for {
267 repos, resp, err := client.Repositories.List(context.Background(), user, opt)
268 if err != nil {
269 return nil, err
270 }
271 if len(repos) == 0 {
272 break
273 }
274
275 opt.Page = resp.NextPage
276 repos = filterRepositories(repos, reposFilters.topics, reposFilters.excludeTopics, *reposFilters.noArchived)
277 allRepos = append(allRepos, repos...)
278 if resp.NextPage == 0 {
279 break
280 }
281 }
282 return allRepos, nil
283}
284
// itoa formats the integer p points at in base 10. A nil pointer yields the
// empty string.
func itoa(p *int) string {
	if p == nil {
		return ""
	}
	return strconv.Itoa(*p)
}
291
292func cloneRepos(destDir string, repos []*github.Repository) error {
293 for _, r := range repos {
294 host, err := url.Parse(*r.HTMLURL)
295 if err != nil {
296 return err
297 }
298
299 config := map[string]string{
300 "zoekt.web-url-type": "github",
301 "zoekt.web-url": *r.HTMLURL,
302 "zoekt.name": filepath.Join(host.Hostname(), *r.FullName),
303
304 "zoekt.github-stars": itoa(r.StargazersCount),
305 "zoekt.github-watchers": itoa(r.WatchersCount),
306 "zoekt.github-subscribers": itoa(r.SubscribersCount),
307 "zoekt.github-forks": itoa(r.ForksCount),
308
309 "zoekt.archived": marshalBool(r.Archived != nil && *r.Archived),
310 "zoekt.fork": marshalBool(r.Fork != nil && *r.Fork),
311 "zoekt.public": marshalBool(r.Private == nil || !*r.Private),
312 }
313 dest, err := gitindex.CloneRepo(destDir, *r.FullName, *r.CloneURL, config)
314 if err != nil {
315 return err
316 }
317 if dest != "" {
318 fmt.Println(dest)
319 }
320
321 }
322
323 return nil
324}
325
// marshalBool encodes b as the string "1" for true and "0" for false.
func marshalBool(b bool) string {
	encoded := "0"
	if b {
		encoded = "1"
	}
	return encoded
}
331}