// Fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18 "bytes"
19 "encoding/json"
20 "io"
21 "log"
22 "math/rand"
23 "net/http"
24 "net/url"
25 "os"
26 "os/exec"
27 "path/filepath"
28 "time"
29
30 "github.com/fsnotify/fsnotify"
31)
32
// ConfigEntry is one element of the JSON array loaded by readConfigURL.
//
// executeMirror chooses the mirror command for an entry from whichever
// host-specific field is set (checked in the order GitHub, Gitiles, cgit,
// Bitbucket Server, GitLab, Gerrit, Gitea) and translates the remaining
// fields into command-line flags for that command.
type ConfigEntry struct {
	// GithubUser is passed as -user to zoekt-mirror-github. When set,
	// GithubOrg is not passed.
	GithubUser string
	// GithubOrg is passed as -org to zoekt-mirror-github when GithubUser
	// is empty.
	GithubOrg string
	// BitBucketServerProject is passed as -project to
	// zoekt-mirror-bitbucket-server.
	BitBucketServerProject string
	// GitHubURL is passed as -url to zoekt-mirror-github.
	GitHubURL string
	// GitilesURL is the positional argument of zoekt-mirror-gitiles.
	GitilesURL string
	// CGitURL is the positional argument of zoekt-mirror-gitiles when it
	// is run with "-type cgit".
	CGitURL string
	// BitBucketServerURL is passed as -url to
	// zoekt-mirror-bitbucket-server.
	BitBucketServerURL string
	// GiteaURL is passed as -url to zoekt-mirror-gitea.
	GiteaURL string
	// GiteaUser is passed as -user to zoekt-mirror-gitea. When set,
	// GiteaOrg is not passed.
	GiteaUser string
	// GiteaOrg is passed as -org to zoekt-mirror-gitea when GiteaUser is
	// empty.
	GiteaOrg string
	// DisableTLS adds -disable-tls for zoekt-mirror-bitbucket-server.
	DisableTLS bool
	// CredentialPath is forwarded as -token (GitHub, GitLab, Gitea),
	// -credentials (Bitbucket Server) or -http-credentials (Gerrit).
	CredentialPath string
	// ProjectType is passed as -type to zoekt-mirror-bitbucket-server.
	ProjectType string
	// Name is forwarded as -name to the selected mirror command.
	Name string
	// Exclude is forwarded as -exclude to the selected mirror command.
	Exclude string
	// GitLabURL is passed as -url to zoekt-mirror-gitlab.
	GitLabURL string
	// OnlyPublic adds -public for zoekt-mirror-gitlab.
	OnlyPublic bool
	// GerritApiURL is the positional argument of zoekt-mirror-gerrit.
	GerritApiURL string
	// Topics are each forwarded as -topic (GitHub, Gitea).
	Topics []string
	// ExcludeTopics are each forwarded as -exclude_topic (GitHub, Gitea).
	ExcludeTopics []string
	// Active adds -active for zoekt-mirror-gerrit.
	Active bool
	// NoArchived adds -no_archived (GitHub, GitLab, Gitea).
	NoArchived bool
	// KeepDeleted suppresses the -delete flag that is otherwise passed to
	// the GitHub, Bitbucket Server, GitLab, Gerrit and Gitea commands.
	KeepDeleted bool
	// GerritFetchMetaConfig adds -fetch-meta-config for
	// zoekt-mirror-gerrit.
	GerritFetchMetaConfig bool
	// GerritRepoNameFormat is passed as -repo-name-format to
	// zoekt-mirror-gerrit.
	GerritRepoNameFormat string
	// ExcludeUserRepos adds -exclude_user for zoekt-mirror-gitlab.
	ExcludeUserRepos bool
	// Forks adds -forks (GitHub, Gitea).
	Forks bool
	// Visibility values are each forwarded as -visibility to
	// zoekt-mirror-github.
	Visibility []string
}
63
64func randomize(entries []ConfigEntry) []ConfigEntry {
65 perm := rand.Perm(len(entries))
66
67 var shuffled []ConfigEntry
68 for _, i := range perm {
69 shuffled = append(shuffled, entries[i])
70 }
71
72 return shuffled
73}
74
// isHTTP reports whether u parses as a URL whose scheme is "http" or
// "https". Anything that fails to parse, or has another (or no) scheme,
// yields false.
func isHTTP(u string) bool {
	parsed, err := url.Parse(u)
	if err != nil {
		return false
	}

	switch parsed.Scheme {
	case "http", "https":
		return true
	default:
		return false
	}
}
79
80func readConfigURL(u string) ([]ConfigEntry, error) {
81 var body []byte
82 var readErr error
83
84 if isHTTP(u) {
85 rep, err := http.Get(u)
86 if err != nil {
87 return nil, err
88 }
89 defer rep.Body.Close()
90
91 body, readErr = io.ReadAll(rep.Body)
92 } else {
93 body, readErr = os.ReadFile(u)
94 }
95
96 if readErr != nil {
97 return nil, readErr
98 }
99
100 var result []ConfigEntry
101 if err := json.Unmarshal(body, &result); err != nil {
102 return nil, err
103 }
104 return result, nil
105}
106
107func watchFile(path string) (<-chan struct{}, error) {
108 watcher, err := fsnotify.NewWatcher()
109 if err != nil {
110 return nil, err
111 }
112
113 if err := watcher.Add(filepath.Dir(path)); err != nil {
114 return nil, err
115 }
116
117 out := make(chan struct{}, 1)
118 go func() {
119 var last time.Time
120 for {
121 select {
122 case <-watcher.Events:
123 fi, err := os.Stat(path)
124 if err == nil && fi.ModTime() != last {
125 out <- struct{}{}
126 last = fi.ModTime()
127 }
128 case err := <-watcher.Errors:
129 if err != nil {
130 log.Printf("watcher error: %v", err)
131 }
132 }
133 }
134 }()
135 return out, nil
136}
137
138func periodicMirrorFile(repoDir string, opts *Options, pendingRepos chan<- string) {
139 ticker := time.NewTicker(opts.mirrorInterval)
140
141 var watcher <-chan struct{}
142 if !isHTTP(opts.mirrorConfigFile) {
143 var err error
144 watcher, err = watchFile(opts.mirrorConfigFile)
145 if err != nil {
146 log.Printf("watchFile(%q): %v", opts.mirrorConfigFile, err)
147 }
148 }
149
150 var lastCfg []ConfigEntry
151 for {
152 cfg, err := readConfigURL(opts.mirrorConfigFile)
153 if err != nil {
154 log.Printf("readConfig(%s): %v", opts.mirrorConfigFile, err)
155 } else {
156 lastCfg = cfg
157 }
158
159 executeMirror(lastCfg, repoDir, pendingRepos)
160
161 select {
162 case <-watcher:
163 log.Printf("mirror config %s changed", opts.mirrorConfigFile)
164 case <-ticker.C:
165 }
166 }
167}
168
169func executeMirror(cfg []ConfigEntry, repoDir string, pendingRepos chan<- string) {
170 // Randomize the ordering in which we query
171 // things. This is to ensure that quota limits don't
172 // always hit the last one in the list.
173 cfg = randomize(cfg)
174 for _, c := range cfg {
175 var cmd *exec.Cmd
176 if c.GitHubURL != "" || c.GithubUser != "" || c.GithubOrg != "" {
177 cmd = exec.Command("zoekt-mirror-github",
178 "-dest", repoDir)
179 if c.GitHubURL != "" {
180 cmd.Args = append(cmd.Args, "-url", c.GitHubURL)
181 }
182 if c.GithubUser != "" {
183 cmd.Args = append(cmd.Args, "-user", c.GithubUser)
184 } else if c.GithubOrg != "" {
185 cmd.Args = append(cmd.Args, "-org", c.GithubOrg)
186 }
187 if c.Name != "" {
188 cmd.Args = append(cmd.Args, "-name", c.Name)
189 }
190 if c.Exclude != "" {
191 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
192 }
193 if c.CredentialPath != "" {
194 cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
195 }
196 for _, topic := range c.Topics {
197 cmd.Args = append(cmd.Args, "-topic", topic)
198 }
199 for _, topic := range c.ExcludeTopics {
200 cmd.Args = append(cmd.Args, "-exclude_topic", topic)
201 }
202 if c.NoArchived {
203 cmd.Args = append(cmd.Args, "-no_archived")
204 }
205 if !c.KeepDeleted {
206 cmd.Args = append(cmd.Args, "-delete")
207 }
208 if c.Forks {
209 cmd.Args = append(cmd.Args, "-forks")
210 }
211 for _, v := range c.Visibility {
212 cmd.Args = append(cmd.Args, "-visibility", v)
213 }
214 } else if c.GitilesURL != "" {
215 cmd = exec.Command("zoekt-mirror-gitiles",
216 "-dest", repoDir, "-name", c.Name)
217 if c.Exclude != "" {
218 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
219 }
220 cmd.Args = append(cmd.Args, c.GitilesURL)
221 } else if c.CGitURL != "" {
222 cmd = exec.Command("zoekt-mirror-gitiles",
223 "-type", "cgit",
224 "-dest", repoDir, "-name", c.Name)
225 if c.Exclude != "" {
226 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
227 }
228 cmd.Args = append(cmd.Args, c.CGitURL)
229 } else if c.BitBucketServerURL != "" {
230 cmd = exec.Command("zoekt-mirror-bitbucket-server",
231 "-dest", repoDir, "-url", c.BitBucketServerURL)
232 if c.BitBucketServerProject != "" {
233 cmd.Args = append(cmd.Args, "-project", c.BitBucketServerProject)
234 }
235 if c.DisableTLS {
236 cmd.Args = append(cmd.Args, "-disable-tls")
237 }
238 if c.ProjectType != "" {
239 cmd.Args = append(cmd.Args, "-type", c.ProjectType)
240 }
241 if c.Name != "" {
242 cmd.Args = append(cmd.Args, "-name", c.Name)
243 }
244 if c.Exclude != "" {
245 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
246 }
247 if c.CredentialPath != "" {
248 cmd.Args = append(cmd.Args, "-credentials", c.CredentialPath)
249 }
250 if !c.KeepDeleted {
251 cmd.Args = append(cmd.Args, "-delete")
252 }
253 } else if c.GitLabURL != "" {
254 cmd = exec.Command("zoekt-mirror-gitlab",
255 "-dest", repoDir, "-url", c.GitLabURL)
256 if c.Name != "" {
257 cmd.Args = append(cmd.Args, "-name", c.Name)
258 }
259 if c.Exclude != "" {
260 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
261 }
262 if c.OnlyPublic {
263 cmd.Args = append(cmd.Args, "-public")
264 }
265 if c.ExcludeUserRepos {
266 cmd.Args = append(cmd.Args, "-exclude_user")
267 }
268 if c.CredentialPath != "" {
269 cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
270 }
271 if c.NoArchived {
272 cmd.Args = append(cmd.Args, "-no_archived")
273 }
274 if !c.KeepDeleted {
275 cmd.Args = append(cmd.Args, "-delete")
276 }
277 } else if c.GerritApiURL != "" {
278 cmd = exec.Command("zoekt-mirror-gerrit",
279 "-dest", repoDir)
280 if c.CredentialPath != "" {
281 cmd.Args = append(cmd.Args, "-http-credentials", c.CredentialPath)
282 }
283 if c.Name != "" {
284 cmd.Args = append(cmd.Args, "-name", c.Name)
285 }
286 if c.Exclude != "" {
287 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
288 }
289 if c.Active {
290 cmd.Args = append(cmd.Args, "-active")
291 }
292 if c.GerritFetchMetaConfig {
293 cmd.Args = append(cmd.Args, "-fetch-meta-config")
294 }
295 if c.GerritRepoNameFormat != "" {
296 cmd.Args = append(cmd.Args, "-repo-name-format", c.GerritRepoNameFormat)
297 }
298 if !c.KeepDeleted {
299 cmd.Args = append(cmd.Args, "-delete")
300 }
301 cmd.Args = append(cmd.Args, c.GerritApiURL)
302 } else if c.GiteaURL != "" {
303 cmd = exec.Command("zoekt-mirror-gitea", "-dest", repoDir)
304 if c.GiteaURL != "" {
305 cmd.Args = append(cmd.Args, "-url", c.GiteaURL)
306 }
307 if c.GiteaUser != "" {
308 cmd.Args = append(cmd.Args, "-user", c.GiteaUser)
309 } else if c.GiteaOrg != "" {
310 cmd.Args = append(cmd.Args, "-org", c.GiteaOrg)
311 }
312 if c.Name != "" {
313 cmd.Args = append(cmd.Args, "-name", c.Name)
314 }
315 if c.Exclude != "" {
316 cmd.Args = append(cmd.Args, "-exclude", c.Exclude)
317 }
318 if c.CredentialPath != "" {
319 cmd.Args = append(cmd.Args, "-token", c.CredentialPath)
320 }
321 for _, topic := range c.Topics {
322 cmd.Args = append(cmd.Args, "-topic", topic)
323 }
324 for _, topic := range c.ExcludeTopics {
325 cmd.Args = append(cmd.Args, "-exclude_topic", topic)
326 }
327 if c.NoArchived {
328 cmd.Args = append(cmd.Args, "-no_archived")
329 }
330 if !c.KeepDeleted {
331 cmd.Args = append(cmd.Args, "-delete")
332 }
333 if c.Forks {
334 cmd.Args = append(cmd.Args, "-forks")
335 }
336 } else {
337 log.Printf("executeMirror: ignoring config, because it does not contain any valid repository definition: %v", c)
338 continue
339 }
340
341 stdout, _ := loggedRun(cmd)
342
343 for _, fn := range bytes.Split(stdout, []byte{'\n'}) {
344 if len(fn) == 0 {
345 continue
346 }
347
348 pendingRepos <- string(fn)
349 }
350
351 }
352}