fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Command zoekt-archive-index indexes an archive. 2// 3// Example via github.com: 4// 5// zoekt-archive-index -incremental -commit b57cb1605fd11ba2ecfa7f68992b4b9cc791934d -name github.com/gorilla/mux -strip_components 1 https://codeload.github.com/gorilla/mux/legacy.tar.gz/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d 6// 7// zoekt-archive-index -branch master https://github.com/gorilla/mux/commit/b57cb1605fd11ba2ecfa7f68992b4b9cc791934d 8package main 9 10import ( 11 "errors" 12 "flag" 13 "fmt" 14 "io" 15 "log" 16 "net/url" 17 "strings" 18 19 "github.com/sourcegraph/zoekt" 20 "github.com/sourcegraph/zoekt/build" 21 "github.com/sourcegraph/zoekt/cmd" 22 "go.uber.org/automaxprocs/maxprocs" 23) 24 25// stripComponents removes the specified number of leading path 26// elements. Pathnames with fewer elements will return the empty string. 27func stripComponents(path string, count int) string { 28 for i := 0; path != "" && i < count; i++ { 29 i := strings.Index(path, "/") 30 if i < 0 { 31 return "" 32 } 33 path = path[i+1:] 34 } 35 return path 36} 37 38// isGitOID checks if the revision is a git OID SHA string. 39// 40// Note: This doesn't mean the SHA exists in a repository, nor does it mean it 41// isn't a ref. Git allows 40-char hexadecimal strings to be references. 42func isGitOID(s string) bool { 43 if len(s) != 40 { 44 return false 45 } 46 for _, r := range s { 47 if !(('0' <= r && r <= '9') || 48 ('a' <= r && r <= 'f') || 49 ('A' <= r && r <= 'F')) { 50 return false 51 } 52 } 53 return true 54} 55 56type Options struct { 57 Incremental bool 58 59 Archive string 60 Name string 61 RepoURL string 62 Branch string 63 Commit string 64 Strip int 65} 66 67func (o *Options) SetDefaults() { 68 // We guess based on the archive URL. 69 u, _ := url.Parse(o.Archive) 70 if u == nil { 71 return 72 } 73 74 setRef := func(ref string) { 75 if isGitOID(ref) && o.Commit == "" { 76 o.Commit = ref 77 } 78 if !isGitOID(ref) && o.Branch == "" { 79 o.Branch = ref 80 } 81 } 82 83 switch u.Host { 84 case "github.com", "codeload.github.com": 85 // https://github.com/octokit/octokit.rb/commit/3d21ec53a331a6f037a91c368710b99387d012c1 86 // https://github.com/octokit/octokit.rb/blob/master/README.md 87 // https://github.com/octokit/octokit.rb/tree/master/lib 88 // https://codeload.github.com/octokit/octokit.rb/legacy.tar.gz/master 89 parts := strings.Split(u.Path, "/") 90 if len(parts) > 2 && o.Name == "" { 91 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 92 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 93 } 94 if len(parts) > 4 { 95 setRef(parts[4]) 96 if u.Host == "github.com" { 97 o.Archive = fmt.Sprintf("https://codeload.github.com/%s/%s/legacy.tar.gz/%s", parts[1], parts[2], parts[4]) 98 } 99 } 100 o.Strip = 1 101 case "api.github.com": 102 // https://api.github.com/repos/octokit/octokit.rb/tarball/master 103 parts := strings.Split(u.Path, "/") 104 if len(parts) > 2 && o.Name == "" { 105 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 106 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 107 } 108 if len(parts) > 5 { 109 setRef(parts[5]) 110 } 111 o.Strip = 1 112 } 113} 114 115func do(opts Options, bopts build.Options) error { 116 opts.SetDefaults() 117 118 if opts.Name == "" && opts.RepoURL == "" { 119 return errors.New("-name or -url required") 120 } 121 if opts.Branch == "" { 122 return errors.New("-branch required") 123 } 124 125 if opts.Name != "" { 126 bopts.RepositoryDescription.Name = opts.Name 127 } 128 // We do not use this functionality to avoid pulling in the transitive deps of gitindex 129 /* 130 if opts.RepoURL != "" { 131 u, err := url.Parse(opts.RepoURL) 132 if err != nil { 133 return err 134 } 135 if err := gitindex.SetTemplatesFromOrigin(&bopts.RepositoryDescription, u); err != nil { 136 return err 137 } 138 } 139 */ 140 bopts.SetDefaults() 141 bopts.RepositoryDescription.Branches = []zoekt.RepositoryBranch{{Name: opts.Branch, Version: opts.Commit}} 142 brs := []string{opts.Branch} 143 144 if opts.Incremental && bopts.IncrementalSkipIndexing() { 145 return nil 146 } 147 148 a, err := openArchive(opts.Archive) 149 if err != nil { 150 return err 151 } 152 defer a.Close() 153 154 bopts.RepositoryDescription.Source = opts.Archive 155 builder, err := build.NewBuilder(bopts) 156 if err != nil { 157 return err 158 } 159 160 add := func(f *File) error { 161 defer f.Close() 162 163 contents, err := io.ReadAll(f) 164 if err != nil { 165 return err 166 } 167 168 name := stripComponents(f.Name, opts.Strip) 169 if name == "" { 170 return nil 171 } 172 173 return builder.Add(zoekt.Document{ 174 Name: name, 175 Content: contents, 176 Branches: brs, 177 }) 178 } 179 180 for { 181 f, err := a.Next() 182 if err == io.EOF { 183 break 184 } 185 if err != nil { 186 return err 187 } 188 189 if err := add(f); err != nil { 190 return err 191 } 192 } 193 194 return builder.Finish() 195} 196 197func main() { 198 var ( 199 incremental = flag.Bool("incremental", true, "only index changed repositories") 200 201 name = flag.String("name", "", "The repository name for the archive") 202 urlRaw = flag.String("url", "", "The repository URL for the archive") 203 branch = flag.String("branch", "", "The branch name for the archive") 204 commit = flag.String("commit", "", "The commit sha for the archive. If incremental this will avoid updating shards already at commit") 205 strip = flag.Int("strip_components", 0, "Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped.") 206 207 downloadLimitMbps = flag.Int64("download-limit-mbps", 0, "If non-zero, limit archive downloads to specified amount in megabits per second") 208 ) 209 flag.Parse() 210 211 // Tune GOMAXPROCS to match Linux container CPU quota. 212 _, _ = maxprocs.Set() 213 214 log.SetFlags(log.LstdFlags | log.Lshortfile) 215 216 if len(flag.Args()) != 1 { 217 log.Fatal("expected argument for archive location") 218 } 219 archive := flag.Args()[0] 220 bopts := cmd.OptionsFromFlags() 221 opts := Options{ 222 Incremental: *incremental, 223 224 Archive: archive, 225 Name: *name, 226 RepoURL: *urlRaw, 227 Branch: *branch, 228 Commit: *commit, 229 Strip: *strip, 230 } 231 232 // Sourcegraph specific: Limit HTTP traffic 233 limitHTTPDefaultClient(*downloadLimitMbps) 234 235 if err := do(opts, *bopts); err != nil { 236 log.Fatal(err) 237 } 238}