fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.5 kB View raw
1// package archive provides indexing of archives from remote URLs. 2package archive 3 4import ( 5 "errors" 6 "fmt" 7 "io" 8 "net/url" 9 "strings" 10 "sync" 11 12 "github.com/sourcegraph/zoekt" 13 "github.com/sourcegraph/zoekt/index" 14) 15 16// Options specify the archive specific indexing options. 17type Options struct { 18 Incremental bool 19 20 Archive string 21 Name string 22 RepoURL string 23 Branch string 24 Commit string 25 Strip int 26} 27 28func (o *Options) SetDefaults() { 29 // We guess based on the archive URL. 30 u, _ := url.Parse(o.Archive) 31 if u == nil { 32 return 33 } 34 35 setRef := func(ref string) { 36 if isGitOID(ref) && o.Commit == "" { 37 o.Commit = ref 38 } 39 if !isGitOID(ref) && o.Branch == "" { 40 o.Branch = ref 41 } 42 } 43 44 switch u.Host { 45 case "github.com", "codeload.github.com": 46 // https://github.com/octokit/octokit.rb/commit/3d21ec53a331a6f037a91c368710b99387d012c1 47 // https://github.com/octokit/octokit.rb/blob/master/README.md 48 // https://github.com/octokit/octokit.rb/tree/master/lib 49 // https://codeload.github.com/octokit/octokit.rb/legacy.tar.gz/master 50 parts := strings.Split(u.Path, "/") 51 if len(parts) > 2 && o.Name == "" { 52 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 53 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 54 } 55 if len(parts) > 4 { 56 setRef(parts[4]) 57 if u.Host == "github.com" { 58 o.Archive = fmt.Sprintf("https://codeload.github.com/%s/%s/legacy.tar.gz/%s", parts[1], parts[2], parts[4]) 59 } 60 } 61 o.Strip = 1 62 case "api.github.com": 63 // https://api.github.com/repos/octokit/octokit.rb/tarball/master 64 parts := strings.Split(u.Path, "/") 65 if len(parts) > 2 && o.Name == "" { 66 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 67 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 68 } 69 if len(parts) > 5 { 70 setRef(parts[5]) 71 } 72 o.Strip = 1 73 } 74} 75 76// Index archive specified in opts using bopts. 77func Index(opts Options, bopts index.Options) error { 78 opts.SetDefaults() 79 80 if opts.Name == "" && opts.RepoURL == "" { 81 return errors.New("-name or -url required") 82 } 83 if opts.Branch == "" { 84 return errors.New("-branch required") 85 } 86 87 if opts.Name != "" { 88 bopts.RepositoryDescription.Name = opts.Name 89 } 90 // We do not use this functionality to avoid pulling in the transitive deps of gitindex 91 /* 92 if opts.RepoURL != "" { 93 u, err := url.Parse(opts.RepoURL) 94 if err != nil { 95 return err 96 } 97 if err := gitindex.SetTemplatesFromOrigin(&bopts.RepositoryDescription, u); err != nil { 98 return err 99 } 100 } 101 */ 102 bopts.SetDefaults() 103 bopts.RepositoryDescription.Branches = []zoekt.RepositoryBranch{{Name: opts.Branch, Version: opts.Commit}} 104 brs := []string{opts.Branch} 105 106 if opts.Incremental && bopts.IncrementalSkipIndexing() { 107 return nil 108 } 109 110 a, err := openArchive(opts.Archive) 111 if err != nil { 112 return err 113 } 114 defer a.Close() 115 116 bopts.RepositoryDescription.Source = opts.Archive 117 var builder *index.Builder 118 119 once := sync.Once{} 120 var onceErr error 121 add := func(f *File) error { 122 defer f.Close() 123 124 once.Do(func() { 125 // We use the ModTime of the first file as a proxy for the latest commit date. 126 bopts.RepositoryDescription.LatestCommitDate = f.ModTime 127 builder, onceErr = index.NewBuilder(bopts) 128 }) 129 if onceErr != nil { 130 return onceErr 131 } 132 133 contents, err := io.ReadAll(f) 134 if err != nil { 135 return err 136 } 137 138 name := stripComponents(f.Name, opts.Strip) 139 if name == "" { 140 return nil 141 } 142 143 return builder.Add(index.Document{ 144 Name: name, 145 Content: contents, 146 Branches: brs, 147 }) 148 } 149 150 for { 151 f, err := a.Next() 152 if err == io.EOF { 153 break 154 } 155 if err != nil { 156 return err 157 } 158 159 if err := add(f); err != nil { 160 return err 161 } 162 } 163 164 return builder.Finish() 165} 166 167// stripComponents removes the specified number of leading path 168// elements. Pathnames with fewer elements will return the empty string. 169func stripComponents(path string, count int) string { 170 for i := 0; path != "" && i < count; i++ { 171 i := strings.Index(path, "/") 172 if i < 0 { 173 return "" 174 } 175 path = path[i+1:] 176 } 177 return path 178} 179 180// isGitOID checks if the revision is a git OID SHA string. 181// 182// Note: This doesn't mean the SHA exists in a repository, nor does it mean it 183// isn't a ref. Git allows 40-char hexadecimal strings to be references. 184func isGitOID(s string) bool { 185 if len(s) != 40 { 186 return false 187 } 188 for _, r := range s { 189 if !(('0' <= r && r <= '9') || 190 ('a' <= r && r <= 'f') || 191 ('A' <= r && r <= 'F')) { 192 return false 193 } 194 } 195 return true 196}