fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// package archive provides indexing of archives from remote URLs. 2package archive 3 4import ( 5 "errors" 6 "fmt" 7 "io" 8 "net/url" 9 "strings" 10 11 "github.com/sourcegraph/zoekt" 12 "github.com/sourcegraph/zoekt/build" 13) 14 15// Options specify the archive specific indexing options. 16type Options struct { 17 Incremental bool 18 19 Archive string 20 Name string 21 RepoURL string 22 Branch string 23 Commit string 24 Strip int 25} 26 27func (o *Options) SetDefaults() { 28 // We guess based on the archive URL. 29 u, _ := url.Parse(o.Archive) 30 if u == nil { 31 return 32 } 33 34 setRef := func(ref string) { 35 if isGitOID(ref) && o.Commit == "" { 36 o.Commit = ref 37 } 38 if !isGitOID(ref) && o.Branch == "" { 39 o.Branch = ref 40 } 41 } 42 43 switch u.Host { 44 case "github.com", "codeload.github.com": 45 // https://github.com/octokit/octokit.rb/commit/3d21ec53a331a6f037a91c368710b99387d012c1 46 // https://github.com/octokit/octokit.rb/blob/master/README.md 47 // https://github.com/octokit/octokit.rb/tree/master/lib 48 // https://codeload.github.com/octokit/octokit.rb/legacy.tar.gz/master 49 parts := strings.Split(u.Path, "/") 50 if len(parts) > 2 && o.Name == "" { 51 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 52 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 53 } 54 if len(parts) > 4 { 55 setRef(parts[4]) 56 if u.Host == "github.com" { 57 o.Archive = fmt.Sprintf("https://codeload.github.com/%s/%s/legacy.tar.gz/%s", parts[1], parts[2], parts[4]) 58 } 59 } 60 o.Strip = 1 61 case "api.github.com": 62 // https://api.github.com/repos/octokit/octokit.rb/tarball/master 63 parts := strings.Split(u.Path, "/") 64 if len(parts) > 2 && o.Name == "" { 65 o.Name = fmt.Sprintf("github.com/%s/%s", parts[1], parts[2]) 66 o.RepoURL = fmt.Sprintf("https://github.com/%s/%s", parts[1], parts[2]) 67 } 68 if len(parts) > 5 { 69 setRef(parts[5]) 70 } 71 o.Strip = 1 72 } 73} 74 75// Index archive specified in opts using bopts. 
76func Index(opts Options, bopts build.Options) error { 77 opts.SetDefaults() 78 79 if opts.Name == "" && opts.RepoURL == "" { 80 return errors.New("-name or -url required") 81 } 82 if opts.Branch == "" { 83 return errors.New("-branch required") 84 } 85 86 if opts.Name != "" { 87 bopts.RepositoryDescription.Name = opts.Name 88 } 89 // We do not use this functionality to avoid pulling in the transitive deps of gitindex 90 /* 91 if opts.RepoURL != "" { 92 u, err := url.Parse(opts.RepoURL) 93 if err != nil { 94 return err 95 } 96 if err := gitindex.SetTemplatesFromOrigin(&bopts.RepositoryDescription, u); err != nil { 97 return err 98 } 99 } 100 */ 101 bopts.SetDefaults() 102 bopts.RepositoryDescription.Branches = []zoekt.RepositoryBranch{{Name: opts.Branch, Version: opts.Commit}} 103 brs := []string{opts.Branch} 104 105 if opts.Incremental && bopts.IncrementalSkipIndexing() { 106 return nil 107 } 108 109 a, err := openArchive(opts.Archive) 110 if err != nil { 111 return err 112 } 113 defer a.Close() 114 115 bopts.RepositoryDescription.Source = opts.Archive 116 builder, err := build.NewBuilder(bopts) 117 if err != nil { 118 return err 119 } 120 121 add := func(f *File) error { 122 defer f.Close() 123 124 contents, err := io.ReadAll(f) 125 if err != nil { 126 return err 127 } 128 129 name := stripComponents(f.Name, opts.Strip) 130 if name == "" { 131 return nil 132 } 133 134 return builder.Add(zoekt.Document{ 135 Name: name, 136 Content: contents, 137 Branches: brs, 138 }) 139 } 140 141 for { 142 f, err := a.Next() 143 if err == io.EOF { 144 break 145 } 146 if err != nil { 147 return err 148 } 149 150 if err := add(f); err != nil { 151 return err 152 } 153 } 154 155 return builder.Finish() 156} 157 158// stripComponents removes the specified number of leading path 159// elements. Pathnames with fewer elements will return the empty string. 
func stripComponents(path string, count int) string {
	// Drop one "/"-terminated element per iteration. The separator index
	// has its own name so it does not shadow the loop counter.
	for stripped := 0; path != "" && stripped < count; stripped++ {
		sep := strings.Index(path, "/")
		if sep < 0 {
			// No separator left: the path has fewer elements than requested.
			return ""
		}
		path = path[sep+1:]
	}
	return path
}

// isGitOID checks if the revision is a git OID SHA string, that is, exactly
// 40 hexadecimal digits in either case.
//
// Note: This doesn't mean the SHA exists in a repository, nor does it mean it
// isn't a ref. Git allows 40-char hexadecimal strings to be references.
func isGitOID(s string) bool {
	if len(s) != 40 {
		return false
	}
	for _, r := range s {
		switch {
		case '0' <= r && r <= '9':
		case 'a' <= r && r <= 'f':
		case 'A' <= r && r <= 'F':
		default:
			return false
		}
	}
	return true
}