fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Add a zoekt-dynamic-indexserver cmd with HTTP interface for indexing (#496)

This makes it possible for Zoekt to power code search in GitLab by
exposing indexing over an HTTP interface.

Co-authored-by: Dmitry Gruzd <dgruzd@gitlab.com>

author
Dylan
co-author
Dmitry Gruzd
committer
GitHub
date (Feb 3, 2023, 9:23 AM +0200) commit c08faec6 parent 28453275
+400 -2
+252
cmd/zoekt-dynamic-indexserver/main.go
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This program manages a zoekt dynamic indexing deployment:
//   - listens to indexing commands
//   - reindexes specified repositories
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"time"
)

// loggedRun runs cmd with its stdout and stderr captured in memory. The
// command line is always logged before it starts; the captured output is
// logged only when the command fails.
//
// The returned error wraps the error from cmd.Run, so callers can inspect it
// with errors.Is / errors.As.
func loggedRun(cmd *exec.Cmd) error {
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	log.Printf("run %v", cmd.Args)
	if err := cmd.Run(); err != nil {
		// The output is logged here because it is deliberately left out of
		// the returned error, which may be sent back to HTTP clients.
		log.Printf("command %s failed: %v\nOUT: %s\nERR: %s",
			cmd.Args, err, stdout.String(), stderr.String())
		return fmt.Errorf("command %s failed: %w", cmd.Args, err)
	}

	return nil
}

// Options holds the configuration of the dynamic index server.
type Options struct {
	// indexTimeout bounds the total time of one indexRepository call.
	indexTimeout time.Duration
	// dataDir is the root data directory (-data_dir).
	dataDir string
	// indexDir is passed to zoekt-git-index as its -index argument.
	indexDir string
	// repoDir is passed to zoekt-git-clone as its -dest argument.
	repoDir string
	// listen is the address the HTTP API listens on.
	listen string
}

// createMissingDirectories creates dataDir, indexDir and repoDir (including
// parents) if they do not exist. It terminates the process on failure.
func (o *Options) createMissingDirectories() {
	for _, dir := range []string{o.dataDir, o.indexDir, o.repoDir} {
		if err := os.MkdirAll(dir, 0o755); err != nil {
			log.Fatalf("MkdirAll %s: %v", dir, err)
		}
	}
}

// indexRequest is the JSON body accepted by the /index endpoint.
type indexRequest struct {
	CloneURL string // TODO: Decide if tokens can be in the URL or if we should pass separately
	RepoID   uint32
}

// executeCmd runs the named program with the given arguments and an empty
// stdin, bound to ctx.
//
// This function is declared as var so that we can stub it in test.
var executeCmd = func(ctx context.Context, name string, arg ...string) error {
	cmd := exec.CommandContext(ctx, name, arg...)
	cmd.Stdin = &bytes.Buffer{}

	return loggedRun(cmd)
}

// indexRepository clones, fetches and indexes the repository described by
// req. It runs, in order and all under a single opts.indexTimeout deadline:
//
//  1. zoekt-git-clone into opts.repoDir, using the decimal RepoID as both
//     -name and -repoid;
//  2. git fetch inside <repoDir>/<RepoID>.git;
//  3. zoekt-git-index of that directory into opts.indexDir.
//
// The first failing step aborts the sequence and its error is returned with a
// nil response. On success the response is {"Success": true}.
func indexRepository(opts Options, req indexRequest) (map[string]any, error) {
	ctx, cancel := context.WithTimeout(context.Background(), opts.indexTimeout)
	defer cancel()

	repoID := strconv.FormatUint(uint64(req.RepoID), 10)

	if err := executeCmd(ctx, "zoekt-git-clone",
		"-dest", opts.repoDir,
		"-name", repoID,
		"-repoid", repoID,
		req.CloneURL,
	); err != nil {
		return nil, err
	}

	gitRepoPath, err := filepath.Abs(filepath.Join(opts.repoDir, repoID+".git"))
	if err != nil {
		return nil, err
	}

	// NOTE(review): presumably this refreshes a clone that already existed
	// before this request — confirm against zoekt-git-clone's behavior.
	if err := executeCmd(ctx, "git", "-C", gitRepoPath, "fetch"); err != nil {
		return nil, err
	}

	if err := executeCmd(ctx, "zoekt-git-index", "-index", opts.indexDir, gitRepoPath); err != nil {
		return nil, err
	}

	return map[string]any{"Success": true}, nil
}

// indexServer serves the HTTP indexing API using the configured Options.
type indexServer struct {
	opts Options
}

// writeJSON sends payload as a JSON response with the given status code. An
// encoding or write failure is logged, because by then the status line has
// already been sent and cannot be changed.
func writeJSON(w http.ResponseWriter, status int, payload map[string]any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if err := json.NewEncoder(w).Encode(payload); err != nil {
		log.Printf("writing JSON response: %v", err)
	}
}

// serveIndex handles /index. The request body must be a JSON indexRequest
// with no unknown fields; a malformed body yields 400. Otherwise the
// repository is indexed synchronously and the outcome is returned as JSON.
func (s *indexServer) serveIndex(w http.ResponseWriter, r *http.Request) {
	dec := json.NewDecoder(r.Body)
	dec.DisallowUnknownFields()

	var req indexRequest
	if err := dec.Decode(&req); err != nil {
		log.Printf("Error decoding index request: %v", err)
		http.Error(w, "JSON parser error", http.StatusBadRequest)
		return
	}

	response, err := indexRepository(s.opts, req)
	if err != nil {
		respondWithError(w, err)
		return
	}

	writeJSON(w, http.StatusOK, response)
}

// serveTruncate handles /truncate. It removes every entry inside repoDir and
// then inside indexDir, keeping the directories themselves. If emptying
// repoDir fails, indexDir is left untouched.
func (s *indexServer) serveTruncate(w http.ResponseWriter, r *http.Request) {
	if err := emptyDirectory(s.opts.repoDir); err != nil {
		respondWithError(w, fmt.Errorf("Failed to empty repoDir: %v with error: %w", s.opts.repoDir, err))
		return
	}

	if err := emptyDirectory(s.opts.indexDir); err != nil {
		// Report the directory that actually failed (indexDir, not repoDir).
		respondWithError(w, fmt.Errorf("Failed to empty indexDir: %v with error: %w", s.opts.indexDir, err))
		return
	}

	writeJSON(w, http.StatusOK, map[string]any{"Success": true})
}

// respondWithError logs err and replies with HTTP 500 and a JSON body of the
// form {"Success": false, "Error": "<message>"}.
func respondWithError(w http.ResponseWriter, err error) {
	log.Print(err)

	writeJSON(w, http.StatusInternalServerError, map[string]any{
		"Success": false,
		"Error":   err.Error(),
	})
}

// startIndexingApi registers the /index and /truncate handlers on the default
// serve mux and serves on opts.listen. It terminates the process if the
// server stops with an error.
func (s *indexServer) startIndexingApi() {
	http.HandleFunc("/index", s.serveIndex)
	http.HandleFunc("/truncate", s.serveTruncate)

	if err := http.ListenAndServe(s.opts.listen, nil); err != nil {
		log.Fatal(err)
	}
}

// emptyDirectory removes every entry (recursively) inside dir while keeping
// dir itself. It stops at, and returns, the first error encountered,
// including failure to read dir.
func emptyDirectory(dir string) error {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		if err := os.RemoveAll(filepath.Join(dir, entry.Name())); err != nil {
			return err
		}
	}

	return nil
}

// parseOptions parses the command-line flags into Options. -data_dir is
// mandatory (the process exits without it); -index_dir defaults to
// <data_dir>/index and repositories always live in <data_dir>/repos.
func parseOptions() Options {
	dataDir := flag.String("data_dir", "", "directory holding all data.")
	indexDir := flag.String("index_dir", "", "directory holding index shards. Defaults to $data_dir/index/")
	timeout := flag.Duration("index_timeout", time.Hour, "kill index job after this much time")
	listen := flag.String("listen", ":6060", "listen on this address.")
	flag.Parse()

	if *dataDir == "" {
		log.Fatal("must set -data_dir")
	}

	if *indexDir == "" {
		*indexDir = filepath.Join(*dataDir, "index")
	}

	return Options{
		dataDir:      *dataDir,
		repoDir:      filepath.Join(*dataDir, "repos"),
		indexDir:     *indexDir,
		indexTimeout: *timeout,
		listen:       *listen,
	}
}

// main parses the flags, ensures the data directories exist and then serves
// the indexing API until the process exits.
func main() {
	opts := parseOptions()
	opts.createMissingDirectories()

	server := indexServer{
		opts: opts,
	}

	server.startIndexingApi()
}
+135
cmd/zoekt-dynamic-indexserver/main_test.go
··· 1 + package main 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "errors" 7 + "log" 8 + "os" 9 + "os/exec" 10 + "reflect" 11 + "strings" 12 + "testing" 13 + "time" 14 + ) 15 + 16 + var ( 17 + cmdTimeout = 100 * time.Millisecond 18 + ) 19 + 20 + func captureOutput(f func()) string { 21 + var buf bytes.Buffer 22 + log.SetOutput(&buf) 23 + defer func() { log.SetOutput(os.Stderr) }() 24 + f() 25 + return buf.String() 26 + } 27 + 28 + func TestLoggedRun(t *testing.T) { 29 + ctx, cancel := context.WithTimeout(context.Background(), cmdTimeout) 30 + defer cancel() 31 + 32 + cmd := exec.CommandContext(ctx, "echo", "-n", "1") 33 + 34 + stdout := captureOutput(func() { 35 + loggedRun(cmd) 36 + }) 37 + 38 + if !strings.Contains(stdout, "run [echo -n 1]") { 39 + t.Errorf("loggedRun output is incorrect: %v", stdout) 40 + } 41 + } 42 + 43 + func TestLoggedRunFailure(t *testing.T) { 44 + ctx, cancel := context.WithTimeout(context.Background(), cmdTimeout) 45 + defer cancel() 46 + 47 + cmd := exec.CommandContext(ctx, "false") 48 + 49 + stdout := captureOutput(func() { 50 + loggedRun(cmd) 51 + }) 52 + 53 + if !strings.Contains(stdout, "failed: exit status 1") { 54 + t.Errorf("loggedRun output is incorrect: %v", stdout) 55 + } 56 + } 57 + 58 + func TestIndexRepository(t *testing.T) { 59 + var cmdHistory [][]string 60 + 61 + executeCmd = func(ctx context.Context, name string, arg ...string) (err error) { 62 + currentCmd := append([]string{name}, arg...) 
63 + cmdHistory = append(cmdHistory, currentCmd) 64 + 65 + return 66 + } 67 + 68 + opts := Options{ 69 + indexTimeout: cmdTimeout, 70 + repoDir: "/repo_dir", 71 + indexDir: "/index_dir", 72 + } 73 + 74 + req := indexRequest{ 75 + CloneURL: "https://example.com/repository.git", 76 + RepoID: 100, 77 + } 78 + 79 + _, err := indexRepository(opts, req) 80 + 81 + if err != nil { 82 + t.Fatal(err) 83 + } 84 + 85 + expectedHistory := [][]string{ 86 + {"zoekt-git-clone", "-dest", "/repo_dir", "-name", "100", "-repoid", "100", "https://example.com/repository.git"}, 87 + {"git", "-C", "/repo_dir/100.git", "fetch"}, 88 + {"zoekt-git-index", "-index", "/index_dir", "/repo_dir/100.git"}, 89 + } 90 + 91 + if !reflect.DeepEqual(cmdHistory, expectedHistory) { 92 + t.Errorf("cmdHistory output is incorrect: %v, expected output: %v", cmdHistory, expectedHistory) 93 + } 94 + } 95 + 96 + func TestIndexRepositoryWhenErr(t *testing.T) { 97 + var cmdHistory [][]string 98 + 99 + executeCmd = func(ctx context.Context, name string, arg ...string) (err error) { 100 + currentCmd := append([]string{name}, arg...) 
101 + cmdHistory = append(cmdHistory, currentCmd) 102 + 103 + if len(cmdHistory) > 1 { 104 + return errors.New("command failed") 105 + } 106 + 107 + return 108 + } 109 + 110 + opts := Options{ 111 + indexTimeout: cmdTimeout, 112 + repoDir: "/repo_dir", 113 + indexDir: "/index_dir", 114 + } 115 + 116 + req := indexRequest{ 117 + CloneURL: "https://example.com/repository.git", 118 + RepoID: 100, 119 + } 120 + 121 + _, err := indexRepository(opts, req) 122 + 123 + if err == nil { 124 + t.Errorf("Error is empty, when it should be present") 125 + } 126 + 127 + expectedHistory := [][]string{ 128 + {"zoekt-git-clone", "-dest", "/repo_dir", "-name", "100", "-repoid", "100", "https://example.com/repository.git"}, 129 + {"git", "-C", "/repo_dir/100.git", "fetch"}, 130 + } 131 + 132 + if !reflect.DeepEqual(cmdHistory, expectedHistory) { 133 + t.Errorf("cmdHistory output is incorrect: %v, expected output: %v", cmdHistory, expectedHistory) 134 + } 135 + }
+13 -2
cmd/zoekt-git-clone/main.go
··· 25 25 "net/url" 26 26 "os" 27 27 "path/filepath" 28 + "strconv" 28 29 "strings" 29 30 30 31 "github.com/sourcegraph/zoekt/gitindex" ··· 32 33 33 34 func main() { 34 35 dest := flag.String("dest", "", "destination directory") 36 + nameFlag := flag.String("name", "", "name of repository") 37 + repoIDFlag := flag.Uint("repoid", 0, "id of repository") 35 38 flag.Parse() 36 39 37 40 if *dest == "" { ··· 45 48 log.Fatalf("url.Parse: %v", err) 46 49 } 47 50 48 - name := filepath.Join(u.Host, u.Path) 49 - name = strings.TrimSuffix(name, ".git") 51 + name := *nameFlag 52 + if name == "" { 53 + name = filepath.Join(u.Host, u.Path) 54 + name = strings.TrimSuffix(name, ".git") 55 + } 50 56 51 57 destDir := filepath.Dir(filepath.Join(*dest, name)) 52 58 if err := os.MkdirAll(destDir, 0o755); err != nil { ··· 55 61 56 62 config := map[string]string{ 57 63 "zoekt.name": name, 64 + } 65 + 66 + repoID := *repoIDFlag 67 + if repoID != 0 { 68 + config["zoekt.repoid"] = strconv.FormatUint(uint64(repoID), 10) 58 69 } 59 70 60 71 destRepo, err := gitindex.CloneRepo(destDir, filepath.Base(name), u.String(), config)