fork of https://github.com/sourcegraph/zoekt
1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
// Command zoekt-merge-index merges a set of index shards into a compound shard,
// or explodes a compound shard back into individual shards.
14package main
15
16import (
17 "bufio"
18 "fmt"
19 "log"
20 "os"
21 "path/filepath"
22 "strings"
23
24 "github.com/sourcegraph/zoekt/index"
25)
26
27// merge merges the input shards into a compound shard in dstDir. It returns the
28// full path to the compound shard. The input shards are removed on success.
29func merge(dstDir string, names []string) (string, error) {
30 var files []index.IndexFile
31 for _, fn := range names {
32 f, err := os.Open(fn)
33 if err != nil {
34 return "", nil
35 }
36 defer f.Close()
37
38 indexFile, err := index.NewIndexFile(f)
39 if err != nil {
40 return "", err
41 }
42 defer indexFile.Close()
43
44 files = append(files, indexFile)
45 }
46
47 tmpName, dstName, err := index.Merge(dstDir, files...)
48 if err != nil {
49 return "", err
50 }
51
52 // Delete input shards.
53 for _, name := range names {
54 paths, err := index.IndexFilePaths(name)
55 if err != nil {
56 return "", fmt.Errorf("zoekt-merge-index: %w", err)
57 }
58 for _, p := range paths {
59 if err := os.Remove(p); err != nil {
60 return "", fmt.Errorf("zoekt-merge-index: failed to remove simple shard: %w", err)
61 }
62 }
63 }
64
65 // We only rename the compound shard if all simple shards could be deleted in the
66 // previous step. This guarantees we won't have duplicate indexes.
67 if err := os.Rename(tmpName, dstName); err != nil {
68 return "", fmt.Errorf("zoekt-merge-index: failed to rename compound shard: %w", err)
69 }
70
71 return dstName, nil
72}
73
74func mergeCmd(paths []string) (string, error) {
75 if paths[0] == "-" {
76 paths = []string{}
77 scanner := bufio.NewScanner(os.Stdin)
78 for scanner.Scan() {
79 paths = append(paths, strings.TrimSpace(scanner.Text()))
80 }
81 if err := scanner.Err(); err != nil {
82 return "", err
83 }
84 log.Printf("merging %d paths from stdin", len(paths))
85 }
86
87 return merge(filepath.Dir(paths[0]), paths)
88}
89
90// explode splits the input shard into individual shards and places them in dstDir.
91// Temporary files created in the process are removed on a best effort basis.
92func explode(dstDir string, inputShard string) error {
93 f, err := os.Open(inputShard)
94 if err != nil {
95 return err
96 }
97 defer f.Close()
98
99 indexFile, err := index.NewIndexFile(f)
100 if err != nil {
101 return err
102 }
103 defer indexFile.Close()
104
105 exploded, err := index.Explode(dstDir, indexFile)
106 defer func() {
107 // best effort removal of tmp files. If os.Remove fails, indexserver will delete
108 // the leftover tmp files during the next cleanup.
109 for tmpFn := range exploded {
110 os.Remove(tmpFn)
111 }
112 }()
113 if err != nil {
114 return fmt.Errorf("zoekt.Explode: %w", err)
115 }
116
117 // remove the input shard first to avoid duplicate indexes. In the worst case,
118 // the process is interrupted just after we delete the compound shard, in which
119 // case we have to reindex the lost repos.
120 paths, err := index.IndexFilePaths(inputShard)
121 if err != nil {
122 return err
123 }
124 for _, path := range paths {
125 err = os.Remove(path)
126 if err != nil {
127 return err
128 }
129 }
130
131 // best effort rename shards.
132 for tmpFn, dstFn := range exploded {
133 if err := os.Rename(tmpFn, dstFn); err != nil {
134 log.Printf("explode: rename failed: %s", err)
135 }
136 }
137
138 return nil
139}
140
141func explodeCmd(path string) error {
142 return explode(filepath.Dir(path), path)
143}
144
145func main() {
146 switch subCommand := os.Args[1]; subCommand {
147 case "merge":
148 compoundShardPath, err := mergeCmd(os.Args[2:])
149 if err != nil {
150 log.Fatal(err)
151 }
152 fmt.Println(compoundShardPath)
153 case "explode":
154 if err := explodeCmd(os.Args[2]); err != nil {
155 log.Fatal(err)
156 }
157 default:
158 log.Fatalf("unknown subcommand %s", subCommand)
159 }
160}