fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "fmt"
20 "os"
21 "os/exec"
22 "path/filepath"
23 "time"
24
25 "github.com/sourcegraph/zoekt"
26 "github.com/sourcegraph/zoekt/ctags"
27)
28
29func runCTags(bin string, inputs map[string][]byte) ([]*ctags.Entry, error) {
30 const debug = false
31 if len(inputs) == 0 {
32 return nil, nil
33 }
34 dir, err := os.MkdirTemp("", "ctags-input")
35 if err != nil {
36 return nil, err
37 }
38 if !debug {
39 defer os.RemoveAll(dir)
40 }
41
42 // --sort shells out to sort(1).
43 args := []string{bin, "-n", "-f", "-", "--sort=no"}
44
45 fileCount := 0
46 for n, c := range inputs {
47 if len(c) == 0 {
48 continue
49 }
50
51 full := filepath.Join(dir, n)
52 if err := os.MkdirAll(filepath.Dir(full), 0o700); err != nil {
53 return nil, err
54 }
55 err := os.WriteFile(full, c, 0o600)
56 if err != nil {
57 return nil, err
58 }
59 args = append(args, n)
60 fileCount++
61 }
62 if fileCount == 0 {
63 return nil, nil
64 }
65
66 cmd := exec.Command(args[0], args[1:]...)
67 cmd.Dir = dir
68
69 var errBuf, outBuf bytes.Buffer
70 cmd.Stderr = &errBuf
71 cmd.Stdout = &outBuf
72
73 if err := cmd.Start(); err != nil {
74 return nil, err
75 }
76
77 errChan := make(chan error, 1)
78 go func() {
79 err := cmd.Wait()
80 errChan <- err
81 }()
82 timeout := time.After(60 * time.Second)
83 select {
84 case <-timeout:
85 _ = cmd.Process.Kill()
86 return nil, fmt.Errorf("timeout executing ctags")
87 case err := <-errChan:
88 if err != nil {
89 return nil, fmt.Errorf("exec(%s): %v, stderr: %s", cmd.Args, err, errBuf.String())
90 }
91 }
92
93 var entries []*ctags.Entry
94 for _, l := range bytes.Split(outBuf.Bytes(), []byte{'\n'}) {
95 if len(l) == 0 {
96 continue
97 }
98 e, err := ctags.Parse(string(l))
99 if err != nil {
100 return nil, err
101 }
102
103 if len(e.Name) == 1 {
104 continue
105 }
106 entries = append(entries, e)
107 }
108 return entries, nil
109}
110
111func runCTagsChunked(bin string, in map[string][]byte) ([]*ctags.Entry, error) {
112 var res []*ctags.Entry
113
114 cur := map[string][]byte{}
115 sz := 0
116 for k, v := range in {
117 cur[k] = v
118 sz += len(k)
119
120 // 100k seems reasonable.
121 if sz > (100 << 10) {
122 r, err := runCTags(bin, cur)
123 if err != nil {
124 return nil, err
125 }
126 res = append(res, r...)
127
128 cur = map[string][]byte{}
129 sz = 0
130 }
131 }
132 r, err := runCTags(bin, cur)
133 if err != nil {
134 return nil, err
135 }
136 res = append(res, r...)
137 return res, nil
138}
139
140func ctagsAddSymbolsParser(todo []*zoekt.Document, parser ctags.Parser) error {
141 for _, doc := range todo {
142 if doc.Symbols != nil {
143 continue
144 }
145
146 es, err := parser.Parse(doc.Name, doc.Content)
147 if err != nil {
148 return err
149 }
150 if len(es) == 0 {
151 continue
152 }
153
154 symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
155 if err != nil {
156 return fmt.Errorf("%s: %v", doc.Name, err)
157 }
158 doc.Symbols = symOffsets
159 doc.SymbolsMetaData = symMetaData
160 }
161
162 return nil
163}
164
165func ctagsAddSymbols(todo []*zoekt.Document, parser ctags.Parser, bin string) error {
166 if parser != nil {
167 return ctagsAddSymbolsParser(todo, parser)
168 }
169
170 pathIndices := map[string]int{}
171 contents := map[string][]byte{}
172 for i, t := range todo {
173 if t.Symbols != nil {
174 continue
175 }
176
177 _, ok := pathIndices[t.Name]
178 if ok {
179 continue
180 }
181
182 pathIndices[t.Name] = i
183 contents[t.Name] = t.Content
184 }
185
186 var err error
187 var entries []*ctags.Entry
188 entries, err = runCTagsChunked(bin, contents)
189 if err != nil {
190 return err
191 }
192
193 fileTags := map[string][]*ctags.Entry{}
194 for _, e := range entries {
195 fileTags[e.Path] = append(fileTags[e.Path], e)
196 }
197
198 for k, tags := range fileTags {
199 symOffsets, symMetaData, err := tagsToSections(contents[k], tags)
200 if err != nil {
201 return fmt.Errorf("%s: %v", k, err)
202 }
203 todo[pathIndices[k]].Symbols = symOffsets
204 todo[pathIndices[k]].SymbolsMetaData = symMetaData
205 }
206 return nil
207}
208
209// overlaps finds the proper position to insert a zoekt.DocumentSection with
210// "start and "end" into "symOffsets". It returns -1 if the new section overlaps
211// with one of the existing ones.
212func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
213 i := 0
214 for i = len(symOffsets) - 1; i >= 0; i-- {
215 // The most common case is that we exit here, because symOffsets is sorted by
216 // construction and start is in many cases monotonically increasing.
217 if start >= symOffsets[i].End {
218 break
219 }
220 if end <= symOffsets[i].Start {
221 continue
222 }
223 // overlap
224 return -1
225 }
226 return i + 1
227}
228
229// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
230// with corresponding metadata (zoekt.Symbol).
231func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
232 nls := newLinesIndices(content)
233 nls = append(nls, uint32(len(content)))
234 var symOffsets []zoekt.DocumentSection
235 var symMetaData []*zoekt.Symbol
236
237 for _, t := range tags {
238 if t.Line <= 0 {
239 // Observed this with a .JS file.
240 continue
241 }
242 lineIdx := t.Line - 1
243 if lineIdx >= len(nls) {
244 return nil, nil, fmt.Errorf("linenum for entry out of range %v", t)
245 }
246
247 lineOff := uint32(0)
248 if lineIdx > 0 {
249 lineOff = nls[lineIdx-1] + 1
250 }
251
252 end := nls[lineIdx]
253 line := content[lineOff:end]
254
255 // This is best-effort only. For short symbol names, we will often determine the
256 // wrong offset.
257 intraOff := bytes.Index(line, []byte(t.Name))
258 if intraOff < 0 {
259 // for Go code, this is very common, since
260 // ctags barfs on multi-line declarations
261 continue
262 }
263
264 start := lineOff + uint32(intraOff)
265 endSym := start + uint32(len(t.Name))
266
267 i := overlaps(symOffsets, start, endSym)
268 if i == -1 {
269 // Detected an overlap. Give up.
270 continue
271 }
272
273 symOffsets = append(
274 symOffsets[:i],
275 append([]zoekt.DocumentSection{{Start: start, End: endSym}}, symOffsets[i:]...)...,
276 )
277 symMetaData = append(
278 symMetaData[:i],
279 append(
280 []*zoekt.Symbol{{Sym: t.Name, Kind: t.Kind, Parent: t.Parent, ParentKind: t.ParentKind}},
281 symMetaData[i:]...,
282 )...,
283 )
284 }
285
286 return symOffsets, symMetaData, nil
287}
288
289func newLinesIndices(in []byte) []uint32 {
290 out := make([]uint32, 0, len(in)/30)
291 for i, c := range in {
292 if c == '\n' {
293 out = append(out, uint32(i))
294 }
295 }
296 return out
297}