fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "fmt"
20 "strings"
21
22 "github.com/sourcegraph/zoekt"
23 "github.com/sourcegraph/zoekt/ctags"
24)
25
// enryLanguageMappings rewrites language names whose lowercased form still
// needs a different spelling. normalizeLanguage lowercases its input before
// consulting this table, so every key here must be written in lowercase.
var (
	enryLanguageMappings = map[string]string{
		"c#": "c_sharp",
	}
)
30
31func normalizeLanguage(filetype string) string {
32 normalized := strings.ToLower(filetype)
33 if mapped, ok := enryLanguageMappings[normalized]; ok {
34 normalized = mapped
35 }
36
37 return normalized
38}
39
40func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error {
41 for _, doc := range todo {
42 if doc.Symbols != nil {
43 continue
44 }
45
46 zoekt.DetermineLanguageIfUnknown(doc)
47
48 parserKind := languageMap[normalizeLanguage(doc.Language)]
49 if parserKind == ctags.NoCTags {
50 continue
51 }
52
53 parser := parserMap[parserKind]
54 if parser == nil {
55 parser = parserMap[ctags.UniversalCTags]
56 if parser == nil {
57 // this happens if CTagsMustSucceed is not true and we didn't find universal-ctags
58 continue
59 }
60 }
61
62 es, err := parser.Parse(doc.Name, doc.Content)
63 if err != nil {
64 return err
65 }
66 if len(es) == 0 {
67 continue
68 }
69
70 symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
71 if err != nil {
72 return fmt.Errorf("%s: %v", doc.Name, err)
73 }
74 doc.Symbols = symOffsets
75 doc.SymbolsMetaData = symMetaData
76 }
77
78 return nil
79}
80
81// overlaps finds the proper position to insert a zoekt.DocumentSection with
82// "start and "end" into "symOffsets". It returns -1 if the new section overlaps
83// with one of the existing ones.
84func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
85 i := 0
86 for i = len(symOffsets) - 1; i >= 0; i-- {
87 // The most common case is that we exit here, because symOffsets is sorted by
88 // construction and start is in many cases monotonically increasing.
89 if start >= symOffsets[i].End {
90 break
91 }
92 if end <= symOffsets[i].Start {
93 continue
94 }
95 // overlap
96 return -1
97 }
98 return i + 1
99}
100
101// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
102// with corresponding metadata (zoekt.Symbol).
103func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
104 nls := newLinesIndices(content)
105 nls = append(nls, uint32(len(content)))
106 var symOffsets []zoekt.DocumentSection
107 var symMetaData []*zoekt.Symbol
108
109 for _, t := range tags {
110 if t.Line <= 0 {
111 // Observed this with a .JS file.
112 continue
113 }
114 lineIdx := t.Line - 1
115 if lineIdx >= len(nls) {
116 return nil, nil, fmt.Errorf("linenum for entry out of range %v", t)
117 }
118
119 lineOff := uint32(0)
120 if lineIdx > 0 {
121 lineOff = nls[lineIdx-1] + 1
122 }
123
124 end := nls[lineIdx]
125 line := content[lineOff:end]
126
127 // This is best-effort only. For short symbol names, we will often determine the
128 // wrong offset.
129 intraOff := bytes.Index(line, []byte(t.Name))
130 if intraOff < 0 {
131 // for Go code, this is very common, since
132 // ctags barfs on multi-line declarations
133 continue
134 }
135
136 start := lineOff + uint32(intraOff)
137 endSym := start + uint32(len(t.Name))
138
139 i := overlaps(symOffsets, start, endSym)
140 if i == -1 {
141 // Detected an overlap. Give up.
142 continue
143 }
144
145 symOffsets = append(
146 symOffsets[:i],
147 append([]zoekt.DocumentSection{{Start: start, End: endSym}}, symOffsets[i:]...)...,
148 )
149 symMetaData = append(
150 symMetaData[:i],
151 append(
152 []*zoekt.Symbol{{Sym: t.Name, Kind: t.Kind, Parent: t.Parent, ParentKind: t.ParentKind}},
153 symMetaData[i:]...,
154 )...,
155 )
156 }
157
158 return symOffsets, symMetaData, nil
159}
160
// newLinesIndices returns the byte offset of every '\n' in in, in ascending
// order. The result is empty (but not nil) when in contains no newline.
func newLinesIndices(in []byte) []uint32 {
	// Initial capacity guesses one newline per 30 bytes of input.
	offsets := make([]uint32, 0, len(in)/30)
	for pos := 0; pos < len(in); pos++ {
		if in[pos] != '\n' {
			continue
		}
		offsets = append(offsets, uint32(pos))
	}
	return offsets
}