fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package build
16
17import (
18 "bytes"
19 "fmt"
20 "strings"
21
22 "github.com/sourcegraph/zoekt"
23 "github.com/sourcegraph/zoekt/ctags"
24)
25
// enryLanguageMappings rewrites selected language names to an alternative
// spelling (presumably the detected language name -> the key expected by the
// ctags language map; confirm against callers).
//
// Keys must be lowercase: normalizeLanguage lower-cases its input before
// consulting this table, so a key containing uppercase letters would never
// match.
var enryLanguageMappings = map[string]string{
	"c#": "c_sharp",
}
30
31func normalizeLanguage(filetype string) string {
32 normalized := strings.ToLower(filetype)
33 if mapped, ok := enryLanguageMappings[normalized]; ok {
34 normalized = mapped
35 }
36
37 return normalized
38}
39
40func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error {
41 for _, doc := range todo {
42 if doc.Symbols != nil {
43 continue
44 }
45
46 zoekt.DetermineLanguageIfUnknown(doc)
47
48 parserKind := languageMap[normalizeLanguage(doc.Language)]
49 if parserKind == ctags.NoCTags {
50 continue
51 }
52
53 parser := parserMap[parserKind]
54 if parser == nil {
55 parser = parserMap[ctags.UniversalCTags]
56 }
57
58 es, err := parser.Parse(doc.Name, doc.Content)
59 if err != nil {
60 return err
61 }
62 if len(es) == 0 {
63 continue
64 }
65
66 symOffsets, symMetaData, err := tagsToSections(doc.Content, es)
67 if err != nil {
68 return fmt.Errorf("%s: %v", doc.Name, err)
69 }
70 doc.Symbols = symOffsets
71 doc.SymbolsMetaData = symMetaData
72 }
73
74 return nil
75}
76
77// overlaps finds the proper position to insert a zoekt.DocumentSection with
78// "start and "end" into "symOffsets". It returns -1 if the new section overlaps
79// with one of the existing ones.
80func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int {
81 i := 0
82 for i = len(symOffsets) - 1; i >= 0; i-- {
83 // The most common case is that we exit here, because symOffsets is sorted by
84 // construction and start is in many cases monotonically increasing.
85 if start >= symOffsets[i].End {
86 break
87 }
88 if end <= symOffsets[i].Start {
89 continue
90 }
91 // overlap
92 return -1
93 }
94 return i + 1
95}
96
97// tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection)
98// with corresponding metadata (zoekt.Symbol).
99func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) {
100 nls := newLinesIndices(content)
101 nls = append(nls, uint32(len(content)))
102 var symOffsets []zoekt.DocumentSection
103 var symMetaData []*zoekt.Symbol
104
105 for _, t := range tags {
106 if t.Line <= 0 {
107 // Observed this with a .JS file.
108 continue
109 }
110 lineIdx := t.Line - 1
111 if lineIdx >= len(nls) {
112 return nil, nil, fmt.Errorf("linenum for entry out of range %v", t)
113 }
114
115 lineOff := uint32(0)
116 if lineIdx > 0 {
117 lineOff = nls[lineIdx-1] + 1
118 }
119
120 end := nls[lineIdx]
121 line := content[lineOff:end]
122
123 // This is best-effort only. For short symbol names, we will often determine the
124 // wrong offset.
125 intraOff := bytes.Index(line, []byte(t.Name))
126 if intraOff < 0 {
127 // for Go code, this is very common, since
128 // ctags barfs on multi-line declarations
129 continue
130 }
131
132 start := lineOff + uint32(intraOff)
133 endSym := start + uint32(len(t.Name))
134
135 i := overlaps(symOffsets, start, endSym)
136 if i == -1 {
137 // Detected an overlap. Give up.
138 continue
139 }
140
141 symOffsets = append(
142 symOffsets[:i],
143 append([]zoekt.DocumentSection{{Start: start, End: endSym}}, symOffsets[i:]...)...,
144 )
145 symMetaData = append(
146 symMetaData[:i],
147 append(
148 []*zoekt.Symbol{{Sym: t.Name, Kind: t.Kind, Parent: t.Parent, ParentKind: t.ParentKind}},
149 symMetaData[i:]...,
150 )...,
151 )
152 }
153
154 return symOffsets, symMetaData, nil
155}
156
// newLinesIndices returns the byte offsets of every '\n' in in, in ascending
// order. The result is empty (but non-nil) when in contains no newline.
func newLinesIndices(in []byte) []uint32 {
	// Capacity is a rough guess of one newline per 30 bytes of input.
	offsets := make([]uint32, 0, len(in)/30)
	for pos := 0; pos < len(in); pos++ {
		if in[pos] != '\n' {
			continue
		}
		offsets = append(offsets, uint32(pos))
	}
	return offsets
}
165}