fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package build 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "strings" 22 "sync" 23 "time" 24 25 "golang.org/x/exp/slices" 26 27 "github.com/sourcegraph/zoekt" 28 "github.com/sourcegraph/zoekt/ctags" 29) 30 31// Make sure all names are lowercase here, since they are normalized 32var enryLanguageMappings = map[string]string{ 33 "c#": "c_sharp", 34} 35 36func normalizeLanguage(filetype string) string { 37 normalized := strings.ToLower(filetype) 38 if mapped, ok := enryLanguageMappings[normalized]; ok { 39 normalized = mapped 40 } 41 42 return normalized 43} 44 45func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error { 46 monitor := newMonitor() 47 defer monitor.Stop() 48 49 var tagsToSections tagsToSections 50 51 for _, doc := range todo { 52 if len(doc.Content) == 0 || doc.Symbols != nil { 53 continue 54 } 55 56 zoekt.DetermineLanguageIfUnknown(doc) 57 58 parserKind := languageMap[normalizeLanguage(doc.Language)] 59 if parserKind == ctags.NoCTags { 60 continue 61 } 62 63 // If the parser kind is unknown, default to universal-ctags 64 if parserKind == ctags.UnknownCTags { 65 parserKind = ctags.UniversalCTags 66 } 67 68 parser := parserMap[parserKind] 69 if parser == nil { 70 // this happens if CTagsMustSucceed is false and we didn't find the binary 71 continue 72 } 73 74 monitor.BeginParsing(doc) 75 es, err := parser.Parse(doc.Name, doc.Content) 76 monitor.EndParsing(es) 77 78 if err != nil { 79 return err 80 } 81 if len(es) == 0 { 82 continue 83 } 84 85 symOffsets, symMetaData, err := tagsToSections.Convert(doc.Content, es) 86 if err != nil { 87 return fmt.Errorf("%s: %v", doc.Name, err) 88 } 89 doc.Symbols = symOffsets 90 doc.SymbolsMetaData = symMetaData 91 } 92 93 return nil 94} 95 96// overlaps finds the proper position to insert a zoekt.DocumentSection with 97// "start and "end" into "symOffsets". It returns -1 if the new section overlaps 98// with one of the existing ones. 99func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { 100 i := 0 101 for i = len(symOffsets) - 1; i >= 0; i-- { 102 // The most common case is that we exit here, because symOffsets is sorted by 103 // construction and start is in many cases monotonically increasing. 104 if start >= symOffsets[i].End { 105 break 106 } 107 if end <= symOffsets[i].Start { 108 continue 109 } 110 // overlap 111 return -1 112 } 113 return i + 1 114} 115 116// tagsToSections contains buffers to be reused between conversions of bytes 117// ranges to metadata. This is done to reduce pressure on the garbage 118// collector. 119type tagsToSections struct { 120 nlsBuf []uint32 121} 122 123// Convert ctags entries to byte ranges (zoekt.DocumentSection) with 124// corresponding metadata (zoekt.Symbol). 125// 126// This can not be called concurrently. 127func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { 128 nls := t.newLinesIndices(content) 129 symOffsets := make([]zoekt.DocumentSection, 0, len(tags)) 130 symMetaData := make([]*zoekt.Symbol, 0, len(tags)) 131 132 for _, t := range tags { 133 if t.Line <= 0 { 134 // Observed this with a .JS file. 135 continue 136 } 137 lineIdx := t.Line - 1 138 if lineIdx >= len(nls) { 139 // Observed this with a .TS file. 140 continue 141 } 142 143 lineOff := uint32(0) 144 if lineIdx > 0 { 145 lineOff = nls[lineIdx-1] + 1 146 } 147 148 end := nls[lineIdx] 149 line := content[lineOff:end] 150 151 // This is best-effort only. For short symbol names, we will often determine the 152 // wrong offset. 153 intraOff := bytes.Index(line, []byte(t.Name)) 154 if intraOff < 0 { 155 // for Go code, this is very common, since 156 // ctags barfs on multi-line declarations 157 continue 158 } 159 160 start := lineOff + uint32(intraOff) 161 endSym := start + uint32(len(t.Name)) 162 163 i := overlaps(symOffsets, start, endSym) 164 if i == -1 { 165 // Detected an overlap. Give up. 166 continue 167 } 168 169 symOffsets = slices.Insert(symOffsets, i, zoekt.DocumentSection{ 170 Start: start, 171 End: endSym, 172 }) 173 symMetaData = slices.Insert(symMetaData, i, &zoekt.Symbol{ 174 Sym: t.Name, 175 Kind: t.Kind, 176 Parent: t.Parent, 177 ParentKind: t.ParentKind, 178 }) 179 } 180 181 return symOffsets, symMetaData, nil 182} 183 184// newLinesIndices returns an array of all indexes of '\n' aswell as a final 185// value for the length of the document. 186func (t *tagsToSections) newLinesIndices(in []byte) []uint32 { 187 // reuse nlsBuf between calls to tagsToSections.Convert 188 out := t.nlsBuf 189 if out == nil { 190 out = make([]uint32, 0, len(in)/30) 191 } 192 193 finalEntry := uint32(len(in)) 194 off := uint32(0) 195 for len(in) > 0 { 196 i := bytes.IndexByte(in, '\n') 197 if i < 0 { 198 out = append(out, finalEntry) 199 break 200 } 201 202 off += uint32(i) 203 out = append(out, off) 204 205 in = in[i+1:] 206 off++ 207 } 208 209 // save buffer for reuse 210 t.nlsBuf = out[:0] 211 212 return out 213} 214 215// monitorReportStuck is how long we need to be analysing a document before 216// reporting it to stdout. 217const monitorReportStuck = 10 * time.Second 218 219// monitorReportStatus is how often we given status updates 220const monitorReportStatus = time.Minute 221 222type monitor struct { 223 mu sync.Mutex 224 225 lastUpdate time.Time 226 227 start time.Time 228 totalSize int 229 totalSymbols int 230 231 currentDocName string 232 currentDocSize int 233 currentDocStuckCount int 234 235 done chan struct{} 236} 237 238func newMonitor() *monitor { 239 start := time.Now() 240 m := &monitor{ 241 start: start, 242 lastUpdate: start, 243 done: make(chan struct{}), 244 } 245 go m.run() 246 return m 247} 248 249func (m *monitor) BeginParsing(doc *zoekt.Document) { 250 now := time.Now() 251 m.mu.Lock() 252 m.lastUpdate = now 253 254 // set current doc 255 m.currentDocName = doc.Name 256 m.currentDocSize = len(doc.Content) 257 258 m.mu.Unlock() 259} 260 261func (m *monitor) EndParsing(entries []*ctags.Entry) { 262 now := time.Now() 263 m.mu.Lock() 264 m.lastUpdate = now 265 266 // update aggregate stats 267 m.totalSize += m.currentDocSize 268 m.totalSymbols += len(entries) 269 270 // inform done if we warned about current document 271 if m.currentDocStuckCount > 0 { 272 log.Printf("symbol analysis for %s (size %d bytes) is done and found %d symbols", m.currentDocName, m.currentDocSize, len(entries)) 273 m.currentDocStuckCount = 0 274 } 275 276 // unset current document 277 m.currentDocName = "" 278 m.currentDocSize = 0 279 280 m.mu.Unlock() 281} 282 283func (m *monitor) Stop() { 284 close(m.done) 285} 286 287func (m *monitor) run() { 288 stuckTicker := time.NewTicker(monitorReportStuck / 2) // half due to sampling theorem (nyquist) 289 statusTicker := time.NewTicker(monitorReportStatus) 290 291 defer stuckTicker.Stop() 292 defer statusTicker.Stop() 293 294 for { 295 select { 296 case <-m.done: 297 now := time.Now() 298 m.mu.Lock() 299 log.Printf("symbol analysis finished for shard statistics: duration=%v symbols=%d bytes=%d", now.Sub(m.start).Truncate(time.Second), m.totalSymbols, m.totalSize) 300 m.mu.Unlock() 301 return 302 303 case <-stuckTicker.C: 304 now := time.Now() 305 m.mu.Lock() 306 running := now.Sub(m.lastUpdate).Truncate(time.Second) 307 report := monitorReportStuck * (1 << m.currentDocStuckCount) // double the amount of time each time we report 308 if m.currentDocName != "" && running >= report { 309 m.currentDocStuckCount++ 310 log.Printf("WARN: symbol analysis for %s (%d bytes) has been running for %v", m.currentDocName, m.currentDocSize, running) 311 } 312 m.mu.Unlock() 313 314 case <-statusTicker.C: 315 now := time.Now() 316 m.mu.Lock() 317 log.Printf("DEBUG: symbol analysis still running for shard statistics: duration=%v symbols=%d bytes=%d", now.Sub(m.start).Truncate(time.Second), m.totalSymbols, m.totalSize) 318 m.mu.Unlock() 319 } 320 } 321}