fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package build 16 17import ( 18 "bytes" 19 "fmt" 20 "log" 21 "strings" 22 "sync" 23 "time" 24 25 "golang.org/x/exp/slices" 26 27 "github.com/sourcegraph/zoekt" 28 "github.com/sourcegraph/zoekt/ctags" 29) 30 31// Make sure all names are lowercase here, since they are normalized 32var enryLanguageMappings = map[string]string{ 33 "c#": "c_sharp", 34} 35 36func normalizeLanguage(filetype string) string { 37 normalized := strings.ToLower(filetype) 38 if mapped, ok := enryLanguageMappings[normalized]; ok { 39 normalized = mapped 40 } 41 42 return normalized 43} 44 45func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error { 46 monitor := newMonitor() 47 defer monitor.Stop() 48 49 var tagsToSections tagsToSections 50 51 for _, doc := range todo { 52 if doc.Symbols != nil { 53 continue 54 } 55 56 zoekt.DetermineLanguageIfUnknown(doc) 57 58 parserKind := languageMap[normalizeLanguage(doc.Language)] 59 if parserKind == ctags.NoCTags { 60 continue 61 } 62 63 parser := parserMap[parserKind] 64 if parser == nil { 65 parser = parserMap[ctags.UniversalCTags] 66 if parser == nil { 67 // this happens if CTagsMustSucceed is not true and we didn't find universal-ctags 68 continue 69 } 70 } 71 72 monitor.BeginParsing(doc) 73 es, err := parser.Parse(doc.Name, doc.Content) 74 monitor.EndParsing(es) 75 76 if err != nil { 77 return err 78 } 79 if len(es) == 0 { 80 continue 81 } 82 83 symOffsets, symMetaData, err := tagsToSections.Convert(doc.Content, es) 84 if err != nil { 85 return fmt.Errorf("%s: %v", doc.Name, err) 86 } 87 doc.Symbols = symOffsets 88 doc.SymbolsMetaData = symMetaData 89 } 90 91 return nil 92} 93 94// overlaps finds the proper position to insert a zoekt.DocumentSection with 95// "start and "end" into "symOffsets". It returns -1 if the new section overlaps 96// with one of the existing ones. 97func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { 98 i := 0 99 for i = len(symOffsets) - 1; i >= 0; i-- { 100 // The most common case is that we exit here, because symOffsets is sorted by 101 // construction and start is in many cases monotonically increasing. 102 if start >= symOffsets[i].End { 103 break 104 } 105 if end <= symOffsets[i].Start { 106 continue 107 } 108 // overlap 109 return -1 110 } 111 return i + 1 112} 113 114// tagsToSections contains buffers to be reused between conversions of bytes 115// ranges to metadata. This is done to reduce pressure on the garbage 116// collector. 117type tagsToSections struct { 118 nlsBuf []uint32 119} 120 121// Convert ctags entries to byte ranges (zoekt.DocumentSection) with 122// corresponding metadata (zoekt.Symbol). 123// 124// This can not be called concurrently. 125func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { 126 nls := t.newLinesIndices(content) 127 symOffsets := make([]zoekt.DocumentSection, 0, len(tags)) 128 symMetaData := make([]*zoekt.Symbol, 0, len(tags)) 129 130 for _, t := range tags { 131 if t.Line <= 0 { 132 // Observed this with a .JS file. 133 continue 134 } 135 lineIdx := t.Line - 1 136 if lineIdx >= len(nls) { 137 return nil, nil, fmt.Errorf("linenum for entry out of range %v", t) 138 } 139 140 lineOff := uint32(0) 141 if lineIdx > 0 { 142 lineOff = nls[lineIdx-1] + 1 143 } 144 145 end := nls[lineIdx] 146 line := content[lineOff:end] 147 148 // This is best-effort only. For short symbol names, we will often determine the 149 // wrong offset. 150 intraOff := bytes.Index(line, []byte(t.Name)) 151 if intraOff < 0 { 152 // for Go code, this is very common, since 153 // ctags barfs on multi-line declarations 154 continue 155 } 156 157 start := lineOff + uint32(intraOff) 158 endSym := start + uint32(len(t.Name)) 159 160 i := overlaps(symOffsets, start, endSym) 161 if i == -1 { 162 // Detected an overlap. Give up. 163 continue 164 } 165 166 symOffsets = slices.Insert(symOffsets, i, zoekt.DocumentSection{ 167 Start: start, 168 End: endSym, 169 }) 170 symMetaData = slices.Insert(symMetaData, i, &zoekt.Symbol{ 171 Sym: t.Name, 172 Kind: t.Kind, 173 Parent: t.Parent, 174 ParentKind: t.ParentKind, 175 }) 176 } 177 178 return symOffsets, symMetaData, nil 179} 180 181// newLinesIndices returns an array of all indexes of '\n' aswell as a final 182// value for the length of the document. 183func (t *tagsToSections) newLinesIndices(in []byte) []uint32 { 184 // reuse nlsBuf between calls to tagsToSections.Convert 185 out := t.nlsBuf 186 if out == nil { 187 out = make([]uint32, 0, len(in)/30) 188 } 189 190 finalEntry := uint32(len(in)) 191 off := uint32(0) 192 for len(in) > 0 { 193 i := bytes.IndexByte(in, '\n') 194 if i < 0 { 195 out = append(out, finalEntry) 196 break 197 } 198 199 off += uint32(i) 200 out = append(out, off) 201 202 in = in[i+1:] 203 off++ 204 } 205 206 // save buffer for reuse 207 t.nlsBuf = out[:0] 208 209 return out 210} 211 212// monitorReportStuck is how long we need to be analysing a document before 213// reporting it to stdout. 214const monitorReportStuck = 10 * time.Second 215 216// monitorReportStatus is how often we given status updates 217const monitorReportStatus = time.Minute 218 219type monitor struct { 220 mu sync.Mutex 221 222 lastUpdate time.Time 223 224 start time.Time 225 totalSize int 226 totalSymbols int 227 228 currentDocName string 229 currentDocSize int 230 currentDocStuckCount int 231 232 done chan struct{} 233} 234 235func newMonitor() *monitor { 236 start := time.Now() 237 m := &monitor{ 238 start: start, 239 lastUpdate: start, 240 done: make(chan struct{}), 241 } 242 go m.run() 243 return m 244} 245 246func (m *monitor) BeginParsing(doc *zoekt.Document) { 247 now := time.Now() 248 m.mu.Lock() 249 m.lastUpdate = now 250 251 // set current doc 252 m.currentDocName = doc.Name 253 m.currentDocSize = len(doc.Content) 254 255 m.mu.Unlock() 256} 257 258func (m *monitor) EndParsing(entries []*ctags.Entry) { 259 now := time.Now() 260 m.mu.Lock() 261 m.lastUpdate = now 262 263 // update aggregate stats 264 m.totalSize += m.currentDocSize 265 m.totalSymbols += len(entries) 266 267 // inform done if we warned about current document 268 if m.currentDocStuckCount > 0 { 269 log.Printf("symbol analysis for %s (size %d bytes) is done and found %d symbols", m.currentDocName, m.currentDocSize, len(entries)) 270 m.currentDocStuckCount = 0 271 } 272 273 // unset current document 274 m.currentDocName = "" 275 m.currentDocSize = 0 276 277 m.mu.Unlock() 278} 279 280func (m *monitor) Stop() { 281 close(m.done) 282} 283 284func (m *monitor) run() { 285 stuckTicker := time.NewTicker(monitorReportStuck / 2) // half due to sampling theorem (nyquist) 286 statusTicker := time.NewTicker(monitorReportStatus) 287 288 defer stuckTicker.Stop() 289 defer statusTicker.Stop() 290 291 for { 292 select { 293 case <-m.done: 294 now := time.Now() 295 m.mu.Lock() 296 log.Printf("symbol analysis finished for shard statistics: duration=%v symbols=%d bytes=%d", now.Sub(m.start).Truncate(time.Second), m.totalSymbols, m.totalSize) 297 m.mu.Unlock() 298 return 299 300 case <-stuckTicker.C: 301 now := time.Now() 302 m.mu.Lock() 303 running := now.Sub(m.lastUpdate).Truncate(time.Second) 304 report := monitorReportStuck * (1 << m.currentDocStuckCount) // double the amount of time each time we report 305 if m.currentDocName != "" && running >= report { 306 m.currentDocStuckCount++ 307 log.Printf("WARN: symbol analysis for %s (%d bytes) has been running for %v", m.currentDocName, m.currentDocSize, running) 308 } 309 m.mu.Unlock() 310 311 case <-statusTicker.C: 312 now := time.Now() 313 m.mu.Lock() 314 log.Printf("DEBUG: symbol analysis still running for shard statistics: duration=%v symbols=%d bytes=%d", now.Sub(m.start).Truncate(time.Second), m.totalSymbols, m.totalSize) 315 m.mu.Unlock() 316 } 317 } 318}