fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Indexing: clean up ctags parser wrapper (#708)

This change cleans up the Go ctags parser wrapper as a follow-up to #702. Specific changes:
* Remove synchronization in `lockedParser` and rename it to `CTagsParser`
* Push delegation to universal vs. SCIP ctags into parser wrapper
* Simplify document timeout logic
* Rename some files

+125 -174
+6 -6
build/builder.go
··· 247 247 todo []*zoekt.Document 248 248 docChecker zoekt.DocChecker 249 249 size int 250 - 251 - parserFactory ctags.ParserFactory 252 - building sync.WaitGroup 250 + 251 + parserBins ctags.ParserBinMap 252 + building sync.WaitGroup 253 253 254 254 errMu sync.Mutex 255 255 buildError error ··· 560 560 finishedShards: map[string]string{}, 561 561 } 562 562 563 - parserFactory, err := ctags.NewParserFactory( 563 + parserBins, err := ctags.NewParserBinMap( 564 564 b.opts.CTagsPath, 565 565 b.opts.ScipCTagsPath, 566 566 opts.LanguageMap, ··· 570 570 return nil, err 571 571 } 572 572 573 - b.parserFactory = parserFactory 573 + b.parserBins = parserBins 574 574 575 575 if opts.IsDelta { 576 576 // Delta shards build on top of previously existing shards. ··· 994 994 995 995 func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { 996 996 if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { 997 - err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory) 997 + err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins) 998 998 if b.opts.CTagsMustSucceed && err != nil { 999 999 return nil, err 1000 1000 }
+9 -20
build/ctags.go
··· 42 42 return normalized 43 43 } 44 44 45 - func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error { 45 + func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { 46 46 monitor := newMonitor() 47 47 defer monitor.Stop() 48 48 49 49 var tagsToSections tagsToSections 50 50 51 - parsers := make(map[ctags.CTagsParserType]ctags.Parser) 51 + parser := ctags.NewCTagsParser(parserBins) 52 + defer parser.Close() 52 53 53 54 for _, doc := range todo { 54 55 if len(doc.Content) == 0 || doc.Symbols != nil { ··· 57 58 58 59 zoekt.DetermineLanguageIfUnknown(doc) 59 60 60 - parserKind := languageMap[normalizeLanguage(doc.Language)] 61 - if parserKind == ctags.NoCTags { 61 + parserType := languageMap[normalizeLanguage(doc.Language)] 62 + if parserType == ctags.NoCTags { 62 63 continue 63 64 } 64 65 65 - // If the parser kind is unknown, default to universal-ctags 66 - if parserKind == ctags.UnknownCTags { 67 - parserKind = ctags.UniversalCTags 68 - } 69 - 70 - parser := parsers[parserKind] 71 - if parser == nil { 72 - // Spin up a new parser for this parser kind 73 - parser = parserFactory.NewParser(parserKind) 74 - if parser == nil { 75 - // this happens if CTagsMustSucceed is false and we didn't find the binary 76 - continue 77 - } 78 - parsers[parserKind] = parser 79 - defer parser.Close() 66 + // If the parser type is unknown, default to universal-ctags 67 + if parserType == ctags.UnknownCTags { 68 + parserType = ctags.UniversalCTags 80 69 } 81 70 82 71 monitor.BeginParsing(doc) 83 - es, err := parser.Parse(doc.Name, doc.Content) 72 + es, err := parser.Parse(doc.Name, doc.Content, parserType) 84 73 monitor.EndParsing(es) 85 74 86 75 if err != nil {
+3 -3
build/ctags_test.go
··· 261 261 b.Fatal(err) 262 262 } 263 263 264 - factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true) 264 + bins, err := ctags.NewParserBinMap("universal-ctags", "", ctags.LanguageMap{}, true) 265 265 if err != nil { 266 266 b.Fatal(err) 267 267 } 268 268 269 - parser := factory.NewParser(ctags.UniversalCTags) 270 - entries, err := parser.Parse("./testdata/large_file.cc", file) 269 + parser := ctags.NewCTagsParser(bins) 270 + entries, err := parser.Parse("./testdata/large_file.cc", file, ctags.UniversalCTags) 271 271 if err != nil { 272 272 b.Fatal(err) 273 273 }
-115
ctags/json.go
··· 1 - // Copyright 2017 Google Inc. All rights reserved. 2 - // 3 - // Licensed under the Apache License, Version 2.0 (the "License"); 4 - // you may not use this file except in compliance with the License. 5 - // You may obtain a copy of the License at 6 - // 7 - // http://www.apache.org/licenses/LICENSE-2.0 8 - // 9 - // Unless required by applicable law or agreed to in writing, software 10 - // distributed under the License is distributed on an "AS IS" BASIS, 11 - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 - // See the License for the specific language governing permissions and 13 - // limitations under the License. 14 - 15 - package ctags 16 - 17 - import ( 18 - "fmt" 19 - "sync" 20 - "time" 21 - 22 - goctags "github.com/sourcegraph/go-ctags" 23 - ) 24 - 25 - const debug = false 26 - 27 - type Parser = goctags.Parser 28 - type Entry = goctags.Entry 29 - 30 - type parseReq struct { 31 - Name string 32 - Content []byte 33 - } 34 - 35 - type parseResp struct { 36 - Entries []*Entry 37 - Err error 38 - } 39 - 40 - type lockedParser struct { 41 - mu sync.Mutex 42 - opts goctags.Options 43 - p Parser 44 - send chan<- parseReq 45 - recv <-chan parseResp 46 - } 47 - 48 - // parseTimeout is how long we wait for a response for parsing a single file 49 - // in ctags. 1 minute is a very conservative timeout which we should only hit 50 - // if ctags hangs. 51 - const parseTimeout = time.Minute 52 - 53 - // Parse wraps go-ctags Parse. It lazily starts the process and adds a timeout 54 - // around parse requests. Additionally it serializes access to the parsing 55 - // process. The timeout is important since we occasionally come across 56 - // documents which hang universal-ctags. 
57 - func (lp *lockedParser) Parse(name string, content []byte) ([]*Entry, error) { 58 - lp.mu.Lock() 59 - defer lp.mu.Unlock() 60 - 61 - if lp.p == nil { 62 - p, err := goctags.New(lp.opts) 63 - if err != nil { 64 - return nil, err 65 - } 66 - send := make(chan parseReq) 67 - // buf of 1 so we avoid blocking sends in the parser if we exit early. 68 - recv := make(chan parseResp, 1) 69 - 70 - go func() { 71 - defer close(recv) 72 - for req := range send { 73 - entries, err := p.Parse(req.Name, req.Content) 74 - recv <- parseResp{Entries: entries, Err: err} 75 - } 76 - }() 77 - 78 - lp.p = p 79 - lp.send = send 80 - lp.recv = recv 81 - } 82 - 83 - lp.send <- parseReq{Name: name, Content: content} 84 - 85 - deadline := time.NewTimer(parseTimeout) 86 - defer deadline.Stop() 87 - 88 - select { 89 - case resp := <-lp.recv: 90 - return resp.Entries, resp.Err 91 - case <-deadline.C: 92 - // Error out since ctags hanging is a sign something bad is happening. 93 - lp.close() 94 - return nil, fmt.Errorf("ctags timedout after %s parsing %s", parseTimeout, name) 95 - } 96 - } 97 - 98 - func (lp *lockedParser) Close() { 99 - lp.mu.Lock() 100 - defer lp.mu.Unlock() 101 - lp.close() 102 - } 103 - 104 - // close assumes lp.mu is held. 105 - func (lp *lockedParser) close() { 106 - if lp.p == nil { 107 - return 108 - } 109 - 110 - lp.p.Close() 111 - lp.p = nil 112 - close(lp.send) 113 - lp.send = nil 114 - lp.recv = nil 115 - }
+2 -7
ctags/json_test.go ctags/parser_test.go
··· 27 27 t.Skip(err) 28 28 } 29 29 30 - factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true) 31 - if err != nil { 32 - t.Fatal(err) 33 - } 34 - 35 - p := factory.NewParser(UniversalCTags) 30 + p := NewCTagsParser(map[CTagsParserType]string{UniversalCTags: "universal-ctags"}) 36 31 defer p.Close() 37 32 38 33 java := ` ··· 50 45 } 51 46 ` 52 47 name := "io/zoekt/Back.java" 53 - got, err := p.Parse(name, []byte(java)) 48 + got, err := p.Parse(name, []byte(java), UniversalCTags) 54 49 if err != nil { 55 50 t.Errorf("Process: %v", err) 56 51 }
+99
ctags/parser.go
··· 1 + // Copyright 2017 Google Inc. All rights reserved. 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + package ctags 16 + 17 + import ( 18 + "fmt" 19 + "log" 20 + "os" 21 + "time" 22 + 23 + goctags "github.com/sourcegraph/go-ctags" 24 + ) 25 + 26 + type Entry = goctags.Entry 27 + 28 + // CTagsParser wraps go-ctags and delegates to the right process (like universal-ctags or scip-ctags). 29 + // It is only safe for single-threaded use. This wrapper also enforces a timeout on parsing a single 30 + // document, which is important since we occasionally come across 31 + // documents which hang universal-ctags. 32 + type CTagsParser struct { 33 + bins ParserBinMap 34 + parsers map[CTagsParserType]goctags.Parser 35 + } 36 + 37 + // parseTimeout is how long we wait for a response for parsing a single file 38 + // in ctags. 1 minute is a very conservative timeout which we should only hit 39 + // if ctags hangs. 
40 + const parseTimeout = time.Minute 41 + 42 + func NewCTagsParser(bins ParserBinMap) CTagsParser { 43 + return CTagsParser{bins: bins, parsers: make(map[CTagsParserType]goctags.Parser)} 44 + } 45 + 46 + type parseResult struct { 47 + entries []*Entry 48 + err error 49 + } 50 + 51 + func (lp *CTagsParser) Parse(name string, content []byte, typ CTagsParserType) ([]*Entry, error) { 52 + if lp.parsers[typ] == nil { 53 + parser, err := lp.newParserProcess(typ) 54 + if parser == nil || err != nil { 55 + return nil, err 56 + } 57 + lp.parsers[typ] = parser 58 + } 59 + 60 + deadline := time.NewTimer(parseTimeout) 61 + defer deadline.Stop() 62 + 63 + parser := lp.parsers[typ] 64 + recv := make(chan parseResult, 1) 65 + go func() { 66 + entry, err := parser.Parse(name, content) 67 + recv <- parseResult{entries: entry, err: err} 68 + }() 69 + 70 + select { 71 + case resp := <-recv: 72 + return resp.entries, resp.err 73 + case <-deadline.C: 74 + // Error out since ctags hanging is a sign something bad is happening. 75 + return nil, fmt.Errorf("ctags timedout after %s parsing %s", parseTimeout, name) 76 + } 77 + } 78 + 79 + func (lp *CTagsParser) newParserProcess(typ CTagsParserType) (goctags.Parser, error) { 80 + bin := lp.bins[typ] 81 + if bin == "" { 82 + // This happens if CTagsMustSucceed is false and we didn't find the binary 83 + return nil, nil 84 + } 85 + 86 + opts := goctags.Options{Bin: bin} 87 + parserType := ParserToString(typ) 88 + if debug { 89 + opts.Info = log.New(os.Stderr, "CTAGS (" + parserType + ") INF: ", log.LstdFlags) 90 + opts.Debug = log.New(os.Stderr, "CTAGS (" + parserType + ") DBG: ", log.LstdFlags) 91 + } 92 + return goctags.New(opts) 93 + } 94 + 95 + func (lp *CTagsParser) Close() { 96 + for _, parser := range lp.parsers { 97 + parser.Close() 98 + } 99 + }
+6 -23
ctags/parser_factory.go ctags/parser_bins.go
··· 17 17 import ( 18 18 "bytes" 19 19 "fmt" 20 - "log" 21 - "os" 22 20 "os/exec" 23 21 "strings" 24 - 25 - goctags "github.com/sourcegraph/go-ctags" 26 22 ) 27 23 28 24 type CTagsParserType uint8 ··· 33 29 UniversalCTags 34 30 ScipCTags 35 31 ) 32 + 33 + const debug = false 36 34 37 35 type LanguageMap = map[string]CTagsParserType 38 36 ··· 64 62 } 65 63 } 66 64 67 - type ParserFactory map[CTagsParserType]string 65 + type ParserBinMap map[CTagsParserType]string 68 66 69 - func NewParserFactory( 67 + func NewParserBinMap( 70 68 ctagsPath string, 71 69 scipCTagsPath string, 72 70 languageMap LanguageMap, 73 71 cTagsMustSucceed bool, 74 - ) (ParserFactory, error) { 72 + ) (ParserBinMap, error) { 75 73 validBins := make(map[CTagsParserType]string) 76 74 requiredBins := map[CTagsParserType]string{UniversalCTags: ctagsPath} 77 75 for _, parserType := range languageMap { ··· 86 84 return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) 87 85 } 88 86 if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed { 89 - return nil, fmt.Errorf("ctags.NewParserFactory: %v", err) 87 + return nil, fmt.Errorf("ctags.NewParserBinMap: %v", err) 90 88 } 91 89 validBins[parserType] = bin 92 90 } ··· 116 114 return nil 117 115 } 118 116 119 - // NewParser creates a parser that is implemented by the given 120 - // ctags binary. The parser is safe for concurrent use. 121 - func (p ParserFactory) NewParser(typ CTagsParserType) Parser { 122 - bin := p[typ] 123 - if bin == "" { 124 - return nil 125 - } 126 - 127 - opts := goctags.Options{Bin: bin} 128 - if debug { 129 - opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) 130 - opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) 131 - } 132 - return &lockedParser{opts: opts} 133 - }