fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Indexing: use one ctags process per shard (#702)

Currently, we use a single ctags process for indexing an entire repository.
Even though we build shards in parallel, they all share the same
(single-threaded) ctags process. Since ctags is one of the most expensive parts of
shard building, this creates a bottleneck that can really slow down indexing.

This change launches a new ctags process per shard. For
`sgtest/megarepo`, this speeds up indexing by almost 2x (measured with scip-ctags
enabled and `-parallelism=4`):
* Before: took 4 min 48 sec to index repo
* After: took 2 min 30 sec to index repo

+161 -153
+7 -10
build/builder.go
··· 249 249 docChecker zoekt.DocChecker 250 250 size int 251 251 252 - parserMap ctags.ParserMap 253 - building sync.WaitGroup 252 + parserFactory ctags.ParserFactory 253 + building sync.WaitGroup 254 254 255 255 errMu sync.Mutex 256 256 buildError error ··· 563 563 finishedShards: map[string]string{}, 564 564 } 565 565 566 - parserMap, err := ctags.NewParserMap( 567 - ctags.ParserBinMap{ 568 - ctags.UniversalCTags: b.opts.CTagsPath, 569 - ctags.ScipCTags: b.opts.ScipCTagsPath, 570 - }, 566 + parserFactory, err := ctags.NewParserFactory( 567 + b.opts.CTagsPath, 568 + b.opts.ScipCTagsPath, 571 569 opts.LanguageMap, 572 570 b.opts.CTagsMustSucceed, 573 571 ) 574 - 575 572 if err != nil { 576 573 return nil, err 577 574 } 578 575 579 - b.parserMap = parserMap 576 + b.parserFactory = parserFactory 580 577 581 578 b.shardLogger = &lumberjack.Logger{ 582 579 Filename: filepath.Join(opts.IndexDir, "zoekt-builder-shard-log.tsv"), ··· 1021 1018 1022 1019 func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { 1023 1020 if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { 1024 - err := ctagsAddSymbolsParserMap(todo, b.opts.LanguageMap, b.parserMap) 1021 + err := parseSymbols(todo, b.opts.LanguageMap, b.parserFactory) 1025 1022 if b.opts.CTagsMustSucceed && err != nil { 1026 1023 return nil, err 1027 1024 }
+12 -4
build/ctags.go
··· 42 42 return normalized 43 43 } 44 44 45 - func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error { 45 + func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserFactory ctags.ParserFactory) error { 46 46 monitor := newMonitor() 47 47 defer monitor.Stop() 48 48 49 49 var tagsToSections tagsToSections 50 + 51 + parsers := make(map[ctags.CTagsParserType]ctags.Parser) 50 52 51 53 for _, doc := range todo { 52 54 if len(doc.Content) == 0 || doc.Symbols != nil { ··· 65 67 parserKind = ctags.UniversalCTags 66 68 } 67 69 68 - parser := parserMap[parserKind] 70 + parser := parsers[parserKind] 69 71 if parser == nil { 70 - // this happens if CTagsMustSucceed is false and we didn't find the binary 71 - continue 72 + // Spin up a new parser for this parser kind 73 + parser = parserFactory.NewParser(parserKind) 74 + if parser == nil { 75 + // this happens if CTagsMustSucceed is false and we didn't find the binary 76 + continue 77 + } 78 + parsers[parserKind] = parser 79 + defer parser.Close() 72 80 } 73 81 74 82 monitor.BeginParsing(doc)
+6 -2
build/ctags_test.go
··· 257 257 requireCTags(b) 258 258 259 259 file, err := os.ReadFile("./testdata/large_file.cc") 260 - parser, err := ctags.NewParser(ctags.UniversalCTags, "universal-ctags") 261 260 if err != nil { 262 261 b.Fatal(err) 263 262 } 264 263 265 - var tagsToSections tagsToSections 264 + factory, err := ctags.NewParserFactory("universal-ctags", "", ctags.LanguageMap{}, true) 265 + if err != nil { 266 + b.Fatal(err) 267 + } 266 268 269 + parser := factory.NewParser(ctags.UniversalCTags) 267 270 entries, err := parser.Parse("./testdata/large_file.cc", file) 268 271 if err != nil { 269 272 b.Fatal(err) 270 273 } 271 274 275 + var tagsToSections tagsToSections 272 276 secs, _, err := tagsToSections.Convert(file, entries) 273 277 if err != nil { 274 278 b.Fatal(err)
-46
ctags/json.go
··· 15 15 package ctags 16 16 17 17 import ( 18 - "bytes" 19 18 "fmt" 20 - "log" 21 - "os" 22 - "os/exec" 23 - "strings" 24 19 "sync" 25 20 "time" 26 21 ··· 118 113 lp.send = nil 119 114 lp.recv = nil 120 115 } 121 - 122 - // NewParser creates a parser that is implemented by the given 123 - // universal-ctags binary. The parser is safe for concurrent use. 124 - func NewParser(parserType CTagsParserType, bin string) (Parser, error) { 125 - if err := checkBinary(parserType, bin); err != nil { 126 - return nil, err 127 - } 128 - 129 - opts := goctags.Options{ 130 - Bin: bin, 131 - } 132 - if debug { 133 - opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) 134 - opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) 135 - } 136 - return &lockedParser{ 137 - opts: opts, 138 - }, nil 139 - } 140 - 141 - // checkBinary does checks on bin to ensure we can correctly use the binary 142 - // for symbols. It is more user friendly to fail early in this case. 143 - func checkBinary(typ CTagsParserType, bin string) error { 144 - switch typ { 145 - case UniversalCTags: 146 - helpOutput, err := exec.Command(bin, "--help").CombinedOutput() 147 - if err != nil { 148 - return fmt.Errorf("failed to check if %s is universal-ctags: %w\n--help output:\n%s", bin, err, string(helpOutput)) 149 - } 150 - if !bytes.Contains(helpOutput, []byte("+interactive")) { 151 - return fmt.Errorf("ctags binary is not universal-ctags or is not compiled with +interactive feature: bin=%s", bin) 152 - } 153 - 154 - case ScipCTags: 155 - if !strings.Contains(bin, "scip-ctags") { 156 - return fmt.Errorf("only supports scip-ctags, not %s", bin) 157 - } 158 - } 159 - 160 - return nil 161 - }
+3 -2
ctags/json_test.go
··· 27 27 t.Skip(err) 28 28 } 29 29 30 - p, err := NewParser(UniversalCTags, "universal-ctags") 30 + factory, err := NewParserFactory("universal-ctags", "", LanguageMap{}, true) 31 31 if err != nil { 32 - t.Fatal("newProcess", err) 32 + t.Fatal(err) 33 33 } 34 34 35 + p := factory.NewParser(UniversalCTags) 35 36 defer p.Close() 36 37 37 38 java := `
+133
ctags/parser_factory.go
··· 1 + // Copyright 2017 Google Inc. All rights reserved. 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + package ctags 16 + 17 + import ( 18 + "bytes" 19 + "fmt" 20 + "log" 21 + "os" 22 + "os/exec" 23 + "strings" 24 + 25 + goctags "github.com/sourcegraph/go-ctags" 26 + ) 27 + 28 + type CTagsParserType uint8 29 + 30 + const ( 31 + UnknownCTags CTagsParserType = iota 32 + NoCTags 33 + UniversalCTags 34 + ScipCTags 35 + ) 36 + 37 + type LanguageMap = map[string]CTagsParserType 38 + 39 + func ParserToString(parser CTagsParserType) string { 40 + switch parser { 41 + case UnknownCTags: 42 + return "unknown" 43 + case NoCTags: 44 + return "no" 45 + case UniversalCTags: 46 + return "universal" 47 + case ScipCTags: 48 + return "scip" 49 + default: 50 + panic("Reached impossible CTagsParserType state") 51 + } 52 + } 53 + 54 + func StringToParser(str string) CTagsParserType { 55 + switch str { 56 + case "no": 57 + return NoCTags 58 + case "universal": 59 + return UniversalCTags 60 + case "scip": 61 + return ScipCTags 62 + default: 63 + return UniversalCTags 64 + } 65 + } 66 + 67 + type ParserFactory map[CTagsParserType]string 68 + 69 + func NewParserFactory( 70 + ctagsPath string, 71 + scipCTagsPath string, 72 + languageMap LanguageMap, 73 + cTagsMustSucceed bool, 74 + ) (ParserFactory, error) { 75 + validBins := make(map[CTagsParserType]string) 76 + requiredBins := map[CTagsParserType]string{UniversalCTags: 
ctagsPath} 77 + for _, parserType := range languageMap { 78 + if parserType == ScipCTags { 79 + requiredBins[ScipCTags] = scipCTagsPath 80 + break 81 + } 82 + } 83 + 84 + for parserType, bin := range requiredBins { 85 + if bin == "" && cTagsMustSucceed { 86 + return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) 87 + } 88 + if err := checkBinary(parserType, bin); err != nil && cTagsMustSucceed { 89 + return nil, fmt.Errorf("ctags.NewParserFactory: %v", err) 90 + } 91 + validBins[parserType] = bin 92 + } 93 + 94 + return validBins, nil 95 + } 96 + 97 + // checkBinary does checks on bin to ensure we can correctly use the binary 98 + // for symbols. It is more user friendly to fail early in this case. 99 + func checkBinary(typ CTagsParserType, bin string) error { 100 + switch typ { 101 + case UniversalCTags: 102 + helpOutput, err := exec.Command(bin, "--help").CombinedOutput() 103 + if err != nil { 104 + return fmt.Errorf("failed to check if %s is universal-ctags: %w\n--help output:\n%s", bin, err, string(helpOutput)) 105 + } 106 + if !bytes.Contains(helpOutput, []byte("+interactive")) { 107 + return fmt.Errorf("ctags binary is not universal-ctags or is not compiled with +interactive feature: bin=%s", bin) 108 + } 109 + 110 + case ScipCTags: 111 + if !strings.Contains(bin, "scip-ctags") { 112 + return fmt.Errorf("only supports scip-ctags, not %s", bin) 113 + } 114 + } 115 + 116 + return nil 117 + } 118 + 119 + // NewParser creates a parser that is implemented by the given 120 + // ctags binary. The parser is safe for concurrent use. 121 + func (p ParserFactory) NewParser(typ CTagsParserType) Parser { 122 + bin := p[typ] 123 + if bin == "" { 124 + return nil 125 + } 126 + 127 + opts := goctags.Options{Bin: bin} 128 + if debug { 129 + opts.Info = log.New(os.Stderr, "CTAGS INF: ", log.LstdFlags) 130 + opts.Debug = log.New(os.Stderr, "CTAGS DBG: ", log.LstdFlags) 131 + } 132 + return &lockedParser{opts: opts} 133 + }
-89
ctags/parser_map.go
··· 1 - // Copyright 2017 Google Inc. All rights reserved. 2 - // 3 - // Licensed under the Apache License, Version 2.0 (the "License"); 4 - // you may not use this file except in compliance with the License. 5 - // You may obtain a copy of the License at 6 - // 7 - // http://www.apache.org/licenses/LICENSE-2.0 8 - // 9 - // Unless required by applicable law or agreed to in writing, software 10 - // distributed under the License is distributed on an "AS IS" BASIS, 11 - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 - // See the License for the specific language governing permissions and 13 - // limitations under the License. 14 - 15 - package ctags 16 - 17 - import ( 18 - "fmt" 19 - ) 20 - 21 - type CTagsParserType uint8 22 - 23 - const ( 24 - UnknownCTags CTagsParserType = iota 25 - NoCTags 26 - UniversalCTags 27 - ScipCTags 28 - ) 29 - 30 - type LanguageMap = map[string]CTagsParserType 31 - 32 - func ParserToString(parser CTagsParserType) string { 33 - switch parser { 34 - case UnknownCTags: 35 - return "unknown" 36 - case NoCTags: 37 - return "no" 38 - case UniversalCTags: 39 - return "universal" 40 - case ScipCTags: 41 - return "scip" 42 - default: 43 - panic("Reached impossible CTagsParserType state") 44 - } 45 - } 46 - 47 - func StringToParser(str string) CTagsParserType { 48 - switch str { 49 - case "no": 50 - return NoCTags 51 - case "universal": 52 - return UniversalCTags 53 - case "scip": 54 - return ScipCTags 55 - default: 56 - return UniversalCTags 57 - } 58 - } 59 - 60 - type ParserMap map[CTagsParserType]Parser 61 - type ParserBinMap map[CTagsParserType]string 62 - 63 - func NewParserMap(bins ParserBinMap, languageMap LanguageMap, cTagsMustSucceed bool) (ParserMap, error) { 64 - parsers := make(ParserMap) 65 - 66 - requiredTypes := []CTagsParserType{UniversalCTags} 67 - for _, parserType := range languageMap { 68 - if parserType == ScipCTags { 69 - requiredTypes = append(requiredTypes, ScipCTags) 70 - break 71 - } 72 - 
} 73 - 74 - for _, parserType := range requiredTypes { 75 - bin := bins[parserType] 76 - if bin == "" && cTagsMustSucceed { 77 - return nil, fmt.Errorf("ctags binary not found for %s parser type", ParserToString(parserType)) 78 - } else { 79 - parser, err := NewParser(parserType, bin) 80 - if err != nil && cTagsMustSucceed { 81 - return nil, fmt.Errorf("ctags.NewParserMap: %v", err) 82 - } 83 - 84 - parsers[parserType] = parser 85 - } 86 - } 87 - 88 - return parsers, nil 89 - }