all: compound shard support (#95) · boltless.me/zoekt@c2b24f0

+40

bits.go

// toSizedDeltas16 encodes offsets as a uvarint element count followed by one
// uvarint per element holding the difference to the previous element (the
// first element is relative to 0).
//
// Differences are computed in uint16 arithmetic, so an input that is not
// monotonically increasing simply wraps around; fromSizedDeltas16 applies the
// matching wrap-around when decoding, so any input round-trips.
func toSizedDeltas16(offsets []uint16) []byte {
	// MaxVarintLen64 is the documented upper bound for PutUvarint, which
	// panics when the buffer is too small for the value.
	var enc [binary.MaxVarintLen64]byte

	deltas := make([]byte, 0, len(offsets)*2)

	m := binary.PutUvarint(enc[:], uint64(len(offsets)))
	deltas = append(deltas, enc[:m]...)

	var last uint16
	for _, p := range offsets {
		delta := p - last
		last = p

		m := binary.PutUvarint(enc[:], uint64(delta))
		deltas = append(deltas, enc[:m]...)
	}
	return deltas
}

// fromSizedDeltas16 decodes data produced by toSizedDeltas16. ps is an
// optional scratch slice: its backing array is reused when it is large
// enough, otherwise a new slice is allocated. The decoded offsets are
// returned.
//
// The function has no error return, so malformed input is handled on a
// best-effort basis: decoding stops at the first truncated or overflowing
// varint and whatever was decoded up to that point is returned.
func fromSizedDeltas16(data []byte, ps []uint16) []uint16 {
	sz, m := binary.Uvarint(data)
	if m <= 0 {
		// Empty input or an unreadable header: nothing to decode.
		return ps[:0]
	}
	data = data[m:]

	// The count is only a capacity hint. Every element occupies at least one
	// byte, so never trust a count that exceeds the remaining payload.
	if sz > uint64(len(data)) {
		sz = uint64(len(data))
	}

	if cap(ps) < int(sz) {
		ps = make([]uint16, 0, sz)
	} else {
		ps = ps[:0]
	}

	var last uint16
	for len(data) > 0 {
		delta, m := binary.Uvarint(data)
		if m <= 0 {
			// m == 0: truncated varint, m < 0: overflow. Without this check
			// data would never shrink (m == 0) or the slice expression below
			// would panic (m < 0).
			break
		}
		offset := last + uint16(delta)
		last = offset
		data = data[m:]
		ps = append(ps, offset)
	}
	return ps
}

+44 -11

build/builder.go

··· 265 265 266 266 // ShardName returns the name the given index shard. 267 267 func (o *Options) ShardName(n int) string { 268 + return o.shardNameVersion(zoekt.IndexFormatVersion, n) 269 + } 270 + 271 + func (o *Options) shardNameVersion(version, n int) string { 268 272 abs := url.QueryEscape(o.RepositoryDescription.Name) 269 273 if len(abs) > 200 { 270 274 abs = abs[:200] + hashString(abs)[:8] 271 275 } 272 276 return filepath.Join(o.IndexDir, 273 - fmt.Sprintf("%s_v%d.%05d.zoekt", abs, zoekt.IndexFormatVersion, n)) 277 + fmt.Sprintf("%s_v%d.%05d.zoekt", abs, version, n)) 274 278 } 275 279 276 280 type IndexState string 277 281 278 282 const ( 279 - IndexStateMissing IndexState = "missing" 280 - IndexStateCorrupt = "corrupt" 281 - IndexStateVersion = "version-mismatch" 282 - IndexStateOption = "option-mismatch" 283 - IndexStateMeta = "meta-mismatch" 284 - IndexStateContent = "content-mismatch" 285 - IndexStateEqual = "equal" 283 + IndexStateMissing IndexState = "missing" 284 + IndexStateCorrupt = "corrupt" 285 + IndexStateUnexpectedCompound = "unexpected-compound" 286 + IndexStateVersion = "version-mismatch" 287 + IndexStateOption = "option-mismatch" 288 + IndexStateMeta = "meta-mismatch" 289 + IndexStateContent = "content-mismatch" 290 + IndexStateEqual = "equal" 286 291 ) 287 292 293 + var readVersions = []struct { 294 + IndexFormatVersion int 295 + FeatureVersion int 296 + }{{ 297 + IndexFormatVersion: zoekt.IndexFormatVersion, 298 + FeatureVersion: zoekt.FeatureVersion, 299 + }, { 300 + IndexFormatVersion: zoekt.NextIndexFormatVersion, 301 + FeatureVersion: zoekt.NextFeatureVersion, 302 + }} 303 + 288 304 // IncrementalSkipIndexing returns true if the index present on disk matches 289 305 // the build options. 290 306 func (o *Options) IncrementalSkipIndexing() bool { ··· 294 310 // IndexState checks how the index present on disk compares to the build 295 311 // options. 
296 312 func (o *Options) IndexState() IndexState { 297 - fn := o.ShardName(0) 313 + // Open the latest version we support that is on disk. 314 + fn := "" 315 + featureVersion := -1 316 + for _, v := range readVersions { 317 + fn = o.shardNameVersion(v.IndexFormatVersion, 0) 318 + if _, err := os.Stat(fn); err == nil { 319 + featureVersion = v.FeatureVersion 320 + break 321 + } 322 + } 298 323 299 - repo, index, err := zoekt.ReadMetadataPath(fn) 324 + repos, index, err := zoekt.ReadMetadataPath(fn) 300 325 if os.IsNotExist(err) { 301 326 return IndexStateMissing 302 327 } else if err != nil { 303 328 return IndexStateCorrupt 304 329 } 305 330 306 - if index.IndexFeatureVersion != zoekt.FeatureVersion { 331 + if index.IndexFeatureVersion != featureVersion { 307 332 return IndexStateVersion 308 333 } 334 + 335 + // This shouldn't happen. Options only references one repository, so 336 + // shardName will only return non compound repositories. We still need to 337 + // work out how to do IndexState with compound shards. 338 + if len(repos) != 1 { 339 + return IndexStateUnexpectedCompound 340 + } 341 + repo := repos[0] 309 342 310 343 if repo.IndexOptions != o.HashOptions() { 311 344 return IndexStateOption

+72

build/builder_test.go

··· 2 2 3 3 import ( 4 4 "flag" 5 + "io" 6 + "log" 5 7 "os" 6 8 "path/filepath" 7 9 "testing" ··· 114 116 } 115 117 } 116 118 } 119 + 120 + func TestIncrementalSkipIndexing(t *testing.T) { 121 + cases := []struct { 122 + name string 123 + want bool 124 + opts Options 125 + }{{ 126 + name: "v17-noop", 127 + want: true, 128 + opts: Options{ 129 + RepositoryDescription: zoekt.Repository{ 130 + Name: "repo17", 131 + }, 132 + SizeMax: 2097152, 133 + DisableCTags: true, 134 + }, 135 + }, { 136 + name: "v16-noop", 137 + want: true, 138 + opts: Options{ 139 + RepositoryDescription: zoekt.Repository{ 140 + Name: "repo", 141 + }, 142 + SizeMax: 2097152, 143 + DisableCTags: true, 144 + }, 145 + }, { 146 + name: "v17-id", 147 + want: false, 148 + opts: Options{ 149 + RepositoryDescription: zoekt.Repository{ 150 + Name: "repo17", 151 + RawConfig: map[string]string{ 152 + "repoid": "123", 153 + }, 154 + }, 155 + SizeMax: 2097152, 156 + DisableCTags: true, 157 + }, 158 + }, { 159 + name: "doesnotexist", 160 + want: false, 161 + opts: Options{ 162 + RepositoryDescription: zoekt.Repository{ 163 + Name: "doesnotexist", 164 + }, 165 + SizeMax: 2097152, 166 + DisableCTags: true, 167 + }, 168 + }} 169 + 170 + for _, tc := range cases { 171 + t.Run(tc.name, func(t *testing.T) { 172 + tc.opts.IndexDir = "../testdata/shards" 173 + t.Log(tc.opts.IndexState()) 174 + got := tc.opts.IncrementalSkipIndexing() 175 + if got != tc.want { 176 + t.Fatalf("want %v got %v", tc.want, got) 177 + } 178 + }) 179 + } 180 + } 181 + 182 + func TestMain(m *testing.M) { 183 + flag.Parse() 184 + if !testing.Verbose() { 185 + log.SetOutput(io.Discard) 186 + } 187 + os.Exit(m.Run()) 188 + }

+3 -3

build/e2e_test.go

··· 100 100 // "repo-mutated". We do this inside retry helper since we have noticed 101 101 // some flakiness on github CI. 102 102 for _, p := range fs { 103 - repo, _, err := zoekt.ReadMetadataPath(p) 103 + repos, _, err := zoekt.ReadMetadataPath(p) 104 104 if err != nil { 105 105 t.Fatal(err) 106 106 } 107 - repo.Name = "repo-mutated" 108 - b, err := json.Marshal(repo) 107 + repos[0].Name = "repo-mutated" 108 + b, err := json.Marshal(repos[0]) 109 109 if err != nil { 110 110 t.Fatal(err) 111 111 }

+7 -1

cmd/zoekt-indexserver/main.go

··· 217 217 } 218 218 defer ifile.Close() 219 219 220 - repo, _, err := zoekt.ReadMetadata(ifile) 220 + repos, _, err := zoekt.ReadMetadata(ifile) 221 221 if err != nil { 222 222 return nil 223 223 } 224 + 225 + // TODO support compound shards in zoekt-indexserver 226 + if len(repos) != 1 { 227 + return nil 228 + } 229 + repo := repos[0] 224 230 225 231 _, err = os.Stat(repo.Source) 226 232 if os.IsNotExist(err) {

+37

cmd/zoekt-merge-index/main.go

··· 1 + package main 2 + 3 + import ( 4 + "log" 5 + "os" 6 + "path/filepath" 7 + 8 + "github.com/google/zoekt" 9 + ) 10 + 11 + func merge(dstDir string, names []string) error { 12 + var files []zoekt.IndexFile 13 + for _, fn := range names { 14 + f, err := os.Open(fn) 15 + if err != nil { 16 + return err 17 + } 18 + defer f.Close() 19 + 20 + indexFile, err := zoekt.NewIndexFile(f) 21 + if err != nil { 22 + return err 23 + } 24 + defer indexFile.Close() 25 + 26 + files = append(files, indexFile) 27 + } 28 + 29 + return zoekt.Merge(dstDir, files...) 30 + } 31 + 32 + func main() { 33 + err := merge(filepath.Dir(os.Args[1]), os.Args[1:]) 34 + if err != nil { 35 + log.Fatal(err) 36 + } 37 + }

+51

cmd/zoekt-merge-index/main_test.go

··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "path/filepath" 6 + "sort" 7 + "testing" 8 + 9 + "github.com/google/zoekt" 10 + "github.com/google/zoekt/query" 11 + "github.com/google/zoekt/shards" 12 + ) 13 + 14 + func TestMerge(t *testing.T) { 15 + dir := t.TempDir() 16 + 17 + v16Shards, err := filepath.Glob("../../testdata/shards/*_v16.*.zoekt") 18 + if err != nil { 19 + t.Fatal(err) 20 + } 21 + sort.Strings(v16Shards) 22 + t.Log(v16Shards) 23 + 24 + err = merge(dir, v16Shards) 25 + if err != nil { 26 + t.Fatal(err) 27 + } 28 + 29 + ss, err := shards.NewDirectorySearcher(dir) 30 + if err != nil { 31 + t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 32 + } 33 + defer ss.Close() 34 + 35 + q, err := query.Parse("hello") 36 + if err != nil { 37 + t.Fatalf("Parse(hello): %v", err) 38 + } 39 + 40 + var sOpts zoekt.SearchOptions 41 + ctx := context.Background() 42 + result, err := ss.Search(ctx, q, &sOpts) 43 + if err != nil { 44 + t.Fatalf("Search(%v): %v", q, err) 45 + } 46 + 47 + // we are merging the same shard twice, so we expect the same file twice. 48 + if len(result.Files) != 2 { 49 + t.Errorf("got %v, want 2 files.", result.Files) 50 + } 51 + }

+15 -5

cmd/zoekt-sourcegraph-indexserver/cleanup.go

··· 124 124 continue 125 125 } 126 126 127 - name, err := shardRepoName(path) 127 + names, err := shardRepoNames(path) 128 128 if err != nil { 129 129 debug.Printf("failed to read shard: %v", err) 130 130 continue 131 131 } 132 132 133 + // TODO support compound shards once we support tombstones 134 + if len(names) != 1 { 135 + continue 136 + } 137 + name := names[0] 138 + 133 139 shards[name] = append(shards[name], shard{ 134 140 Repo: name, 135 141 Path: path, ··· 139 145 return shards 140 146 } 141 147 142 - func shardRepoName(path string) (string, error) { 143 - repo, _, err := zoekt.ReadMetadataPath(path) 148 + func shardRepoNames(path string) ([]string, error) { 149 + repos, _, err := zoekt.ReadMetadataPath(path) 144 150 if err != nil { 145 - return "", err 151 + return nil, err 146 152 } 147 153 148 - return repo.Name, nil 154 + names := make([]string, 0, len(repos)) 155 + for _, repo := range repos { 156 + names = append(names, repo.Name) 157 + } 158 + return names, nil 149 159 } 150 160 151 161 var incompleteRE = regexp.MustCompile(`\.zoekt[0-9]+(\.\w+)?$`)

+8 -6

cmd/zoekt-sourcegraph-indexserver/cleanup_test.go

··· 34 34 if filepath.Ext(path) != ".zoekt" { 35 35 continue 36 36 } 37 - name, _ := shardRepoName(path) 37 + names, _ := shardRepoNames(path) 38 38 fi, _ := os.Stat(path) 39 - shards = append(shards, shard{ 40 - Repo: name, 41 - Path: filepath.Base(path), 42 - ModTime: fi.ModTime(), 43 - }) 39 + for _, name := range names { 40 + shards = append(shards, shard{ 41 + Repo: name, 42 + Path: filepath.Base(path), 43 + ModTime: fi.ModTime(), 44 + }) 45 + } 44 46 } 45 47 return shards 46 48 }

+7 -1

cmd/zoekt-sourcegraph-indexserver/meta.go

··· 2 2 3 3 import ( 4 4 "encoding/json" 5 + "fmt" 5 6 "io/ioutil" 6 7 "os" 7 8 "path/filepath" ··· 22 23 for i := 0; ; i++ { 23 24 fn := o.ShardName(i) 24 25 25 - repo, _, err := zoekt.ReadMetadataPath(fn) 26 + repos, _, err := zoekt.ReadMetadataPath(fn) 26 27 if os.IsNotExist(err) { 27 28 break 28 29 } else if err != nil { 29 30 return err 30 31 } 32 + 33 + if len(repos) != 1 { 34 + return fmt.Errorf("mergeMeta: does not support compound shards: %s", fn) 35 + } 36 + repo := repos[0] 31 37 32 38 if updated, err := repo.MergeMutable(&o.RepositoryDescription); err != nil { 33 39 return err

+13 -3

eval.go

··· 44 44 // otherwise. 45 45 func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(repoName string) bool) query.Q { 46 46 count := 0 47 - for _, md := range d.repoMetaData { 48 - if predicate(md.Name) { 47 + alive := len(d.repoMetaData) 48 + for i, md := range d.repoMetaData { 49 + if d.repoTombstone[i] { 50 + alive-- 51 + } else if predicate(md.Name) { 49 52 count++ 50 53 } 51 54 } 52 - if count == len(d.repoMetaData) { 55 + if count == alive { 53 56 return &query.Const{Value: true} 54 57 } 55 58 if count > 0 { ··· 180 183 nextDoc := mt.nextDoc() 181 184 if int(nextDoc) <= lastDoc { 182 185 nextDoc = uint32(lastDoc + 1) 186 + } 187 + // Skip tombstoned docs 188 + for nextDoc < docCount && d.repoTombstone[d.repos[nextDoc]] { 189 + nextDoc++ 183 190 } 184 191 if nextDoc >= docCount { 185 192 break ··· 486 493 } 487 494 488 495 for i := range d.repoListEntry { 496 + if d.repoTombstone[i] { 497 + continue 498 + } 489 499 rle := &d.repoListEntry[i] 490 500 ok, err := include(rle) 491 501 if err != nil {

+22 -12

eval_test.go

··· 128 128 } 129 129 } 130 130 131 - func TestSimplifyRepoSet(t *testing.T) { 132 - d := &indexData{ 133 - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, 131 + // compoundReposShard returns a compound shard where each repo has 1 document. 132 + func compoundReposShard(t *testing.T, names ...string) *indexData { 133 + t.Helper() 134 + b := newIndexBuilder() 135 + b.indexFormatVersion = NextIndexFormatVersion 136 + b.featureVersion = NextFeatureVersion 137 + for _, name := range names { 138 + if err := b.setRepository(&Repository{Name: name}); err != nil { 139 + t.Fatal(err) 140 + } 141 + if err := b.AddFile(name+".txt", []byte(name+" content")); err != nil { 142 + t.Fatal(err) 143 + } 134 144 } 145 + s := searcherForTest(t, b) 146 + return s.(*indexData) 147 + } 148 + 149 + func TestSimplifyRepoSet(t *testing.T) { 150 + d := compoundReposShard(t, "foo", "bar") 135 151 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}} 136 152 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}} 137 153 none := &query.RepoSet{Set: map[string]bool{"banana": true}} ··· 153 169 } 154 170 155 171 func TestSimplifyRepo(t *testing.T) { 156 - d := &indexData{ 157 - repoMetaData: []Repository{{Name: "foo"}, {Name: "fool"}}, 158 - } 172 + d := compoundReposShard(t, "foo", "fool") 159 173 all := &query.Repo{"foo"} 160 174 some := &query.Repo{"fool"} 161 175 none := &query.Repo{"bar"} ··· 177 191 } 178 192 179 193 func TestSimplifyRepoBranch(t *testing.T) { 180 - d := &indexData{ 181 - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, 182 - } 194 + d := compoundReposShard(t, "foo", "bar") 183 195 184 196 some := &query.RepoBranches{Set: map[string][]string{"bar": {"branch1"}}} 185 197 none := &query.Repo{"banana"} ··· 196 208 } 197 209 198 210 func TestSimplifyRepoBranchSimple(t *testing.T) { 199 - d := &indexData{ 200 - repoMetaData: []Repository{{Name: "foo"}}, 201 - } 211 + d := compoundReposShard(t, "foo") 202 212 q := 
&query.RepoBranches{Set: map[string][]string{"foo": {"HEAD", "b1"}, "bar": {"HEAD"}}} 203 213 204 214 want := &query.Or{[]query.Q{&query.Branch{

+1 -1

index_test.go

··· 1239 1239 t.Fatalf("ReadMetadata: %v", err) 1240 1240 } 1241 1241 1242 - if got, want := rd.Name, "reponame"; got != want { 1242 + if got, want := rd[0].Name, "reponame"; got != want { 1243 1243 t.Fatalf("got %q want %q", got, want) 1244 1244 } 1245 1245 }

+56 -27

indexbuilder.go

··· 148 148 149 149 // IndexBuilder builds a single index shard. 150 150 type IndexBuilder struct { 151 + // The version we will write to disk. Sourcegraph Specific. This is to 152 + // enable feature flagging new format versions. 153 + indexFormatVersion int 154 + featureVersion int 155 + 151 156 contentStrings []*searchableString 152 157 nameStrings []*searchableString 153 158 docSections [][]DocumentSection ··· 166 171 branchMasks []uint64 167 172 subRepos []uint32 168 173 174 + // docID => repoID 175 + repos []uint16 176 + 169 177 contentPostings *postingsBuilder 170 178 namePostings *postingsBuilder 171 179 172 - // root repository 173 - repo Repository 180 + // root repositories 181 + repoList []Repository 174 182 175 183 // name to index. 176 - subRepoIndices map[string]uint32 184 + subRepoIndices []map[string]uint32 177 185 178 186 // language => language code 179 187 languageMap map[string]byte ··· 205 213 // NewIndexBuilder creates a fresh IndexBuilder. The passed in 206 214 // Repository contains repo metadata, and may be set to nil. 
207 215 func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { 208 - b := &IndexBuilder{ 209 - contentPostings: newPostingsBuilder(), 210 - namePostings: newPostingsBuilder(), 211 - fileEndSymbol: []uint32{0}, 212 - symIndex: make(map[string]uint32), 213 - symKindIndex: make(map[string]uint32), 214 - languageMap: map[string]byte{}, 215 - } 216 + b := newIndexBuilder() 216 217 217 218 if r == nil { 218 219 r = &Repository{} ··· 223 224 return b, nil 224 225 } 225 226 226 - func (b *IndexBuilder) setRepository(desc *Repository) error { 227 - if len(b.contentStrings) > 0 { 228 - return fmt.Errorf("setRepository called after adding files") 227 + func newIndexBuilder() *IndexBuilder { 228 + return &IndexBuilder{ 229 + indexFormatVersion: IndexFormatVersion, 230 + featureVersion: FeatureVersion, 231 + 232 + contentPostings: newPostingsBuilder(), 233 + namePostings: newPostingsBuilder(), 234 + fileEndSymbol: []uint32{0}, 235 + symIndex: make(map[string]uint32), 236 + symKindIndex: make(map[string]uint32), 237 + languageMap: map[string]byte{}, 229 238 } 239 + } 240 + 241 + func (b *IndexBuilder) setRepository(desc *Repository) error { 230 242 if err := desc.verify(); err != nil { 231 243 return err 232 244 } ··· 235 247 return fmt.Errorf("too many branches") 236 248 } 237 249 238 - b.repo = *desc 250 + repo := *desc 239 251 240 252 // copy subrepomap without root 241 - b.repo.SubRepoMap = map[string]*Repository{} 253 + repo.SubRepoMap = map[string]*Repository{} 242 254 for k, v := range desc.SubRepoMap { 243 255 if k != "" { 244 - b.repo.SubRepoMap[k] = v 256 + repo.SubRepoMap[k] = v 245 257 } 246 258 } 247 259 248 - b.populateSubRepoIndices() 249 - return nil 260 + b.repoList = append(b.repoList, repo) 261 + 262 + return b.populateSubRepoIndices() 250 263 } 251 264 252 265 type DocumentSection struct { ··· 329 342 return nil 330 343 } 331 344 332 - func (b *IndexBuilder) populateSubRepoIndices() { 333 - if b.subRepoIndices != nil { 334 - return 345 + func (b 
*IndexBuilder) populateSubRepoIndices() error { 346 + if len(b.subRepoIndices) == len(b.repoList) { 347 + return nil 335 348 } 349 + if len(b.subRepoIndices) != len(b.repoList)-1 { 350 + return fmt.Errorf("populateSubRepoIndices not called for a repo: %d != %d - 1", len(b.subRepoIndices), len(b.repoList)) 351 + } 352 + repo := b.repoList[len(b.repoList)-1] 353 + b.subRepoIndices = append(b.subRepoIndices, mkSubRepoIndices(repo)) 354 + return nil 355 + } 356 + 357 + func mkSubRepoIndices(repo Repository) map[string]uint32 { 336 358 paths := []string{""} 337 - for k := range b.repo.SubRepoMap { 359 + for k := range repo.SubRepoMap { 338 360 paths = append(paths, k) 339 361 } 340 362 sort.Strings(paths) 341 - b.subRepoIndices = make(map[string]uint32, len(paths)) 363 + subRepoIndices := make(map[string]uint32, len(paths)) 342 364 for i, p := range paths { 343 - b.subRepoIndices[p] = uint32(i) 365 + subRepoIndices[p] = uint32(i) 344 366 } 367 + return subRepoIndices 345 368 } 346 369 347 370 const notIndexedMarker = "NOT-INDEXED: " ··· 424 447 } 425 448 b.addSymbols(doc.SymbolsMetaData) 426 449 427 - subRepoIdx, ok := b.subRepoIndices[doc.SubRepositoryPath] 450 + repoIdx := len(b.repoList) - 1 451 + subRepoIdx, ok := b.subRepoIndices[repoIdx][doc.SubRepositoryPath] 428 452 if !ok { 429 453 return fmt.Errorf("unknown subrepo path %q", doc.SubRepositoryPath) 430 454 } ··· 438 462 mask |= m 439 463 } 440 464 465 + if repoIdx > 1<<16 { 466 + return fmt.Errorf("too many repos in shard: max is %d", 1<<16) 467 + } 468 + 441 469 b.subRepos = append(b.subRepos, subRepoIdx) 470 + b.repos = append(b.repos, uint16(repoIdx)) 442 471 443 472 hasher.Write(doc.Content) 444 473 ··· 465 494 } 466 495 467 496 func (b *IndexBuilder) branchMask(br string) uint64 { 468 - for i, b := range b.repo.Branches { 497 + for i, b := range b.repoList[len(b.repoList)-1].Branches { 469 498 if b.Name == br { 470 499 return uint64(1) << uint(i) 471 500 }

+4 -2

indexdata.go

··· 77 77 metaData IndexMetadata 78 78 repoMetaData []Repository 79 79 80 + // repoTombstone[repoID] is true if we are not allowed to search 81 + // repoID. 82 + repoTombstone []bool 83 + 80 84 subRepos []uint32 81 85 subRepoPaths [][]string 82 86 ··· 271 275 272 276 return 273 277 } 274 - 275 - func (d *indexData) Repository() []Repository { return d.repoMetaData } 276 278 277 279 func (d *indexData) String() string { 278 280 return fmt.Sprintf("shard(%s)", d.file.Name())

+157

merge.go

··· 1 + package zoekt 2 + 3 + import ( 4 + "crypto/sha1" 5 + "fmt" 6 + "io/ioutil" 7 + "log" 8 + "os" 9 + "path/filepath" 10 + "runtime" 11 + ) 12 + 13 + func Merge(dstDir string, files ...IndexFile) error { 14 + var ds []*indexData 15 + for _, f := range files { 16 + searcher, err := NewSearcher(f) 17 + if err != nil { 18 + return err 19 + } 20 + ds = append(ds, searcher.(*indexData)) 21 + } 22 + 23 + ib, err := merge(ds...) 24 + if err != nil { 25 + return err 26 + } 27 + 28 + hasher := sha1.New() 29 + for _, d := range ds { 30 + for i, md := range d.repoMetaData { 31 + if d.repoTombstone[i] { 32 + continue 33 + } 34 + hasher.Write([]byte(md.Name)) 35 + hasher.Write([]byte{0}) 36 + } 37 + } 38 + 39 + fn := filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) 40 + return builderWriteAll(fn, ib) 41 + } 42 + 43 + func builderWriteAll(fn string, ib *IndexBuilder) error { 44 + dir := filepath.Dir(fn) 45 + if err := os.MkdirAll(dir, 0o700); err != nil { 46 + return err 47 + } 48 + 49 + f, err := ioutil.TempFile(dir, filepath.Base(fn)+".*.tmp") 50 + if err != nil { 51 + return err 52 + } 53 + if runtime.GOOS != "windows" { 54 + // umask? 
55 + if err := f.Chmod(0o666); err != nil { 56 + return err 57 + } 58 + } 59 + 60 + defer f.Close() 61 + if err := ib.Write(f); err != nil { 62 + return err 63 + } 64 + fi, err := f.Stat() 65 + if err != nil { 66 + return err 67 + } 68 + if err := f.Close(); err != nil { 69 + return err 70 + } 71 + 72 + if err := os.Rename(f.Name(), fn); err != nil { 73 + return err 74 + } 75 + 76 + log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 77 + float64(fi.Size())/float64(ib.ContentSize()+1)) 78 + 79 + return nil 80 + } 81 + 82 + func merge(ds ...*indexData) (*IndexBuilder, error) { 83 + if len(ds) == 0 { 84 + return nil, fmt.Errorf("need 1 or more indexData to merge") 85 + } 86 + 87 + ib := newIndexBuilder() 88 + ib.indexFormatVersion = NextIndexFormatVersion 89 + ib.featureVersion = NextFeatureVersion 90 + 91 + for _, d := range ds { 92 + lastRepoID := -1 93 + for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 94 + repoID := int(d.repos[docID]) 95 + 96 + if d.repoTombstone[repoID] { 97 + continue 98 + } 99 + 100 + if repoID != lastRepoID { 101 + if lastRepoID > repoID { 102 + return nil, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 103 + } 104 + lastRepoID = repoID 105 + 106 + // TODO we are losing empty repos on merging since we only get here if 107 + // there is an associated document. 108 + 109 + if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 110 + return nil, err 111 + } 112 + } 113 + 114 + doc := Document{ 115 + Name: string(d.fileName(docID)), 116 + // Content set below since it can return an error 117 + // Branches set below since it requires lookups 118 + SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], 119 + Language: d.languageMap[d.languages[docID]], 120 + // SkipReason not set, will be part of content from original indexer. 
121 + } 122 + 123 + var err error 124 + if doc.Content, err = d.readContents(docID); err != nil { 125 + return nil, err 126 + } 127 + 128 + if doc.Symbols, _, err = d.readDocSections(docID, nil); err != nil { 129 + return nil, err 130 + } 131 + 132 + doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 133 + for i := range doc.SymbolsMetaData { 134 + doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 135 + } 136 + 137 + // calculate branches 138 + { 139 + mask := d.fileBranchMasks[docID] 140 + id := uint32(1) 141 + for mask != 0 { 142 + if mask&0x1 != 0 { 143 + doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) 144 + } 145 + id <<= 1 146 + mask >>= 1 147 + } 148 + } 149 + 150 + if err := ib.Add(doc); err != nil { 151 + return nil, err 152 + } 153 + } 154 + } 155 + 156 + return ib, nil 157 + }

+65 -39

read.go

··· 79 79 } 80 80 81 81 secs := toc.sections() 82 + if len(secs) != int(sectionCount) { 83 + secs = toc.sectionsNext() 84 + } 82 85 83 86 if len(secs) != int(sectionCount) { 84 87 return fmt.Errorf("section count mismatch: got %d want %d", sectionCount, len(secs)) ··· 137 140 return json.Unmarshal(blob, data) 138 141 } 139 142 143 + // canReadVersion returns checks if zoekt can read in md. If it can't a 144 + // non-nil error is returned. 145 + func canReadVersion(md *IndexMetadata) bool { 146 + // Backwards compatible with v16 147 + return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion 148 + } 149 + 140 150 func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { 141 151 d := indexData{ 142 152 file: r.r, ··· 145 155 branchNames: []map[uint]string{}, 146 156 } 147 157 148 - repo, md, err := r.readMetadata(toc) 149 - if md != nil && md.IndexFormatVersion != IndexFormatVersion { 158 + repos, md, err := r.readMetadata(toc) 159 + if md != nil && !canReadVersion(md) { 150 160 return nil, fmt.Errorf("file is v%d, want v%d", md.IndexFormatVersion, IndexFormatVersion) 151 161 } else if err != nil { 152 162 return nil, err 153 163 } 154 164 155 165 d.metaData = *md 156 - d.repoMetaData = []Repository{*repo} 166 + d.repoMetaData = make([]Repository, 0, len(repos)) 167 + for _, r := range repos { 168 + d.repoMetaData = append(d.repoMetaData, *r) 169 + } 157 170 158 171 d.boundariesStart = toc.fileContents.data.off 159 172 d.boundaries = toc.fileContents.relativeIndex() ··· 162 175 d.docSectionsStart = toc.fileSections.data.off 163 176 d.docSectionsIndex = toc.fileSections.relativeIndex() 164 177 165 - if d.metaData.IndexFormatVersion == 16 { 166 - d.symbols.symKindIndex = toc.symbolKindMap.relativeIndex() 167 - d.fileEndSymbol, err = readSectionU32(d.file, toc.fileEndSymbol) 168 - if err != nil { 169 - return nil, err 170 - } 178 + d.symbols.symKindIndex = toc.symbolKindMap.relativeIndex() 179 + d.fileEndSymbol, err 
= readSectionU32(d.file, toc.fileEndSymbol) 180 + if err != nil { 181 + return nil, err 182 + } 171 183 172 - // Call readSectionBlob on each section key, and store the result in 173 - // the blob value. 174 - for sect, blob := range map[simpleSection]*[]byte{ 175 - toc.symbolMap.index: &d.symbols.symIndex, 176 - toc.symbolMap.data: &d.symbols.symContent, 177 - toc.symbolKindMap.data: &d.symbols.symKindContent, 178 - toc.symbolMetaData: &d.symbols.symMetaData, 179 - } { 180 - if *blob, err = d.readSectionBlob(sect); err != nil { 181 - return nil, err 182 - } 184 + // Call readSectionBlob on each section key, and store the result in 185 + // the blob value. 186 + for sect, blob := range map[simpleSection]*[]byte{ 187 + toc.symbolMap.index: &d.symbols.symIndex, 188 + toc.symbolMap.data: &d.symbols.symContent, 189 + toc.symbolKindMap.data: &d.symbols.symKindContent, 190 + toc.symbolMetaData: &d.symbols.symMetaData, 191 + } { 192 + if *blob, err = d.readSectionBlob(sect); err != nil { 193 + return nil, err 183 194 } 184 195 } 185 196 ··· 228 239 d.rawConfigMasks = append(d.rawConfigMasks, encodeRawConfig(md.RawConfig)) 229 240 } 230 241 242 + d.repoTombstone = make([]bool, len(d.repoMetaData)) 243 + 231 244 blob, err := d.readSectionBlob(toc.runeDocSections) 232 245 if err != nil { 233 246 return nil, err ··· 275 288 return nil, err 276 289 } 277 290 278 - // This is a hack for now to keep the shard format unchanged. To support shard 279 - // merging we will store "repos" in the shard. 280 - repos := make([]uint16, 0, len(d.fileBranchMasks)) 281 - for i := 0; i < len(d.fileBranchMasks); i++ { 282 - repos = append(repos, 0) // just support 1 repo for now. 
291 + if d.metaData.IndexFormatVersion >= 17 { 292 + blob, err := d.readSectionBlob(toc.repos) 293 + if err != nil { 294 + return nil, err 295 + } 296 + d.repos = fromSizedDeltas16(blob, nil) 297 + } else { 298 + // every document is for repo index 0 (default value of uint16) 299 + d.repos = make([]uint16, len(d.fileBranchMasks)) 283 300 } 284 - d.repos = repos 285 301 286 302 if err := d.calculateStats(); err != nil { 287 303 return nil, err ··· 290 306 return &d, nil 291 307 } 292 308 293 - func (r *reader) readMetadata(toc *indexTOC) (*Repository, *IndexMetadata, error) { 309 + func (r *reader) readMetadata(toc *indexTOC) ([]*Repository, *IndexMetadata, error) { 294 310 var md IndexMetadata 295 311 if err := r.readJSON(&md, &toc.metaData); err != nil { 296 312 return nil, nil, err 297 313 } 298 314 299 - var repo Repository 300 - if err := r.readJSON(&repo, &toc.repoMetaData); err != nil { 301 - return nil, &md, err 302 - } 303 - 304 315 // Sourcegraph specific: we support mutating metadata via an additional 305 316 // ".meta" file. This is to support tombstoning. An additional benefit is we 306 317 // can update metadata (such as Rank and Name) without re-indexing content. 
307 - if b, err := os.ReadFile(r.r.Name() + ".meta"); err != nil && !os.IsNotExist(err) { 318 + blob, err := os.ReadFile(r.r.Name() + ".meta") 319 + if err != nil && !os.IsNotExist(err) { 308 320 return nil, &md, fmt.Errorf("failed to read meta file: %w", err) 309 - } else if len(b) > 0 { 310 - err = json.Unmarshal(b, &repo) 321 + } 322 + 323 + if len(blob) == 0 { 324 + blob, err = r.r.Read(toc.repoMetaData.off, toc.repoMetaData.sz) 311 325 if err != nil { 312 - return nil, &md, fmt.Errorf("failed to unmarshal meta file: %w", err) 326 + return nil, &md, err 313 327 } 314 328 } 315 329 316 - return &repo, &md, nil 330 + var repos []*Repository 331 + if md.IndexFormatVersion >= 17 { 332 + if err := json.Unmarshal(blob, &repos); err != nil { 333 + return nil, &md, err 334 + } 335 + } else { 336 + repos = make([]*Repository, 1) 337 + if err := json.Unmarshal(blob, &repos[0]); err != nil { 338 + return nil, &md, err 339 + } 340 + } 341 + 342 + return repos, &md, nil 317 343 } 318 344 319 345 const ngramEncoding = 8 ··· 449 475 450 476 // ReadMetadata returns the metadata of index shard without reading 451 477 // the index data. The IndexFile is not closed. 452 - func ReadMetadata(inf IndexFile) (*Repository, *IndexMetadata, error) { 478 + func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { 453 479 rd := &reader{r: inf} 454 480 var toc indexTOC 455 481 if err := rd.readTOC(&toc); err != nil { ··· 462 488 // ReadMetadataPath returns the metadata of index shard at p without reading 463 489 // the index data. ReadMetadataPath is a helper for ReadMetadata which opens 464 490 // the IndexFile at p. 465 - func ReadMetadataPath(p string) (*Repository, *IndexMetadata, error) { 491 + func ReadMetadataPath(p string) ([]*Repository, *IndexMetadata, error) { 466 492 f, err := os.Open(p) 467 493 if err != nil { 468 494 return nil, nil, err

+12 -3

read_test.go

··· 22 22 "fmt" 23 23 "io/ioutil" 24 24 "os" 25 + "path/filepath" 25 26 "reflect" 26 27 "strconv" 28 + "strings" 27 29 "testing" 28 30 29 31 "github.com/google/zoekt/query" ··· 143 145 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}}, 144 146 } 145 147 146 - shards := []string{"ctagsrepo_v16.00000", "repo_v16.00000"} 147 - for _, name := range shards { 148 - shard, err := loadShard("testdata/shards/" + name + ".zoekt") 148 + shards, err := filepath.Glob("testdata/shards/*.zoekt") 149 + if err != nil { 150 + t.Fatal(err) 151 + } 152 + 153 + for _, path := range shards { 154 + name := filepath.Base(path) 155 + name = strings.TrimSuffix(name, ".zoekt") 156 + 157 + shard, err := loadShard(path) 149 158 if err != nil { 150 159 t.Fatalf("error loading shard %s %v", name, err) 151 160 }

+59 -24

shards/shards.go

··· 131 131 132 132 type rankedShard struct { 133 133 zoekt.Searcher 134 - name string 134 + 135 135 priority float64 136 + 137 + // We have out of band ranking on compound shards which can change even if 138 + // the shard file does not. So we compute a rank in getShards. We store 139 + // names here to avoid the cost of List in the search request path. 140 + names []string 136 141 } 137 142 138 143 type shardedSearcher struct { ··· 236 241 // (and (repobranches ...) (q)) 237 242 // (and (repobranches ...) (q)) 238 243 244 + hasReposForPredicate := func(pred func(name string) bool) func(names []string) (any, all bool) { 245 + return func(names []string) (any, all bool) { 246 + any = false 247 + all = true 248 + for _, name := range names { 249 + b := pred(name) 250 + any = any || b 251 + all = all && b 252 + } 253 + return any, all 254 + } 255 + } 256 + 239 257 for i, c := range and.Children { 240 258 var setSize int 241 - var hasRepo func(string) bool 242 - 259 + var hasRepos func([]string) (bool, bool) 243 260 switch setQuery := c.(type) { 244 261 case *query.RepoSet: 245 262 setSize = len(setQuery.Set) 246 - hasRepo = func(name string) bool { 247 - return setQuery.Set[name] 248 - } 263 + hasRepos = hasReposForPredicate(func(name string) bool { return setQuery.Set[name] }) 249 264 case *query.RepoBranches: 250 265 setSize = len(setQuery.Set) 251 - hasRepo = func(name string) bool { 252 - return len(setQuery.Set[name]) > 0 253 - } 266 + hasRepos = hasReposForPredicate(func(name string) bool { return len(setQuery.Set[name]) > 0 }) 254 267 default: 255 268 continue 256 269 } ··· 262 275 } 263 276 264 277 filtered := make([]rankedShard, 0, setSize) 278 + filteredAll := true 265 279 266 280 for _, s := range shards { 267 - if hasRepo(s.name) { 281 + if any, all := hasRepos(s.names); any { 268 282 filtered = append(filtered, s) 283 + filteredAll = filteredAll && all 269 284 } 270 285 } 271 286 ··· 275 290 return filtered, and 276 291 } 277 292 293 + // We can't 
simplify the query since we are searching shards which contain 294 + // repos we aren't supposed to search. 295 + if !filteredAll { 296 + return filtered, and 297 + } 298 + 278 299 // This optimization allows us to avoid the work done by 279 300 // indexData.simplify for each shard. 280 301 // ··· 288 309 } 289 310 if b, ok := c.(*query.RepoBranches); ok { 290 311 // We can only replace if all the repos want the same branches. 291 - want := b.Set[filtered[0].name] 292 - for _, s := range filtered[1:] { 293 - if !strSliceEqual(want, b.Set[s.name]) { 294 - return filtered, and 312 + want := b.Set[filtered[0].names[0]] 313 + for _, s := range filtered { 314 + for _, name := range s.names { 315 + if !strSliceEqual(want, b.Set[name]) { 316 + return filtered, and 317 + } 295 318 } 296 319 } 297 320 298 321 // Every repo wants the same branches, so we can replace RepoBranches 299 322 // with a list of branch queries. 300 - and.Children[i] = b.Branches(filtered[0].name) 323 + and.Children[i] = b.Branches(filtered[0].names[0]) 301 324 return filtered, query.Simplify(and) 302 325 } 303 326 ··· 683 706 if priorityDiff != 0 { 684 707 return priorityDiff > 0 685 708 } 686 - return res[i].name < res[j].name 709 + if len(res[i].names) == 0 || len(res[j].names) == 0 { 710 + // Protect against empty names which can happen if we fail to List or 711 + // the shard is full of tombstones. Prefer the shard which has names. 712 + return len(res[i].names) >= len(res[j].names) 713 + } 714 + return res[i].names[0] < res[j].names[0] 687 715 }) 688 716 689 717 // Cache ranked. 
We currently hold a read lock, so start a goroutine which ··· 701 729 } 702 730 703 731 func mkRankedShard(s zoekt.Searcher) rankedShard { 704 - q := query.Repo{} 732 + q := query.Const{Value: true} 705 733 result, err := s.List(context.Background(), &q, nil) 706 734 if err != nil { 707 735 return rankedShard{Searcher: s} ··· 710 738 return rankedShard{Searcher: s} 711 739 } 712 740 713 - repo := result.Repos[0].Repository 714 - 715 - var priority float64 716 - if repo.RawConfig != nil { 717 - priority, _ = strconv.ParseFloat(repo.RawConfig["priority"], 64) 741 + var ( 742 + maxPriority float64 743 + names = make([]string, 0, len(result.Repos)) 744 + ) 745 + for _, r := range result.Repos { 746 + names = append(names, r.Repository.Name) 747 + if r.Repository.RawConfig != nil { 748 + priority, _ := strconv.ParseFloat(r.Repository.RawConfig["priority"], 64) 749 + if priority > maxPriority { 750 + priority = maxPriority 751 + } 752 + } 718 753 } 719 754 720 755 return rankedShard{ 721 756 Searcher: s, 722 - name: repo.Name, 723 - priority: priority, 757 + names: names, 758 + priority: maxPriority, 724 759 } 725 760 } 726 761

+1 -1

shards/watcher.go

··· 112 112 113 113 // In the case of downgrades, avoid reading 114 114 // newer index formats. 115 - if version > zoekt.IndexFormatVersion { 115 + if version > zoekt.IndexFormatVersion && version > zoekt.NextIndexFormatVersion { 116 116 continue 117 117 } 118 118

+8 -5

shards/watcher_test.go

··· 21 21 "path/filepath" 22 22 "testing" 23 23 "time" 24 + 25 + "github.com/google/zoekt" 24 26 ) 25 27 26 28 type loggingLoader struct { ··· 187 189 // t.Fatalf("got %v, want 'empty'", err) 188 190 // } 189 191 190 - shardv16 := filepath.Join(dir, "foo_v16.00000.zoekt") 192 + want := zoekt.NextIndexFormatVersion 193 + shardLatest := filepath.Join(dir, fmt.Sprintf("foo_v%d.00000.zoekt", want)) 191 194 192 - for _, v := range []int{15, 16, 17} { 193 - repo := fmt.Sprintf("foo_v%d.00000.zoekt", v) 195 + for delta := -1; delta <= 1; delta++ { 196 + repo := fmt.Sprintf("foo_v%d.00000.zoekt", want+delta) 194 197 shard := filepath.Join(dir, repo) 195 198 if err := ioutil.WriteFile(shard, []byte("hello"), 0644); err != nil { 196 199 t.Fatalf("WriteFile: %v", err) ··· 203 206 } 204 207 defer dw.Stop() 205 208 206 - if got := <-logger.loads; got != shardv16 { 207 - t.Fatalf("got load event %v, want %v", got, shardv16) 209 + if got := <-logger.loads; got != shardLatest { 210 + t.Fatalf("got load event %v, want %v", got, shardLatest) 208 211 } 209 212 210 213 advanceFS()

+13

testdata/gen-shards.sh

··· 1 + #!/bin/bash 2 + 3 + set -ex 4 + 5 + go build ../cmd/zoekt-index 6 + 7 + cp -r repo repo17 8 + 9 + ./zoekt-index -disable_ctags repo17 10 + 11 + rm -rf repo17 12 + 13 + mv *.zoekt shards/

+148

testdata/golden/TestReadSearch/ctagsrepo_v17.00000.golden

··· 1 + { 2 + "FormatVersion": 17, 3 + "FeatureVersion": 9, 4 + "FileMatches": [ 5 + [ 6 + { 7 + "Score": 910, 8 + "Debug": "", 9 + "FileName": "main.go", 10 + "Repository": "repo", 11 + "Branches": null, 12 + "LineMatches": [ 13 + { 14 + "Line": "ZnVuYyBtYWluKCkgew==", 15 + "LineStart": 69, 16 + "LineEnd": 82, 17 + "LineNumber": 10, 18 + "FileName": false, 19 + "Score": 501, 20 + "LineFragments": [ 21 + { 22 + "LineOffset": 0, 23 + "Offset": 69, 24 + "MatchLength": 9, 25 + "SymbolInfo": null 26 + } 27 + ] 28 + } 29 + ], 30 + "Content": null, 31 + "Checksum": "n9fUYqacPXg=", 32 + "Language": "go", 33 + "SubRepositoryName": "", 34 + "SubRepositoryPath": "", 35 + "Version": "" 36 + } 37 + ], 38 + [ 39 + { 40 + "Score": 710, 41 + "Debug": "", 42 + "FileName": "main.go", 43 + "Repository": "repo", 44 + "Branches": null, 45 + "LineMatches": [ 46 + { 47 + "Line": "cGFja2FnZSBtYWlu", 48 + "LineStart": 0, 49 + "LineEnd": 12, 50 + "LineNumber": 1, 51 + "FileName": false, 52 + "Score": 501, 53 + "LineFragments": [ 54 + { 55 + "LineOffset": 0, 56 + "Offset": 0, 57 + "MatchLength": 7, 58 + "SymbolInfo": null 59 + } 60 + ] 61 + } 62 + ], 63 + "Content": null, 64 + "Checksum": "n9fUYqacPXg=", 65 + "Language": "go", 66 + "SubRepositoryName": "", 67 + "SubRepositoryPath": "", 68 + "Version": "" 69 + } 70 + ], 71 + [ 72 + { 73 + "Score": 910, 74 + "Debug": "", 75 + "FileName": "main.go", 76 + "Repository": "repo", 77 + "Branches": null, 78 + "LineMatches": [ 79 + { 80 + "Line": "CW51bSAgICAgPSA1", 81 + "LineStart": 34, 82 + "LineEnd": 46, 83 + "LineNumber": 6, 84 + "FileName": false, 85 + "Score": 501, 86 + "LineFragments": [ 87 + { 88 + "LineOffset": 1, 89 + "Offset": 35, 90 + "MatchLength": 3, 91 + "SymbolInfo": { 92 + "Sym": "num", 93 + "Kind": "var", 94 + "Parent": "main", 95 + "ParentKind": "package" 96 + } 97 + } 98 + ] 99 + } 100 + ], 101 + "Content": null, 102 + "Checksum": "n9fUYqacPXg=", 103 + "Language": "go", 104 + "SubRepositoryName": "", 105 + "SubRepositoryPath": "", 
106 + "Version": "" 107 + } 108 + ], 109 + [ 110 + { 111 + "Score": 260, 112 + "Debug": "", 113 + "FileName": "main.go", 114 + "Repository": "repo", 115 + "Branches": null, 116 + "LineMatches": [ 117 + { 118 + "Line": "CW1lc3NhZ2UgPSAiaGVsbG8i", 119 + "LineStart": 47, 120 + "LineEnd": 65, 121 + "LineNumber": 7, 122 + "FileName": false, 123 + "Score": 51, 124 + "LineFragments": [ 125 + { 126 + "LineOffset": 4, 127 + "Offset": 51, 128 + "MatchLength": 4, 129 + "SymbolInfo": { 130 + "Sym": "message", 131 + "Kind": "var", 132 + "Parent": "main", 133 + "ParentKind": "package" 134 + } 135 + } 136 + ] 137 + } 138 + ], 139 + "Content": null, 140 + "Checksum": "n9fUYqacPXg=", 141 + "Language": "go", 142 + "SubRepositoryName": "", 143 + "SubRepositoryPath": "", 144 + "Version": "" 145 + } 146 + ] 147 + ] 148 + }

+4 -4

testdata/golden/TestReadSearch/repo_v15.00000.golden testdata/golden/TestReadSearch/repo17_v17.00000.golden

··· 1 1 { 2 - "FormatVersion": 15, 3 - "FeatureVersion": 8, 2 + "FormatVersion": 17, 3 + "FeatureVersion": 1, 4 4 "FileMatches": [ 5 5 [ 6 6 { 7 7 "Score": 910, 8 8 "Debug": "", 9 9 "FileName": "main.go", 10 - "Repository": "repo", 10 + "Repository": "repo17", 11 11 "Branches": null, 12 12 "LineMatches": [ 13 13 { ··· 40 40 "Score": 710, 41 41 "Debug": "", 42 42 "FileName": "main.go", 43 - "Repository": "repo", 43 + "Repository": "repo17", 44 44 "Branches": null, 45 45 "LineMatches": [ 46 46 {

testdata/shards/ctagsrepo_v17.00000.zoekt

This is a binary file and will not be displayed.

testdata/shards/repo17_v17.00000.zoekt

This is a binary file and will not be displayed.

testdata/shards/repo_v15.00000.zoekt

This is a binary file and will not be displayed.

+10

toc.go

··· 42 42 // 9: Store ctags metadata & bump default max file size 43 43 const FeatureVersion = 9 44 44 45 + // 17: compound shard (multi repo) 46 + const NextIndexFormatVersion = 17 47 + const NextFeatureVersion = 1 48 + 45 49 type indexTOC struct { 46 50 fileContents compoundSection 47 51 fileNames compoundSection ··· 69 73 nameEndRunes simpleSection 70 74 contentChecksums simpleSection 71 75 runeDocSections simpleSection 76 + 77 + repos simpleSection 72 78 } 73 79 74 80 func (t *indexTOC) sections() []section { ··· 100 106 &t.runeDocSections, 101 107 } 102 108 } 109 + 110 + func (t *indexTOC) sectionsNext() []section { 111 + return append(t.sections(), &t.repos) 112 + }

+29 -7

write.go

··· 19 19 "bytes" 20 20 "encoding/binary" 21 21 "encoding/json" 22 + "fmt" 22 23 "io" 23 24 "sort" 24 25 "time" 25 26 ) 26 27 27 - func (w *writer) writeTOC(toc *indexTOC) { 28 - secs := toc.sections() 28 + func (w *writer) writeTOC(secs []section) { 29 29 w.U32(uint32(len(secs))) 30 30 for _, s := range secs { 31 31 s.write(w) ··· 85 85 } 86 86 87 87 func (b *IndexBuilder) Write(out io.Writer) error { 88 + next := b.indexFormatVersion == NextIndexFormatVersion 89 + 88 90 buffered := bufio.NewWriterSize(out, 1<<20) 89 91 defer buffered.Flush() 90 92 ··· 147 149 w.Write(marshalDocSections(b.runeDocSections)) 148 150 toc.runeDocSections.end(w) 149 151 152 + if next { 153 + toc.repos.start(w) 154 + w.Write(toSizedDeltas16(b.repos)) 155 + toc.repos.end(w) 156 + } 157 + 150 158 indexTime := b.IndexTime 151 159 if indexTime.IsZero() { 152 160 indexTime = time.Now() 153 161 } 154 162 155 163 if err := b.writeJSON(&IndexMetadata{ 156 - IndexFormatVersion: IndexFormatVersion, 164 + IndexFormatVersion: b.indexFormatVersion, 157 165 IndexTime: indexTime, 158 - IndexFeatureVersion: FeatureVersion, 166 + IndexFeatureVersion: b.featureVersion, 159 167 PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, 160 168 LanguageMap: b.languageMap, 161 169 ZoektVersion: Version, 162 170 }, &toc.metaData, w); err != nil { 163 171 return err 164 172 } 165 - if err := b.writeJSON(b.repo, &toc.repoMetaData, w); err != nil { 166 - return err 173 + 174 + if next { 175 + if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil { 176 + return err 177 + } 178 + } else { 179 + if len(b.repoList) != 1 { 180 + return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion) 181 + } 182 + if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil { 183 + return err 184 + } 167 185 } 168 186 169 187 var tocSection simpleSection 170 188 171 189 tocSection.start(w) 172 - w.writeTOC(&toc) 190 + if next { 
191 + w.writeTOC(toc.sectionsNext()) 192 + } else { 193 + w.writeTOC(toc.sections()) 194 + } 173 195 tocSection.end(w) 174 196 tocSection.write(w) 175 197 return w.err

Configure Feed

Configure Feed