fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

all: compound shard support (#95)

This commit adds support for compound shards. A shard now has multiple
repositories associated with it, rather than always one. Most of zoekt
is document based, so minimal changes are required in the core search
evaluation codepath. The only change here is the addition of a mapping
from document to repo and storing the subrepo paths per repo.

The other change is the addition of tombstones. A tombstoned repository
is hidden from List and Search results. This was added so we can index a
new version of a repository in a compound shard without the need of
recomputing the whole shard.

This commit is mostly focused on the read path. It ensures everything
keeps working correctly once compound shards are introduced. The write
path, however, is mostly missing: we add a merge command for manual
merging, but zoekt-sourcegraph-indexserver is mostly unaware of
compound shards and has no way to mutate them. This will be follow-up
work.

To support compound shards we had to bump the index format
version, because the repoMetaData field is not backwards
compatible. However, we plan to make other changes to the
index format, so we introduced NextIndexFormatVersion. With this change
we will continue to use v16, unless a command opts into v17. This will
allow us to effectively feature flag the new format while we work on it.

Co-authored-by: Stefan Hengl <stefan@sourcegraph.com>

+886 -155
+40
bits.go
// (diff context, unchanged: the hunk opens on the tail of the preceding
// function in bits.go, "return ps }".)

// toSizedDeltas16 encodes offsets as a uvarint element count followed by one
// uvarint per element holding the difference between that offset and its
// predecessor (the first offset is relative to 0).
//
// Differences are computed in uint16 arithmetic, so a decreasing input wraps
// modulo 1<<16 and still round-trips through fromSizedDeltas16.
func toSizedDeltas16(offsets []uint16) []byte {
	// MaxVarintLen64 is the largest number of bytes PutUvarint can write, so
	// the scratch buffer can never be too small (PutUvarint panics if it is).
	var enc [binary.MaxVarintLen64]byte

	deltas := make([]byte, 0, len(offsets)*2)

	m := binary.PutUvarint(enc[:], uint64(len(offsets)))
	deltas = append(deltas, enc[:m]...)

	var last uint16
	for _, p := range offsets {
		delta := p - last
		last = p

		m := binary.PutUvarint(enc[:], uint64(delta))
		deltas = append(deltas, enc[:m]...)
	}
	return deltas
}

// fromSizedDeltas16 decodes data produced by toSizedDeltas16. The backing
// array of ps is reused when it is large enough; otherwise a new slice is
// allocated. The decoded offsets are returned.
//
// Malformed input never panics or hangs: decoding stops at the first varint
// that is truncated or overflows 64 bits, and whatever was decoded up to that
// point is returned.
func fromSizedDeltas16(data []byte, ps []uint16) []uint16 {
	sz, m := binary.Uvarint(data)
	if m <= 0 {
		// Empty input, or a truncated/overflowing element count.
		return ps[:0]
	}
	data = data[m:]

	// Every encoded delta occupies at least one byte, so a count larger than
	// the remaining input can only come from corrupt data. Clamp it so a bad
	// header cannot trigger an enormous allocation.
	want := sz
	if remaining := uint64(len(data)); want > remaining {
		want = remaining
	}
	if uint64(cap(ps)) < want {
		ps = make([]uint16, 0, want)
	} else {
		ps = ps[:0]
	}

	var last uint16
	for len(data) > 0 {
		delta, m := binary.Uvarint(data)
		if m <= 0 {
			// m == 0: truncated varint, m < 0: overflow. Without this check
			// the slice below would not advance (infinite loop) or would
			// panic on a negative index.
			break
		}
		last += uint16(delta)
		data = data[m:]
		ps = append(ps, last)
	}
	return ps
}

// (diff context, unchanged: the hunk closes on the first lines of
// func fromDeltas(data []byte, buf []uint32) []uint32, which continues
// outside the hunk.)
+44 -11
build/builder.go
··· 265 265 266 266 // ShardName returns the name the given index shard. 267 267 func (o *Options) ShardName(n int) string { 268 + return o.shardNameVersion(zoekt.IndexFormatVersion, n) 269 + } 270 + 271 + func (o *Options) shardNameVersion(version, n int) string { 268 272 abs := url.QueryEscape(o.RepositoryDescription.Name) 269 273 if len(abs) > 200 { 270 274 abs = abs[:200] + hashString(abs)[:8] 271 275 } 272 276 return filepath.Join(o.IndexDir, 273 - fmt.Sprintf("%s_v%d.%05d.zoekt", abs, zoekt.IndexFormatVersion, n)) 277 + fmt.Sprintf("%s_v%d.%05d.zoekt", abs, version, n)) 274 278 } 275 279 276 280 type IndexState string 277 281 278 282 const ( 279 - IndexStateMissing IndexState = "missing" 280 - IndexStateCorrupt = "corrupt" 281 - IndexStateVersion = "version-mismatch" 282 - IndexStateOption = "option-mismatch" 283 - IndexStateMeta = "meta-mismatch" 284 - IndexStateContent = "content-mismatch" 285 - IndexStateEqual = "equal" 283 + IndexStateMissing IndexState = "missing" 284 + IndexStateCorrupt = "corrupt" 285 + IndexStateUnexpectedCompound = "unexpected-compound" 286 + IndexStateVersion = "version-mismatch" 287 + IndexStateOption = "option-mismatch" 288 + IndexStateMeta = "meta-mismatch" 289 + IndexStateContent = "content-mismatch" 290 + IndexStateEqual = "equal" 286 291 ) 287 292 293 + var readVersions = []struct { 294 + IndexFormatVersion int 295 + FeatureVersion int 296 + }{{ 297 + IndexFormatVersion: zoekt.IndexFormatVersion, 298 + FeatureVersion: zoekt.FeatureVersion, 299 + }, { 300 + IndexFormatVersion: zoekt.NextIndexFormatVersion, 301 + FeatureVersion: zoekt.NextFeatureVersion, 302 + }} 303 + 288 304 // IncrementalSkipIndexing returns true if the index present on disk matches 289 305 // the build options. 290 306 func (o *Options) IncrementalSkipIndexing() bool { ··· 294 310 // IndexState checks how the index present on disk compares to the build 295 311 // options. 
296 312 func (o *Options) IndexState() IndexState { 297 - fn := o.ShardName(0) 313 + // Open the latest version we support that is on disk. 314 + fn := "" 315 + featureVersion := -1 316 + for _, v := range readVersions { 317 + fn = o.shardNameVersion(v.IndexFormatVersion, 0) 318 + if _, err := os.Stat(fn); err == nil { 319 + featureVersion = v.FeatureVersion 320 + break 321 + } 322 + } 298 323 299 - repo, index, err := zoekt.ReadMetadataPath(fn) 324 + repos, index, err := zoekt.ReadMetadataPath(fn) 300 325 if os.IsNotExist(err) { 301 326 return IndexStateMissing 302 327 } else if err != nil { 303 328 return IndexStateCorrupt 304 329 } 305 330 306 - if index.IndexFeatureVersion != zoekt.FeatureVersion { 331 + if index.IndexFeatureVersion != featureVersion { 307 332 return IndexStateVersion 308 333 } 334 + 335 + // This shouldn't happen. Options only references one repository, so 336 + // shardName will only return non compound repositories. We still need to 337 + // work out how to do IndexState with compound shards. 338 + if len(repos) != 1 { 339 + return IndexStateUnexpectedCompound 340 + } 341 + repo := repos[0] 309 342 310 343 if repo.IndexOptions != o.HashOptions() { 311 344 return IndexStateOption
+72
build/builder_test.go
··· 2 2 3 3 import ( 4 4 "flag" 5 + "io" 6 + "log" 5 7 "os" 6 8 "path/filepath" 7 9 "testing" ··· 114 116 } 115 117 } 116 118 } 119 + 120 + func TestIncrementalSkipIndexing(t *testing.T) { 121 + cases := []struct { 122 + name string 123 + want bool 124 + opts Options 125 + }{{ 126 + name: "v17-noop", 127 + want: true, 128 + opts: Options{ 129 + RepositoryDescription: zoekt.Repository{ 130 + Name: "repo17", 131 + }, 132 + SizeMax: 2097152, 133 + DisableCTags: true, 134 + }, 135 + }, { 136 + name: "v16-noop", 137 + want: true, 138 + opts: Options{ 139 + RepositoryDescription: zoekt.Repository{ 140 + Name: "repo", 141 + }, 142 + SizeMax: 2097152, 143 + DisableCTags: true, 144 + }, 145 + }, { 146 + name: "v17-id", 147 + want: false, 148 + opts: Options{ 149 + RepositoryDescription: zoekt.Repository{ 150 + Name: "repo17", 151 + RawConfig: map[string]string{ 152 + "repoid": "123", 153 + }, 154 + }, 155 + SizeMax: 2097152, 156 + DisableCTags: true, 157 + }, 158 + }, { 159 + name: "doesnotexist", 160 + want: false, 161 + opts: Options{ 162 + RepositoryDescription: zoekt.Repository{ 163 + Name: "doesnotexist", 164 + }, 165 + SizeMax: 2097152, 166 + DisableCTags: true, 167 + }, 168 + }} 169 + 170 + for _, tc := range cases { 171 + t.Run(tc.name, func(t *testing.T) { 172 + tc.opts.IndexDir = "../testdata/shards" 173 + t.Log(tc.opts.IndexState()) 174 + got := tc.opts.IncrementalSkipIndexing() 175 + if got != tc.want { 176 + t.Fatalf("want %v got %v", tc.want, got) 177 + } 178 + }) 179 + } 180 + } 181 + 182 + func TestMain(m *testing.M) { 183 + flag.Parse() 184 + if !testing.Verbose() { 185 + log.SetOutput(io.Discard) 186 + } 187 + os.Exit(m.Run()) 188 + }
+3 -3
build/e2e_test.go
··· 100 100 // "repo-mutated". We do this inside retry helper since we have noticed 101 101 // some flakiness on github CI. 102 102 for _, p := range fs { 103 - repo, _, err := zoekt.ReadMetadataPath(p) 103 + repos, _, err := zoekt.ReadMetadataPath(p) 104 104 if err != nil { 105 105 t.Fatal(err) 106 106 } 107 - repo.Name = "repo-mutated" 108 - b, err := json.Marshal(repo) 107 + repos[0].Name = "repo-mutated" 108 + b, err := json.Marshal(repos[0]) 109 109 if err != nil { 110 110 t.Fatal(err) 111 111 }
+7 -1
cmd/zoekt-indexserver/main.go
··· 217 217 } 218 218 defer ifile.Close() 219 219 220 - repo, _, err := zoekt.ReadMetadata(ifile) 220 + repos, _, err := zoekt.ReadMetadata(ifile) 221 221 if err != nil { 222 222 return nil 223 223 } 224 + 225 + // TODO support compound shards in zoekt-indexserver 226 + if len(repos) != 1 { 227 + return nil 228 + } 229 + repo := repos[0] 224 230 225 231 _, err = os.Stat(repo.Source) 226 232 if os.IsNotExist(err) {
+37
cmd/zoekt-merge-index/main.go
··· 1 + package main 2 + 3 + import ( 4 + "log" 5 + "os" 6 + "path/filepath" 7 + 8 + "github.com/google/zoekt" 9 + ) 10 + 11 + func merge(dstDir string, names []string) error { 12 + var files []zoekt.IndexFile 13 + for _, fn := range names { 14 + f, err := os.Open(fn) 15 + if err != nil { 16 + return err 17 + } 18 + defer f.Close() 19 + 20 + indexFile, err := zoekt.NewIndexFile(f) 21 + if err != nil { 22 + return err 23 + } 24 + defer indexFile.Close() 25 + 26 + files = append(files, indexFile) 27 + } 28 + 29 + return zoekt.Merge(dstDir, files...) 30 + } 31 + 32 + func main() { 33 + err := merge(filepath.Dir(os.Args[1]), os.Args[1:]) 34 + if err != nil { 35 + log.Fatal(err) 36 + } 37 + }
+51
cmd/zoekt-merge-index/main_test.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "path/filepath" 6 + "sort" 7 + "testing" 8 + 9 + "github.com/google/zoekt" 10 + "github.com/google/zoekt/query" 11 + "github.com/google/zoekt/shards" 12 + ) 13 + 14 + func TestMerge(t *testing.T) { 15 + dir := t.TempDir() 16 + 17 + v16Shards, err := filepath.Glob("../../testdata/shards/*_v16.*.zoekt") 18 + if err != nil { 19 + t.Fatal(err) 20 + } 21 + sort.Strings(v16Shards) 22 + t.Log(v16Shards) 23 + 24 + err = merge(dir, v16Shards) 25 + if err != nil { 26 + t.Fatal(err) 27 + } 28 + 29 + ss, err := shards.NewDirectorySearcher(dir) 30 + if err != nil { 31 + t.Fatalf("NewDirectorySearcher(%s): %v", dir, err) 32 + } 33 + defer ss.Close() 34 + 35 + q, err := query.Parse("hello") 36 + if err != nil { 37 + t.Fatalf("Parse(hello): %v", err) 38 + } 39 + 40 + var sOpts zoekt.SearchOptions 41 + ctx := context.Background() 42 + result, err := ss.Search(ctx, q, &sOpts) 43 + if err != nil { 44 + t.Fatalf("Search(%v): %v", q, err) 45 + } 46 + 47 + // we are merging the same shard twice, so we expect the same file twice. 48 + if len(result.Files) != 2 { 49 + t.Errorf("got %v, want 2 files.", result.Files) 50 + } 51 + }
+15 -5
cmd/zoekt-sourcegraph-indexserver/cleanup.go
··· 124 124 continue 125 125 } 126 126 127 - name, err := shardRepoName(path) 127 + names, err := shardRepoNames(path) 128 128 if err != nil { 129 129 debug.Printf("failed to read shard: %v", err) 130 130 continue 131 131 } 132 132 133 + // TODO support compound shards once we support tombstones 134 + if len(names) != 1 { 135 + continue 136 + } 137 + name := names[0] 138 + 133 139 shards[name] = append(shards[name], shard{ 134 140 Repo: name, 135 141 Path: path, ··· 139 145 return shards 140 146 } 141 147 142 - func shardRepoName(path string) (string, error) { 143 - repo, _, err := zoekt.ReadMetadataPath(path) 148 + func shardRepoNames(path string) ([]string, error) { 149 + repos, _, err := zoekt.ReadMetadataPath(path) 144 150 if err != nil { 145 - return "", err 151 + return nil, err 146 152 } 147 153 148 - return repo.Name, nil 154 + names := make([]string, 0, len(repos)) 155 + for _, repo := range repos { 156 + names = append(names, repo.Name) 157 + } 158 + return names, nil 149 159 } 150 160 151 161 var incompleteRE = regexp.MustCompile(`\.zoekt[0-9]+(\.\w+)?$`)
+8 -6
cmd/zoekt-sourcegraph-indexserver/cleanup_test.go
··· 34 34 if filepath.Ext(path) != ".zoekt" { 35 35 continue 36 36 } 37 - name, _ := shardRepoName(path) 37 + names, _ := shardRepoNames(path) 38 38 fi, _ := os.Stat(path) 39 - shards = append(shards, shard{ 40 - Repo: name, 41 - Path: filepath.Base(path), 42 - ModTime: fi.ModTime(), 43 - }) 39 + for _, name := range names { 40 + shards = append(shards, shard{ 41 + Repo: name, 42 + Path: filepath.Base(path), 43 + ModTime: fi.ModTime(), 44 + }) 45 + } 44 46 } 45 47 return shards 46 48 }
+7 -1
cmd/zoekt-sourcegraph-indexserver/meta.go
··· 2 2 3 3 import ( 4 4 "encoding/json" 5 + "fmt" 5 6 "io/ioutil" 6 7 "os" 7 8 "path/filepath" ··· 22 23 for i := 0; ; i++ { 23 24 fn := o.ShardName(i) 24 25 25 - repo, _, err := zoekt.ReadMetadataPath(fn) 26 + repos, _, err := zoekt.ReadMetadataPath(fn) 26 27 if os.IsNotExist(err) { 27 28 break 28 29 } else if err != nil { 29 30 return err 30 31 } 32 + 33 + if len(repos) != 1 { 34 + return fmt.Errorf("mergeMeta: does not support compound shards: %s", fn) 35 + } 36 + repo := repos[0] 31 37 32 38 if updated, err := repo.MergeMutable(&o.RepositoryDescription); err != nil { 33 39 return err
+13 -3
eval.go
··· 44 44 // otherwise. 45 45 func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(repoName string) bool) query.Q { 46 46 count := 0 47 - for _, md := range d.repoMetaData { 48 - if predicate(md.Name) { 47 + alive := len(d.repoMetaData) 48 + for i, md := range d.repoMetaData { 49 + if d.repoTombstone[i] { 50 + alive-- 51 + } else if predicate(md.Name) { 49 52 count++ 50 53 } 51 54 } 52 - if count == len(d.repoMetaData) { 55 + if count == alive { 53 56 return &query.Const{Value: true} 54 57 } 55 58 if count > 0 { ··· 180 183 nextDoc := mt.nextDoc() 181 184 if int(nextDoc) <= lastDoc { 182 185 nextDoc = uint32(lastDoc + 1) 186 + } 187 + // Skip tombstoned docs 188 + for nextDoc < docCount && d.repoTombstone[d.repos[nextDoc]] { 189 + nextDoc++ 183 190 } 184 191 if nextDoc >= docCount { 185 192 break ··· 486 493 } 487 494 488 495 for i := range d.repoListEntry { 496 + if d.repoTombstone[i] { 497 + continue 498 + } 489 499 rle := &d.repoListEntry[i] 490 500 ok, err := include(rle) 491 501 if err != nil {
+22 -12
eval_test.go
··· 128 128 } 129 129 } 130 130 131 - func TestSimplifyRepoSet(t *testing.T) { 132 - d := &indexData{ 133 - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, 131 + // compoundReposShard returns a compound shard where each repo has 1 document. 132 + func compoundReposShard(t *testing.T, names ...string) *indexData { 133 + t.Helper() 134 + b := newIndexBuilder() 135 + b.indexFormatVersion = NextIndexFormatVersion 136 + b.featureVersion = NextFeatureVersion 137 + for _, name := range names { 138 + if err := b.setRepository(&Repository{Name: name}); err != nil { 139 + t.Fatal(err) 140 + } 141 + if err := b.AddFile(name+".txt", []byte(name+" content")); err != nil { 142 + t.Fatal(err) 143 + } 134 144 } 145 + s := searcherForTest(t, b) 146 + return s.(*indexData) 147 + } 148 + 149 + func TestSimplifyRepoSet(t *testing.T) { 150 + d := compoundReposShard(t, "foo", "bar") 135 151 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}} 136 152 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}} 137 153 none := &query.RepoSet{Set: map[string]bool{"banana": true}} ··· 153 169 } 154 170 155 171 func TestSimplifyRepo(t *testing.T) { 156 - d := &indexData{ 157 - repoMetaData: []Repository{{Name: "foo"}, {Name: "fool"}}, 158 - } 172 + d := compoundReposShard(t, "foo", "fool") 159 173 all := &query.Repo{"foo"} 160 174 some := &query.Repo{"fool"} 161 175 none := &query.Repo{"bar"} ··· 177 191 } 178 192 179 193 func TestSimplifyRepoBranch(t *testing.T) { 180 - d := &indexData{ 181 - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, 182 - } 194 + d := compoundReposShard(t, "foo", "bar") 183 195 184 196 some := &query.RepoBranches{Set: map[string][]string{"bar": {"branch1"}}} 185 197 none := &query.Repo{"banana"} ··· 196 208 } 197 209 198 210 func TestSimplifyRepoBranchSimple(t *testing.T) { 199 - d := &indexData{ 200 - repoMetaData: []Repository{{Name: "foo"}}, 201 - } 211 + d := compoundReposShard(t, "foo") 202 212 q := 
&query.RepoBranches{Set: map[string][]string{"foo": {"HEAD", "b1"}, "bar": {"HEAD"}}} 203 213 204 214 want := &query.Or{[]query.Q{&query.Branch{
+1 -1
index_test.go
··· 1239 1239 t.Fatalf("ReadMetadata: %v", err) 1240 1240 } 1241 1241 1242 - if got, want := rd.Name, "reponame"; got != want { 1242 + if got, want := rd[0].Name, "reponame"; got != want { 1243 1243 t.Fatalf("got %q want %q", got, want) 1244 1244 } 1245 1245 }
+56 -27
indexbuilder.go
··· 148 148 149 149 // IndexBuilder builds a single index shard. 150 150 type IndexBuilder struct { 151 + // The version we will write to disk. Sourcegraph Specific. This is to 152 + // enable feature flagging new format versions. 153 + indexFormatVersion int 154 + featureVersion int 155 + 151 156 contentStrings []*searchableString 152 157 nameStrings []*searchableString 153 158 docSections [][]DocumentSection ··· 166 171 branchMasks []uint64 167 172 subRepos []uint32 168 173 174 + // docID => repoID 175 + repos []uint16 176 + 169 177 contentPostings *postingsBuilder 170 178 namePostings *postingsBuilder 171 179 172 - // root repository 173 - repo Repository 180 + // root repositories 181 + repoList []Repository 174 182 175 183 // name to index. 176 - subRepoIndices map[string]uint32 184 + subRepoIndices []map[string]uint32 177 185 178 186 // language => language code 179 187 languageMap map[string]byte ··· 205 213 // NewIndexBuilder creates a fresh IndexBuilder. The passed in 206 214 // Repository contains repo metadata, and may be set to nil. 
207 215 func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { 208 - b := &IndexBuilder{ 209 - contentPostings: newPostingsBuilder(), 210 - namePostings: newPostingsBuilder(), 211 - fileEndSymbol: []uint32{0}, 212 - symIndex: make(map[string]uint32), 213 - symKindIndex: make(map[string]uint32), 214 - languageMap: map[string]byte{}, 215 - } 216 + b := newIndexBuilder() 216 217 217 218 if r == nil { 218 219 r = &Repository{} ··· 223 224 return b, nil 224 225 } 225 226 226 - func (b *IndexBuilder) setRepository(desc *Repository) error { 227 - if len(b.contentStrings) > 0 { 228 - return fmt.Errorf("setRepository called after adding files") 227 + func newIndexBuilder() *IndexBuilder { 228 + return &IndexBuilder{ 229 + indexFormatVersion: IndexFormatVersion, 230 + featureVersion: FeatureVersion, 231 + 232 + contentPostings: newPostingsBuilder(), 233 + namePostings: newPostingsBuilder(), 234 + fileEndSymbol: []uint32{0}, 235 + symIndex: make(map[string]uint32), 236 + symKindIndex: make(map[string]uint32), 237 + languageMap: map[string]byte{}, 229 238 } 239 + } 240 + 241 + func (b *IndexBuilder) setRepository(desc *Repository) error { 230 242 if err := desc.verify(); err != nil { 231 243 return err 232 244 } ··· 235 247 return fmt.Errorf("too many branches") 236 248 } 237 249 238 - b.repo = *desc 250 + repo := *desc 239 251 240 252 // copy subrepomap without root 241 - b.repo.SubRepoMap = map[string]*Repository{} 253 + repo.SubRepoMap = map[string]*Repository{} 242 254 for k, v := range desc.SubRepoMap { 243 255 if k != "" { 244 - b.repo.SubRepoMap[k] = v 256 + repo.SubRepoMap[k] = v 245 257 } 246 258 } 247 259 248 - b.populateSubRepoIndices() 249 - return nil 260 + b.repoList = append(b.repoList, repo) 261 + 262 + return b.populateSubRepoIndices() 250 263 } 251 264 252 265 type DocumentSection struct { ··· 329 342 return nil 330 343 } 331 344 332 - func (b *IndexBuilder) populateSubRepoIndices() { 333 - if b.subRepoIndices != nil { 334 - return 345 + func (b 
*IndexBuilder) populateSubRepoIndices() error { 346 + if len(b.subRepoIndices) == len(b.repoList) { 347 + return nil 335 348 } 349 + if len(b.subRepoIndices) != len(b.repoList)-1 { 350 + return fmt.Errorf("populateSubRepoIndices not called for a repo: %d != %d - 1", len(b.subRepoIndices), len(b.repoList)) 351 + } 352 + repo := b.repoList[len(b.repoList)-1] 353 + b.subRepoIndices = append(b.subRepoIndices, mkSubRepoIndices(repo)) 354 + return nil 355 + } 356 + 357 + func mkSubRepoIndices(repo Repository) map[string]uint32 { 336 358 paths := []string{""} 337 - for k := range b.repo.SubRepoMap { 359 + for k := range repo.SubRepoMap { 338 360 paths = append(paths, k) 339 361 } 340 362 sort.Strings(paths) 341 - b.subRepoIndices = make(map[string]uint32, len(paths)) 363 + subRepoIndices := make(map[string]uint32, len(paths)) 342 364 for i, p := range paths { 343 - b.subRepoIndices[p] = uint32(i) 365 + subRepoIndices[p] = uint32(i) 344 366 } 367 + return subRepoIndices 345 368 } 346 369 347 370 const notIndexedMarker = "NOT-INDEXED: " ··· 424 447 } 425 448 b.addSymbols(doc.SymbolsMetaData) 426 449 427 - subRepoIdx, ok := b.subRepoIndices[doc.SubRepositoryPath] 450 + repoIdx := len(b.repoList) - 1 451 + subRepoIdx, ok := b.subRepoIndices[repoIdx][doc.SubRepositoryPath] 428 452 if !ok { 429 453 return fmt.Errorf("unknown subrepo path %q", doc.SubRepositoryPath) 430 454 } ··· 438 462 mask |= m 439 463 } 440 464 465 + if repoIdx > 1<<16 { 466 + return fmt.Errorf("too many repos in shard: max is %d", 1<<16) 467 + } 468 + 441 469 b.subRepos = append(b.subRepos, subRepoIdx) 470 + b.repos = append(b.repos, uint16(repoIdx)) 442 471 443 472 hasher.Write(doc.Content) 444 473 ··· 465 494 } 466 495 467 496 func (b *IndexBuilder) branchMask(br string) uint64 { 468 - for i, b := range b.repo.Branches { 497 + for i, b := range b.repoList[len(b.repoList)-1].Branches { 469 498 if b.Name == br { 470 499 return uint64(1) << uint(i) 471 500 }
+4 -2
indexdata.go
··· 77 77 metaData IndexMetadata 78 78 repoMetaData []Repository 79 79 80 + // repoTombstone[repoID] is true if we are not allowed to search 81 + // repoID. 82 + repoTombstone []bool 83 + 80 84 subRepos []uint32 81 85 subRepoPaths [][]string 82 86 ··· 271 275 272 276 return 273 277 } 274 - 275 - func (d *indexData) Repository() []Repository { return d.repoMetaData } 276 278 277 279 func (d *indexData) String() string { 278 280 return fmt.Sprintf("shard(%s)", d.file.Name())
+157
merge.go
··· 1 + package zoekt 2 + 3 + import ( 4 + "crypto/sha1" 5 + "fmt" 6 + "io/ioutil" 7 + "log" 8 + "os" 9 + "path/filepath" 10 + "runtime" 11 + ) 12 + 13 + func Merge(dstDir string, files ...IndexFile) error { 14 + var ds []*indexData 15 + for _, f := range files { 16 + searcher, err := NewSearcher(f) 17 + if err != nil { 18 + return err 19 + } 20 + ds = append(ds, searcher.(*indexData)) 21 + } 22 + 23 + ib, err := merge(ds...) 24 + if err != nil { 25 + return err 26 + } 27 + 28 + hasher := sha1.New() 29 + for _, d := range ds { 30 + for i, md := range d.repoMetaData { 31 + if d.repoTombstone[i] { 32 + continue 33 + } 34 + hasher.Write([]byte(md.Name)) 35 + hasher.Write([]byte{0}) 36 + } 37 + } 38 + 39 + fn := filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) 40 + return builderWriteAll(fn, ib) 41 + } 42 + 43 + func builderWriteAll(fn string, ib *IndexBuilder) error { 44 + dir := filepath.Dir(fn) 45 + if err := os.MkdirAll(dir, 0o700); err != nil { 46 + return err 47 + } 48 + 49 + f, err := ioutil.TempFile(dir, filepath.Base(fn)+".*.tmp") 50 + if err != nil { 51 + return err 52 + } 53 + if runtime.GOOS != "windows" { 54 + // umask? 
55 + if err := f.Chmod(0o666); err != nil { 56 + return err 57 + } 58 + } 59 + 60 + defer f.Close() 61 + if err := ib.Write(f); err != nil { 62 + return err 63 + } 64 + fi, err := f.Stat() 65 + if err != nil { 66 + return err 67 + } 68 + if err := f.Close(); err != nil { 69 + return err 70 + } 71 + 72 + if err := os.Rename(f.Name(), fn); err != nil { 73 + return err 74 + } 75 + 76 + log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), 77 + float64(fi.Size())/float64(ib.ContentSize()+1)) 78 + 79 + return nil 80 + } 81 + 82 + func merge(ds ...*indexData) (*IndexBuilder, error) { 83 + if len(ds) == 0 { 84 + return nil, fmt.Errorf("need 1 or more indexData to merge") 85 + } 86 + 87 + ib := newIndexBuilder() 88 + ib.indexFormatVersion = NextIndexFormatVersion 89 + ib.featureVersion = NextFeatureVersion 90 + 91 + for _, d := range ds { 92 + lastRepoID := -1 93 + for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 94 + repoID := int(d.repos[docID]) 95 + 96 + if d.repoTombstone[repoID] { 97 + continue 98 + } 99 + 100 + if repoID != lastRepoID { 101 + if lastRepoID > repoID { 102 + return nil, fmt.Errorf("non-contiguous repo ids in %s for document %d: old=%d current=%d", d.String(), docID, lastRepoID, repoID) 103 + } 104 + lastRepoID = repoID 105 + 106 + // TODO we are losing empty repos on merging since we only get here if 107 + // there is an associated document. 108 + 109 + if err := ib.setRepository(&d.repoMetaData[repoID]); err != nil { 110 + return nil, err 111 + } 112 + } 113 + 114 + doc := Document{ 115 + Name: string(d.fileName(docID)), 116 + // Content set below since it can return an error 117 + // Branches set below since it requires lookups 118 + SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], 119 + Language: d.languageMap[d.languages[docID]], 120 + // SkipReason not set, will be part of content from original indexer. 
121 + } 122 + 123 + var err error 124 + if doc.Content, err = d.readContents(docID); err != nil { 125 + return nil, err 126 + } 127 + 128 + if doc.Symbols, _, err = d.readDocSections(docID, nil); err != nil { 129 + return nil, err 130 + } 131 + 132 + doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 133 + for i := range doc.SymbolsMetaData { 134 + doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 135 + } 136 + 137 + // calculate branches 138 + { 139 + mask := d.fileBranchMasks[docID] 140 + id := uint32(1) 141 + for mask != 0 { 142 + if mask&0x1 != 0 { 143 + doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) 144 + } 145 + id <<= 1 146 + mask >>= 1 147 + } 148 + } 149 + 150 + if err := ib.Add(doc); err != nil { 151 + return nil, err 152 + } 153 + } 154 + } 155 + 156 + return ib, nil 157 + }
+65 -39
read.go
··· 79 79 } 80 80 81 81 secs := toc.sections() 82 + if len(secs) != int(sectionCount) { 83 + secs = toc.sectionsNext() 84 + } 82 85 83 86 if len(secs) != int(sectionCount) { 84 87 return fmt.Errorf("section count mismatch: got %d want %d", sectionCount, len(secs)) ··· 137 140 return json.Unmarshal(blob, data) 138 141 } 139 142 143 + // canReadVersion returns checks if zoekt can read in md. If it can't a 144 + // non-nil error is returned. 145 + func canReadVersion(md *IndexMetadata) bool { 146 + // Backwards compatible with v16 147 + return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion 148 + } 149 + 140 150 func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { 141 151 d := indexData{ 142 152 file: r.r, ··· 145 155 branchNames: []map[uint]string{}, 146 156 } 147 157 148 - repo, md, err := r.readMetadata(toc) 149 - if md != nil && md.IndexFormatVersion != IndexFormatVersion { 158 + repos, md, err := r.readMetadata(toc) 159 + if md != nil && !canReadVersion(md) { 150 160 return nil, fmt.Errorf("file is v%d, want v%d", md.IndexFormatVersion, IndexFormatVersion) 151 161 } else if err != nil { 152 162 return nil, err 153 163 } 154 164 155 165 d.metaData = *md 156 - d.repoMetaData = []Repository{*repo} 166 + d.repoMetaData = make([]Repository, 0, len(repos)) 167 + for _, r := range repos { 168 + d.repoMetaData = append(d.repoMetaData, *r) 169 + } 157 170 158 171 d.boundariesStart = toc.fileContents.data.off 159 172 d.boundaries = toc.fileContents.relativeIndex() ··· 162 175 d.docSectionsStart = toc.fileSections.data.off 163 176 d.docSectionsIndex = toc.fileSections.relativeIndex() 164 177 165 - if d.metaData.IndexFormatVersion == 16 { 166 - d.symbols.symKindIndex = toc.symbolKindMap.relativeIndex() 167 - d.fileEndSymbol, err = readSectionU32(d.file, toc.fileEndSymbol) 168 - if err != nil { 169 - return nil, err 170 - } 178 + d.symbols.symKindIndex = toc.symbolKindMap.relativeIndex() 179 + d.fileEndSymbol, err 
= readSectionU32(d.file, toc.fileEndSymbol) 180 + if err != nil { 181 + return nil, err 182 + } 171 183 172 - // Call readSectionBlob on each section key, and store the result in 173 - // the blob value. 174 - for sect, blob := range map[simpleSection]*[]byte{ 175 - toc.symbolMap.index: &d.symbols.symIndex, 176 - toc.symbolMap.data: &d.symbols.symContent, 177 - toc.symbolKindMap.data: &d.symbols.symKindContent, 178 - toc.symbolMetaData: &d.symbols.symMetaData, 179 - } { 180 - if *blob, err = d.readSectionBlob(sect); err != nil { 181 - return nil, err 182 - } 184 + // Call readSectionBlob on each section key, and store the result in 185 + // the blob value. 186 + for sect, blob := range map[simpleSection]*[]byte{ 187 + toc.symbolMap.index: &d.symbols.symIndex, 188 + toc.symbolMap.data: &d.symbols.symContent, 189 + toc.symbolKindMap.data: &d.symbols.symKindContent, 190 + toc.symbolMetaData: &d.symbols.symMetaData, 191 + } { 192 + if *blob, err = d.readSectionBlob(sect); err != nil { 193 + return nil, err 183 194 } 184 195 } 185 196 ··· 228 239 d.rawConfigMasks = append(d.rawConfigMasks, encodeRawConfig(md.RawConfig)) 229 240 } 230 241 242 + d.repoTombstone = make([]bool, len(d.repoMetaData)) 243 + 231 244 blob, err := d.readSectionBlob(toc.runeDocSections) 232 245 if err != nil { 233 246 return nil, err ··· 275 288 return nil, err 276 289 } 277 290 278 - // This is a hack for now to keep the shard format unchanged. To support shard 279 - // merging we will store "repos" in the shard. 280 - repos := make([]uint16, 0, len(d.fileBranchMasks)) 281 - for i := 0; i < len(d.fileBranchMasks); i++ { 282 - repos = append(repos, 0) // just support 1 repo for now. 
291 + if d.metaData.IndexFormatVersion >= 17 { 292 + blob, err := d.readSectionBlob(toc.repos) 293 + if err != nil { 294 + return nil, err 295 + } 296 + d.repos = fromSizedDeltas16(blob, nil) 297 + } else { 298 + // every document is for repo index 0 (default value of uint16) 299 + d.repos = make([]uint16, len(d.fileBranchMasks)) 283 300 } 284 - d.repos = repos 285 301 286 302 if err := d.calculateStats(); err != nil { 287 303 return nil, err ··· 290 306 return &d, nil 291 307 } 292 308 293 - func (r *reader) readMetadata(toc *indexTOC) (*Repository, *IndexMetadata, error) { 309 + func (r *reader) readMetadata(toc *indexTOC) ([]*Repository, *IndexMetadata, error) { 294 310 var md IndexMetadata 295 311 if err := r.readJSON(&md, &toc.metaData); err != nil { 296 312 return nil, nil, err 297 313 } 298 314 299 - var repo Repository 300 - if err := r.readJSON(&repo, &toc.repoMetaData); err != nil { 301 - return nil, &md, err 302 - } 303 - 304 315 // Sourcegraph specific: we support mutating metadata via an additional 305 316 // ".meta" file. This is to support tombstoning. An additional benefit is we 306 317 // can update metadata (such as Rank and Name) without re-indexing content. 
307 - if b, err := os.ReadFile(r.r.Name() + ".meta"); err != nil && !os.IsNotExist(err) { 318 + blob, err := os.ReadFile(r.r.Name() + ".meta") 319 + if err != nil && !os.IsNotExist(err) { 308 320 return nil, &md, fmt.Errorf("failed to read meta file: %w", err) 309 - } else if len(b) > 0 { 310 - err = json.Unmarshal(b, &repo) 321 + } 322 + 323 + if len(blob) == 0 { 324 + blob, err = r.r.Read(toc.repoMetaData.off, toc.repoMetaData.sz) 311 325 if err != nil { 312 - return nil, &md, fmt.Errorf("failed to unmarshal meta file: %w", err) 326 + return nil, &md, err 313 327 } 314 328 } 315 329 316 - return &repo, &md, nil 330 + var repos []*Repository 331 + if md.IndexFormatVersion >= 17 { 332 + if err := json.Unmarshal(blob, &repos); err != nil { 333 + return nil, &md, err 334 + } 335 + } else { 336 + repos = make([]*Repository, 1) 337 + if err := json.Unmarshal(blob, &repos[0]); err != nil { 338 + return nil, &md, err 339 + } 340 + } 341 + 342 + return repos, &md, nil 317 343 } 318 344 319 345 const ngramEncoding = 8 ··· 449 475 450 476 // ReadMetadata returns the metadata of index shard without reading 451 477 // the index data. The IndexFile is not closed. 452 - func ReadMetadata(inf IndexFile) (*Repository, *IndexMetadata, error) { 478 + func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { 453 479 rd := &reader{r: inf} 454 480 var toc indexTOC 455 481 if err := rd.readTOC(&toc); err != nil { ··· 462 488 // ReadMetadataPath returns the metadata of index shard at p without reading 463 489 // the index data. ReadMetadataPath is a helper for ReadMetadata which opens 464 490 // the IndexFile at p. 465 - func ReadMetadataPath(p string) (*Repository, *IndexMetadata, error) { 491 + func ReadMetadataPath(p string) ([]*Repository, *IndexMetadata, error) { 466 492 f, err := os.Open(p) 467 493 if err != nil { 468 494 return nil, nil, err
+12 -3
read_test.go
··· 22 22 "fmt" 23 23 "io/ioutil" 24 24 "os" 25 + "path/filepath" 25 26 "reflect" 26 27 "strconv" 28 + "strings" 27 29 "testing" 28 30 29 31 "github.com/google/zoekt/query" ··· 143 145 &query.Symbol{Expr: &query.Regexp{Regexp: mustParseRE("sage$")}}, 144 146 } 145 147 146 - shards := []string{"ctagsrepo_v16.00000", "repo_v16.00000"} 147 - for _, name := range shards { 148 - shard, err := loadShard("testdata/shards/" + name + ".zoekt") 148 + shards, err := filepath.Glob("testdata/shards/*.zoekt") 149 + if err != nil { 150 + t.Fatal(err) 151 + } 152 + 153 + for _, path := range shards { 154 + name := filepath.Base(path) 155 + name = strings.TrimSuffix(name, ".zoekt") 156 + 157 + shard, err := loadShard(path) 149 158 if err != nil { 150 159 t.Fatalf("error loading shard %s %v", name, err) 151 160 }
+59 -24
shards/shards.go
··· 131 131 132 132 type rankedShard struct { 133 133 zoekt.Searcher 134 - name string 134 + 135 135 priority float64 136 + 137 + // We have out of band ranking on compound shards which can change even if 138 + // the shard file does not. So we compute a rank in getShards. We store 139 + // names here to avoid the cost of List in the search request path. 140 + names []string 136 141 } 137 142 138 143 type shardedSearcher struct { ··· 236 241 // (and (repobranches ...) (q)) 237 242 // (and (repobranches ...) (q)) 238 243 244 + hasReposForPredicate := func(pred func(name string) bool) func(names []string) (any, all bool) { 245 + return func(names []string) (any, all bool) { 246 + any = false 247 + all = true 248 + for _, name := range names { 249 + b := pred(name) 250 + any = any || b 251 + all = all && b 252 + } 253 + return any, all 254 + } 255 + } 256 + 239 257 for i, c := range and.Children { 240 258 var setSize int 241 - var hasRepo func(string) bool 242 - 259 + var hasRepos func([]string) (bool, bool) 243 260 switch setQuery := c.(type) { 244 261 case *query.RepoSet: 245 262 setSize = len(setQuery.Set) 246 - hasRepo = func(name string) bool { 247 - return setQuery.Set[name] 248 - } 263 + hasRepos = hasReposForPredicate(func(name string) bool { return setQuery.Set[name] }) 249 264 case *query.RepoBranches: 250 265 setSize = len(setQuery.Set) 251 - hasRepo = func(name string) bool { 252 - return len(setQuery.Set[name]) > 0 253 - } 266 + hasRepos = hasReposForPredicate(func(name string) bool { return len(setQuery.Set[name]) > 0 }) 254 267 default: 255 268 continue 256 269 } ··· 262 275 } 263 276 264 277 filtered := make([]rankedShard, 0, setSize) 278 + filteredAll := true 265 279 266 280 for _, s := range shards { 267 - if hasRepo(s.name) { 281 + if any, all := hasRepos(s.names); any { 268 282 filtered = append(filtered, s) 283 + filteredAll = filteredAll && all 269 284 } 270 285 } 271 286 ··· 275 290 return filtered, and 276 291 } 277 292 293 + // We can't 
simplify the query since we are searching shards which contain 294 + // repos we aren't supposed to search. 295 + if !filteredAll { 296 + return filtered, and 297 + } 298 + 278 299 // This optimization allows us to avoid the work done by 279 300 // indexData.simplify for each shard. 280 301 // ··· 288 309 } 289 310 if b, ok := c.(*query.RepoBranches); ok { 290 311 // We can only replace if all the repos want the same branches. 291 - want := b.Set[filtered[0].name] 292 - for _, s := range filtered[1:] { 293 - if !strSliceEqual(want, b.Set[s.name]) { 294 - return filtered, and 312 + want := b.Set[filtered[0].names[0]] 313 + for _, s := range filtered { 314 + for _, name := range s.names { 315 + if !strSliceEqual(want, b.Set[name]) { 316 + return filtered, and 317 + } 295 318 } 296 319 } 297 320 298 321 // Every repo wants the same branches, so we can replace RepoBranches 299 322 // with a list of branch queries. 300 - and.Children[i] = b.Branches(filtered[0].name) 323 + and.Children[i] = b.Branches(filtered[0].names[0]) 301 324 return filtered, query.Simplify(and) 302 325 } 303 326 ··· 683 706 if priorityDiff != 0 { 684 707 return priorityDiff > 0 685 708 } 686 - return res[i].name < res[j].name 709 + if len(res[i].names) == 0 || len(res[j].names) == 0 { 710 + // Protect against empty names which can happen if we fail to List or 711 + // the shard is full of tombstones. Prefer the shard which has names. 712 + return len(res[i].names) >= len(res[j].names) 713 + } 714 + return res[i].names[0] < res[j].names[0] 687 715 }) 688 716 689 717 // Cache ranked. 
We currently hold a read lock, so start a goroutine which ··· 701 729 } 702 730 703 731 func mkRankedShard(s zoekt.Searcher) rankedShard { 704 - q := query.Repo{} 732 + q := query.Const{Value: true} 705 733 result, err := s.List(context.Background(), &q, nil) 706 734 if err != nil { 707 735 return rankedShard{Searcher: s} ··· 710 738 return rankedShard{Searcher: s} 711 739 } 712 740 713 - repo := result.Repos[0].Repository 714 - 715 - var priority float64 716 - if repo.RawConfig != nil { 717 - priority, _ = strconv.ParseFloat(repo.RawConfig["priority"], 64) 741 + var ( 742 + maxPriority float64 743 + names = make([]string, 0, len(result.Repos)) 744 + ) 745 + for _, r := range result.Repos { 746 + names = append(names, r.Repository.Name) 747 + if r.Repository.RawConfig != nil { 748 + priority, _ := strconv.ParseFloat(r.Repository.RawConfig["priority"], 64) 749 + if priority > maxPriority { 750 + priority = maxPriority 751 + } 752 + } 718 753 } 719 754 720 755 return rankedShard{ 721 756 Searcher: s, 722 - name: repo.Name, 723 - priority: priority, 757 + names: names, 758 + priority: maxPriority, 724 759 } 725 760 } 726 761
+1 -1
shards/watcher.go
··· 112 112 113 113 // In the case of downgrades, avoid reading 114 114 // newer index formats. 115 - if version > zoekt.IndexFormatVersion { 115 + if version > zoekt.IndexFormatVersion && version > zoekt.NextIndexFormatVersion { 116 116 continue 117 117 } 118 118
+8 -5
shards/watcher_test.go
··· 21 21 "path/filepath" 22 22 "testing" 23 23 "time" 24 + 25 + "github.com/google/zoekt" 24 26 ) 25 27 26 28 type loggingLoader struct { ··· 187 189 // t.Fatalf("got %v, want 'empty'", err) 188 190 // } 189 191 190 - shardv16 := filepath.Join(dir, "foo_v16.00000.zoekt") 192 + want := zoekt.NextIndexFormatVersion 193 + shardLatest := filepath.Join(dir, fmt.Sprintf("foo_v%d.00000.zoekt", want)) 191 194 192 - for _, v := range []int{15, 16, 17} { 193 - repo := fmt.Sprintf("foo_v%d.00000.zoekt", v) 195 + for delta := -1; delta <= 1; delta++ { 196 + repo := fmt.Sprintf("foo_v%d.00000.zoekt", want+delta) 194 197 shard := filepath.Join(dir, repo) 195 198 if err := ioutil.WriteFile(shard, []byte("hello"), 0644); err != nil { 196 199 t.Fatalf("WriteFile: %v", err) ··· 203 206 } 204 207 defer dw.Stop() 205 208 206 - if got := <-logger.loads; got != shardv16 { 207 - t.Fatalf("got load event %v, want %v", got, shardv16) 209 + if got := <-logger.loads; got != shardLatest { 210 + t.Fatalf("got load event %v, want %v", got, shardLatest) 208 211 } 209 212 210 213 advanceFS()
+13
testdata/gen-shards.sh
#!/bin/bash
#
# Builds zoekt-index from ../cmd/zoekt-index, indexes a copy of the "repo"
# fixture under the directory name "repo17" with ctags disabled, and moves the
# resulting *.zoekt files into shards/.

set -euxo pipefail

# All paths below are relative to this script's directory, so switch to it
# first; the script then works no matter where it is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

go build ../cmd/zoekt-index

# Remove any copy left behind by an earlier failed run. Otherwise cp would
# nest the fixture as repo17/repo and change what gets indexed.
rm -rf repo17
# Also remove the temporary copy if a later step fails.
trap 'rm -rf repo17' EXIT

cp -r repo repo17

./zoekt-index -disable_ctags repo17

rm -rf repo17

mv *.zoekt shards/
+148
testdata/golden/TestReadSearch/ctagsrepo_v17.00000.golden
··· 1 + { 2 + "FormatVersion": 17, 3 + "FeatureVersion": 9, 4 + "FileMatches": [ 5 + [ 6 + { 7 + "Score": 910, 8 + "Debug": "", 9 + "FileName": "main.go", 10 + "Repository": "repo", 11 + "Branches": null, 12 + "LineMatches": [ 13 + { 14 + "Line": "ZnVuYyBtYWluKCkgew==", 15 + "LineStart": 69, 16 + "LineEnd": 82, 17 + "LineNumber": 10, 18 + "FileName": false, 19 + "Score": 501, 20 + "LineFragments": [ 21 + { 22 + "LineOffset": 0, 23 + "Offset": 69, 24 + "MatchLength": 9, 25 + "SymbolInfo": null 26 + } 27 + ] 28 + } 29 + ], 30 + "Content": null, 31 + "Checksum": "n9fUYqacPXg=", 32 + "Language": "go", 33 + "SubRepositoryName": "", 34 + "SubRepositoryPath": "", 35 + "Version": "" 36 + } 37 + ], 38 + [ 39 + { 40 + "Score": 710, 41 + "Debug": "", 42 + "FileName": "main.go", 43 + "Repository": "repo", 44 + "Branches": null, 45 + "LineMatches": [ 46 + { 47 + "Line": "cGFja2FnZSBtYWlu", 48 + "LineStart": 0, 49 + "LineEnd": 12, 50 + "LineNumber": 1, 51 + "FileName": false, 52 + "Score": 501, 53 + "LineFragments": [ 54 + { 55 + "LineOffset": 0, 56 + "Offset": 0, 57 + "MatchLength": 7, 58 + "SymbolInfo": null 59 + } 60 + ] 61 + } 62 + ], 63 + "Content": null, 64 + "Checksum": "n9fUYqacPXg=", 65 + "Language": "go", 66 + "SubRepositoryName": "", 67 + "SubRepositoryPath": "", 68 + "Version": "" 69 + } 70 + ], 71 + [ 72 + { 73 + "Score": 910, 74 + "Debug": "", 75 + "FileName": "main.go", 76 + "Repository": "repo", 77 + "Branches": null, 78 + "LineMatches": [ 79 + { 80 + "Line": "CW51bSAgICAgPSA1", 81 + "LineStart": 34, 82 + "LineEnd": 46, 83 + "LineNumber": 6, 84 + "FileName": false, 85 + "Score": 501, 86 + "LineFragments": [ 87 + { 88 + "LineOffset": 1, 89 + "Offset": 35, 90 + "MatchLength": 3, 91 + "SymbolInfo": { 92 + "Sym": "num", 93 + "Kind": "var", 94 + "Parent": "main", 95 + "ParentKind": "package" 96 + } 97 + } 98 + ] 99 + } 100 + ], 101 + "Content": null, 102 + "Checksum": "n9fUYqacPXg=", 103 + "Language": "go", 104 + "SubRepositoryName": "", 105 + "SubRepositoryPath": "", 
106 + "Version": "" 107 + } 108 + ], 109 + [ 110 + { 111 + "Score": 260, 112 + "Debug": "", 113 + "FileName": "main.go", 114 + "Repository": "repo", 115 + "Branches": null, 116 + "LineMatches": [ 117 + { 118 + "Line": "CW1lc3NhZ2UgPSAiaGVsbG8i", 119 + "LineStart": 47, 120 + "LineEnd": 65, 121 + "LineNumber": 7, 122 + "FileName": false, 123 + "Score": 51, 124 + "LineFragments": [ 125 + { 126 + "LineOffset": 4, 127 + "Offset": 51, 128 + "MatchLength": 4, 129 + "SymbolInfo": { 130 + "Sym": "message", 131 + "Kind": "var", 132 + "Parent": "main", 133 + "ParentKind": "package" 134 + } 135 + } 136 + ] 137 + } 138 + ], 139 + "Content": null, 140 + "Checksum": "n9fUYqacPXg=", 141 + "Language": "go", 142 + "SubRepositoryName": "", 143 + "SubRepositoryPath": "", 144 + "Version": "" 145 + } 146 + ] 147 + ] 148 + }
+4 -4
testdata/golden/TestReadSearch/repo_v15.00000.golden testdata/golden/TestReadSearch/repo17_v17.00000.golden
··· 1 1 { 2 - "FormatVersion": 15, 3 - "FeatureVersion": 8, 2 + "FormatVersion": 17, 3 + "FeatureVersion": 1, 4 4 "FileMatches": [ 5 5 [ 6 6 { 7 7 "Score": 910, 8 8 "Debug": "", 9 9 "FileName": "main.go", 10 - "Repository": "repo", 10 + "Repository": "repo17", 11 11 "Branches": null, 12 12 "LineMatches": [ 13 13 { ··· 40 40 "Score": 710, 41 41 "Debug": "", 42 42 "FileName": "main.go", 43 - "Repository": "repo", 43 + "Repository": "repo17", 44 44 "Branches": null, 45 45 "LineMatches": [ 46 46 {
testdata/shards/ctagsrepo_v17.00000.zoekt

This is a binary file and will not be displayed.

testdata/shards/repo17_v17.00000.zoekt

This is a binary file and will not be displayed.

testdata/shards/repo_v15.00000.zoekt

This is a binary file and will not be displayed.

+10
toc.go
··· 42 42 // 9: Store ctags metadata & bump default max file size 43 43 const FeatureVersion = 9 44 44 45 + // 17: compound shard (multi repo) 46 + const NextIndexFormatVersion = 17 47 + const NextFeatureVersion = 1 48 + 45 49 type indexTOC struct { 46 50 fileContents compoundSection 47 51 fileNames compoundSection ··· 69 73 nameEndRunes simpleSection 70 74 contentChecksums simpleSection 71 75 runeDocSections simpleSection 76 + 77 + repos simpleSection 72 78 } 73 79 74 80 func (t *indexTOC) sections() []section { ··· 100 106 &t.runeDocSections, 101 107 } 102 108 } 109 + 110 + func (t *indexTOC) sectionsNext() []section { 111 + return append(t.sections(), &t.repos) 112 + }
+29 -7
write.go
··· 19 19 "bytes" 20 20 "encoding/binary" 21 21 "encoding/json" 22 + "fmt" 22 23 "io" 23 24 "sort" 24 25 "time" 25 26 ) 26 27 27 - func (w *writer) writeTOC(toc *indexTOC) { 28 - secs := toc.sections() 28 + func (w *writer) writeTOC(secs []section) { 29 29 w.U32(uint32(len(secs))) 30 30 for _, s := range secs { 31 31 s.write(w) ··· 85 85 } 86 86 87 87 func (b *IndexBuilder) Write(out io.Writer) error { 88 + next := b.indexFormatVersion == NextIndexFormatVersion 89 + 88 90 buffered := bufio.NewWriterSize(out, 1<<20) 89 91 defer buffered.Flush() 90 92 ··· 147 149 w.Write(marshalDocSections(b.runeDocSections)) 148 150 toc.runeDocSections.end(w) 149 151 152 + if next { 153 + toc.repos.start(w) 154 + w.Write(toSizedDeltas16(b.repos)) 155 + toc.repos.end(w) 156 + } 157 + 150 158 indexTime := b.IndexTime 151 159 if indexTime.IsZero() { 152 160 indexTime = time.Now() 153 161 } 154 162 155 163 if err := b.writeJSON(&IndexMetadata{ 156 - IndexFormatVersion: IndexFormatVersion, 164 + IndexFormatVersion: b.indexFormatVersion, 157 165 IndexTime: indexTime, 158 - IndexFeatureVersion: FeatureVersion, 166 + IndexFeatureVersion: b.featureVersion, 159 167 PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, 160 168 LanguageMap: b.languageMap, 161 169 ZoektVersion: Version, 162 170 }, &toc.metaData, w); err != nil { 163 171 return err 164 172 } 165 - if err := b.writeJSON(b.repo, &toc.repoMetaData, w); err != nil { 166 - return err 173 + 174 + if next { 175 + if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil { 176 + return err 177 + } 178 + } else { 179 + if len(b.repoList) != 1 { 180 + return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion) 181 + } 182 + if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil { 183 + return err 184 + } 167 185 } 168 186 169 187 var tocSection simpleSection 170 188 171 189 tocSection.start(w) 172 - w.writeTOC(&toc) 190 + if next { 
191 + w.writeTOC(toc.sectionsNext()) 192 + } else { 193 + w.writeTOC(toc.sections()) 194 + } 173 195 tocSection.end(w) 174 196 tocSection.write(w) 175 197 return w.err