fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

all: support tombstones for compound shards (#155)

With this change we add support for tombstones to compound shards. The effect
is that compound shards will not be deleted if a repository gets updated.

The behavior can be switched on/off by placing a RIP file in the index
directory.

We set or load tombstones in cleanup (indexserver), builder.go (build), and
read.go (zoekt), so it makes sense to add the shared code to the zoekt package.

+214 -25
+3
api.go
··· 274 274 // IndexMetadata. However, we store it here since the Sourcegraph frontend 275 275 // can read this structure but not IndexMetadata. 276 276 HasSymbols bool 277 + 278 + // Tombstone is true if we are not allowed to search this repo. 279 + Tombstone bool 277 280 } 278 281 279 282 func (r *Repository) UnmarshalJSON(data []byte) error {
+17 -6
build/builder.go
··· 320 320 return IndexStateMissing 321 321 } 322 322 323 - repos, index, err := zoekt.ReadMetadataPath(fn) 323 + repos, index, err := zoekt.ReadMetadataPathAlive(fn) 324 324 if os.IsNotExist(err) { 325 325 return IndexStateMissing 326 326 } else if err != nil { ··· 385 385 return "" 386 386 } 387 387 for _, fn := range compoundShards { 388 - repos, _, err := zoekt.ReadMetadataPath(fn) 388 + repos, _, err := zoekt.ReadMetadataPathAlive(fn) 389 389 if err != nil { 390 390 continue 391 391 } ··· 568 568 569 569 delete(toDelete, final) 570 570 571 - b.shardLog("upsert", final) 571 + b.shardLog("upsert", final, b.opts.RepositoryDescription.Name) 572 572 } 573 573 b.finishedShards = map[string]string{} 574 574 575 575 for p := range toDelete { 576 + // Don't delete compound shards, set tombstones instead. 577 + if zoekt.TombstonesEnabled(filepath.Dir(p)) && strings.HasPrefix(filepath.Base(p), "compound-") { 578 + if !strings.HasSuffix(p, ".zoekt") { 579 + continue 580 + } 581 + repoName := b.opts.RepositoryDescription.Name 582 + b.shardLog("tomb", p, repoName) 583 + err := zoekt.SetTombstone(p, repoName) 584 + b.buildError = err 585 + continue 586 + } 576 587 log.Printf("removing old shard file: %s", p) 577 - b.shardLog("remove", p) 588 + b.shardLog("remove", p, b.opts.RepositoryDescription.Name) 578 589 if err := os.Remove(p); err != nil { 579 590 b.buildError = err 580 591 } ··· 638 649 return nil 639 650 } 640 651 641 - func (b *Builder) shardLog(action, shard string) { 652 + func (b *Builder) shardLog(action, shard string, repoName string) { 642 653 shard = filepath.Base(shard) 643 654 var shardSize int64 644 655 if fi, err := os.Stat(filepath.Join(b.opts.IndexDir, shard)); err == nil { 645 656 shardSize = fi.Size() 646 657 } 647 - _, _ = fmt.Fprintf(b.shardLogger, "%d\t%s\t%s\t%d\n", time.Now().UTC().Unix(), action, shard, shardSize) 658 + _, _ = fmt.Fprintf(b.shardLogger, "%d\t%s\t%s\t%d\t%s\n", time.Now().UTC().Unix(), action, shard, shardSize, repoName) 648 659 
} 649 660 650 661 var profileNumber int
+22 -8
cmd/zoekt-sourcegraph-indexserver/cleanup.go
··· 36 36 trash := getShards(trashDir) 37 37 index := getShards(indexDir) 38 38 39 + tombstonesEnabled := zoekt.TombstonesEnabled(indexDir) 40 + 39 41 // trash: Remove old shards and conflicts with index 40 42 minAge := now.Add(-24 * time.Hour) 41 43 for repo, shards := range trash { ··· 71 73 72 74 log.Printf("restoring shards from trash for %s", repo) 73 75 moveAll(indexDir, shards) 74 - shardsLog(indexDir, "restore", shards) 76 + shardsLog(indexDir, "restore", shards, repo) 75 77 } 76 78 77 - // index: Move non-existant repos into trash 78 - for _, shards := range index { 79 + // index: Move non-existent repos into trash 80 + for repo, shards := range index { 79 81 // Best-effort touch. If touch fails, we will just remove from the 80 82 // trash sooner. 81 83 for _, shard := range shards { 82 84 _ = os.Chtimes(shard.Path, now, now) 83 85 } 84 86 87 + if tombstonesEnabled { 88 + // 1 repo can be split across many simple shards but it should only be contained 89 + // in 1 compound shard. Hence we check that len(shards)==1 and only consider the 90 + // shard at index 0. 91 + if len(shards) == 1 && strings.HasPrefix(filepath.Base(shards[0].Path), "compound-") { 92 + shardsLog(indexDir, "tomb", shards, repo) 93 + if err := zoekt.SetTombstone(shards[0].Path, repo); err != nil { 94 + log.Printf("error setting tombstone for %s in shard %s: %s. 
Removing shard\n", repo, shards[0], err) 95 + _ = os.Remove(shards[0].Path) 96 + } 97 + continue 98 + } 99 + } 85 100 moveAll(trashDir, shards) 86 - shardsLog(indexDir, "remove", shards) 101 + shardsLog(indexDir, "remove", shards, repo) 87 102 } 88 103 89 104 // Remove old .tmp files from crashed indexer runs-- for example, if ··· 153 168 } 154 169 155 170 func shardRepoNames(path string) ([]string, error) { 156 - repos, _, err := zoekt.ReadMetadataPath(path) 171 + repos, _, err := zoekt.ReadMetadataPathAlive(path) 157 172 if err != nil { 158 173 return nil, err 159 174 } 160 - 161 175 names := make([]string, 0, len(repos)) 162 176 for _, repo := range repos { 163 177 names = append(names, repo.Name) ··· 252 266 } 253 267 } 254 268 255 - func shardsLog(indexDir, action string, shards []shard) { 269 + func shardsLog(indexDir, action string, shards []shard, repoName string) { 256 270 shardLogger := &lumberjack.Logger{ 257 271 Filename: filepath.Join(indexDir, "zoekt-indexserver-shard-log.tsv"), 258 272 MaxSize: 100, // Megabyte ··· 266 280 if fi, err := os.Stat(filepath.Join(indexDir, shard)); err == nil { 267 281 shardSize = fi.Size() 268 282 } 269 - _, _ = fmt.Fprintf(shardLogger, "%d\t%s\t%s\t%d\n", time.Now().UTC().Unix(), action, shard, shardSize) 283 + _, _ = fmt.Fprintf(shardLogger, "%d\t%s\t%s\t%d\t%s\n", time.Now().UTC().Unix(), action, shard, shardSize, repoName) 270 284 } 271 285 }
+3 -3
eval.go
··· 46 46 count := 0 47 47 alive := len(d.repoMetaData) 48 48 for i, md := range d.repoMetaData { 49 - if d.repoTombstone[i] { 49 + if d.repoMetaData[i].Tombstone { 50 50 alive-- 51 51 } else if predicate(md.Name) { 52 52 count++ ··· 185 185 nextDoc = uint32(lastDoc + 1) 186 186 } 187 187 // Skip tombstoned docs 188 - for nextDoc < docCount && d.repoTombstone[d.repos[nextDoc]] { 188 + for nextDoc < docCount && d.repoMetaData[d.repos[nextDoc]].Tombstone { 189 189 nextDoc++ 190 190 } 191 191 if nextDoc >= docCount { ··· 493 493 } 494 494 495 495 for i := range d.repoListEntry { 496 - if d.repoTombstone[i] { 496 + if d.repoMetaData[i].Tombstone { 497 497 continue 498 498 } 499 499 rle := &d.repoListEntry[i]
-4
indexdata.go
··· 77 77 metaData IndexMetadata 78 78 repoMetaData []Repository 79 79 80 - // repoTombstone[repoID] is true if we are not allowed to search 81 - // repoID. 82 - repoTombstone []bool 83 - 84 80 subRepos []uint32 85 81 subRepoPaths [][]string 86 82
+2 -2
merge.go
··· 29 29 hasher := sha1.New() 30 30 for _, d := range ds { 31 31 for i, md := range d.repoMetaData { 32 - if d.repoTombstone[i] { 32 + if d.repoMetaData[i].Tombstone { 33 33 continue 34 34 } 35 35 hasher.Write([]byte(md.Name)) ··· 96 96 for docID := uint32(0); int(docID) < len(d.fileBranchMasks); docID++ { 97 97 repoID := int(d.repos[docID]) 98 98 99 - if d.repoTombstone[repoID] { 99 + if d.repoMetaData[repoID].Tombstone { 100 100 continue 101 101 } 102 102
+16 -2
read.go
··· 320 320 d.rawConfigMasks = append(d.rawConfigMasks, encodeRawConfig(md.RawConfig)) 321 321 } 322 322 323 - d.repoTombstone = make([]bool, len(d.repoMetaData)) 324 - 325 323 blob, err := d.readSectionBlob(toc.runeDocSections) 326 324 if err != nil { 327 325 return nil, err ··· 571 569 } 572 570 573 571 return rd.readMetadata(&toc) 572 + } 573 + 574 + // ReadMetadataPathAlive is like ReadMetadataPath except that it only returns 575 + // alive repositories. 576 + func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { 577 + repos, id, err := ReadMetadataPath(p) 578 + if err != nil { 579 + return nil, nil, err 580 + } 581 + alive := repos[:0] 582 + for _, repo := range repos { 583 + if !repo.Tombstone { 584 + alive = append(alive, repo) 585 + } 586 + } 587 + return alive, id, nil 574 588 } 575 589 576 590 // ReadMetadataPath returns the metadata of index shard at p without reading
testdata/shards/repo_v16.00000.zoekt

This is a binary file and will not be displayed.

+84
tombstones.go
··· 1 + package zoekt 2 + 3 + import ( 4 + "encoding/json" 5 + "io/ioutil" 6 + "os" 7 + "path/filepath" 8 + "syscall" 9 + ) 10 + 11 + // TombstoneEnabled returns true if a file "RIP" is present in dir. 12 + func TombstonesEnabled(dir string) bool { 13 + _, err := os.Stat(filepath.Join(dir, "RIP")) 14 + return err == nil 15 + } 16 + 17 + var mockRepos []*Repository 18 + 19 + // SetTombstone idempotently sets a tombstone for repoName in .meta. 20 + func SetTombstone(shardPath string, repoName string) error { 21 + var repos []*Repository 22 + var err error 23 + 24 + if mockRepos != nil { 25 + repos = mockRepos 26 + } else { 27 + repos, _, err = ReadMetadataPath(shardPath) 28 + if err != nil { 29 + return err 30 + } 31 + } 32 + 33 + for _, repo := range repos { 34 + if repo.Name == repoName { 35 + repo.Tombstone = true 36 + } 37 + } 38 + 39 + dest := shardPath + ".meta" 40 + err = jsonMarshalMeta(repos, dest) 41 + if err != nil { 42 + return err 43 + } 44 + 45 + return nil 46 + } 47 + 48 + func jsonMarshalMeta(v interface{}, p string) (err error) { 49 + b, err := json.Marshal(v) 50 + if err != nil { 51 + return err 52 + } 53 + 54 + f, err := ioutil.TempFile(filepath.Dir(p), filepath.Base(p)+".*.tmp") 55 + if err != nil { 56 + return err 57 + } 58 + defer func() { 59 + f.Close() 60 + if err != nil { 61 + _ = os.Remove(f.Name()) 62 + } 63 + }() 64 + 65 + err = f.Chmod(0o666 &^ umask) 66 + if err != nil { 67 + return err 68 + } 69 + 70 + _, err = f.Write(b) 71 + if err != nil { 72 + return err 73 + } 74 + 75 + return os.Rename(f.Name(), p) 76 + } 77 + 78 + // umask holds the Umask of the current process 79 + var umask os.FileMode 80 + 81 + func init() { 82 + umask = os.FileMode(syscall.Umask(0)) 83 + syscall.Umask(int(umask)) 84 + }
+67
tombstones_test.go
··· 1 + package zoekt 2 + 3 + import ( 4 + "encoding/json" 5 + "os" 6 + "path/filepath" 7 + "testing" 8 + ) 9 + 10 + func TestSetTombstone(t *testing.T) { 11 + mockRepos = mkRepos("r1", "r2", "r3") 12 + 13 + readMeta := func(shard string) []byte { 14 + blob, err := os.ReadFile(shard + ".meta") 15 + if err != nil && !os.IsNotExist(err) { 16 + t.Fatal(err) 17 + } 18 + return blob 19 + } 20 + 21 + dir := t.TempDir() 22 + ghostShard := filepath.Join(dir, "test.zoekt") 23 + 24 + SetTombstone(ghostShard, "r2") 25 + 26 + blob := readMeta(ghostShard) 27 + gotRepos := []*Repository{} 28 + if err := json.Unmarshal(blob, &gotRepos); err != nil { 29 + t.Fatal(err) 30 + } 31 + 32 + if gotRepos[0].Tombstone { 33 + t.Fatal("r1 should have been alive") 34 + } 35 + if !gotRepos[1].Tombstone { 36 + t.Fatal("r2 should have been dead") 37 + } 38 + if gotRepos[2].Tombstone { 39 + t.Fatal("r3 should have been alive") 40 + } 41 + 42 + SetTombstone(ghostShard, "r1") 43 + 44 + blob = readMeta(ghostShard) 45 + gotRepos = nil 46 + if err := json.Unmarshal(blob, &gotRepos); err != nil { 47 + t.Fatal(err) 48 + } 49 + 50 + if !gotRepos[0].Tombstone { 51 + t.Fatal("r1 should have been dead") 52 + } 53 + if !gotRepos[1].Tombstone { 54 + t.Fatal("r2 should have been dead") 55 + } 56 + if gotRepos[2].Tombstone { 57 + t.Fatal("r3 should have been alive") 58 + } 59 + } 60 + 61 + func mkRepos(repoNames ...string) []*Repository { 62 + ret := make([]*Repository, 0, len(repoNames)) 63 + for _, n := range repoNames { 64 + ret = append(ret, &Repository{Name: n}) 65 + } 66 + return ret 67 + }