fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

merging: support unsetting tombstones during cleanup (#250)

With this change tombstones are removed during cleanup if a repository is tombstoned and required to be indexed. This will speed up the recovery in case we accidentally drop a lot of indexes.

The mechanics are largely the same for shards in .trash/ and tombstoned repos. However, for the case in which a repo is tombstoned and in the .trash/ directory, the shard in the .trash/ directory takes precedence because we can assume it is newer.

+275 -62
+64 -5
cmd/zoekt-sourcegraph-indexserver/cleanup.go
··· 35 35 } 36 36 37 37 trash := getShards(trashDir) 38 + tombtones := getTombstonedRepos(indexDir) 38 39 index := getShards(indexDir) 39 40 40 41 // trash: Remove old shards and conflicts with index ··· 59 60 delete(trash, repo) 60 61 } 61 62 63 + // tombstones: Remove tombstones that conflict with index or trash. After this, 64 + // tombstones only contain repos that are neither in the trash nor in the index. 65 + for repo, _ := range tombtones { 66 + if _, conflicts := index[repo]; conflicts { 67 + delete(tombtones, repo) 68 + } 69 + // Trash takes precedence over tombstones. 70 + if _, conflicts := trash[repo]; conflicts { 71 + delete(tombtones, repo) 72 + } 73 + } 74 + 62 75 // index: We are ID based, but store shards by name still. If we end up with 63 76 // shards that have the same ID but different names delete and start over. 64 77 // This can happen when a repository is renamed. In future we should make ··· 98 111 } 99 112 100 113 // index: Move missing repos from trash into index 114 + // index: Restore deleted or tombstoned repos. 101 115 for _, repo := range repos { 102 116 // Delete from index so that index will only contain shards to be 103 117 // trashed. 
104 118 delete(index, repo) 105 119 106 - shards, ok := trash[repo] 107 - if !ok { 120 + if shards, ok := trash[repo]; ok { 121 + log.Printf("restoring shards from trash for %v", repo) 122 + moveAll(indexDir, shards) 123 + shardsLog(indexDir, "restore", shards) 108 124 continue 109 125 } 110 126 111 - log.Printf("restoring shards from trash for %v", repo) 112 - moveAll(indexDir, shards) 113 - shardsLog(indexDir, "restore", shards) 127 + if s, ok := tombtones[repo]; ok { 128 + log.Printf("removing tombstone for %v", repo) 129 + err := zoekt.UnsetTombstone(s.Path, repo) 130 + if err != nil { 131 + log.Printf("error removing tombstone for %v: %s", repo, err) 132 + } else { 133 + shardsLog(indexDir, "untomb", []shard{s}) 134 + } 135 + } 114 136 } 115 137 116 138 // index: Move non-existent repos into trash ··· 195 217 } 196 218 } 197 219 return shards 220 + } 221 + 222 + // getTombstonedRepos return a map of tombstoned repositories in dir. If a 223 + // repository is tombstoned in more than one compound shard, only the latest one, 224 + // as determined by the date of the latest commit, is returned. 225 + func getTombstonedRepos(dir string) map[uint32]shard { 226 + paths, err := filepath.Glob(filepath.Join(dir, "compound-*.zoekt")) 227 + if err != nil { 228 + return nil 229 + } 230 + if len(paths) == 0 { 231 + return nil 232 + } 233 + 234 + m := make(map[uint32]shard) 235 + 236 + for _, p := range paths { 237 + repos, _, err := zoekt.ReadMetadataPath(p) 238 + if err != nil { 239 + continue 240 + } 241 + for _, repo := range repos { 242 + if !repo.Tombstone { 243 + continue 244 + } 245 + if v, ok := m[repo.ID]; ok && v.ModTime.After(repo.LatestCommitDate) { 246 + continue 247 + } 248 + m[repo.ID] = shard{ 249 + RepoID: repo.ID, 250 + RepoName: repo.Name, 251 + Path: p, 252 + ModTime: repo.LatestCommitDate, 253 + } 254 + } 255 + } 256 + return m 198 257 } 199 258 200 259 var incompleteRE = regexp.MustCompile(`\.zoekt[0-9]+(\.\w+)?$`)
+184 -28
cmd/zoekt-sourcegraph-indexserver/cleanup_test.go
··· 13 13 "time" 14 14 15 15 "github.com/google/go-cmp/cmp" 16 + "github.com/google/go-cmp/cmp/cmpopts" 16 17 "github.com/google/zoekt" 17 18 "github.com/google/zoekt/build" 18 19 ) ··· 130 131 fs = append(fs, f) 131 132 } 132 133 for _, f := range fs { 133 - createTestShard(t, f.RepoName, f.Path) 134 + createTestShard(t, f.RepoName, fakeID(f.RepoName), f.Path) 134 135 if err := os.Chtimes(f.Path, f.ModTime, f.ModTime); err != nil { 135 136 t.Fatal(err) 136 137 } ··· 171 172 } 172 173 } 173 174 174 - func createTestShard(t *testing.T, repo, path string) { 175 + func createTestShard(t *testing.T, repo string, id uint32, path string, optFns ...func(in *zoekt.Repository)) { 175 176 t.Helper() 176 177 177 178 if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { 178 179 t.Fatal(err) 179 180 } 180 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 181 - ID: fakeID(repo), 181 + r := &zoekt.Repository{ 182 + ID: id, 182 183 Name: repo, 183 - }) 184 + } 185 + for _, optFn := range optFns { 186 + optFn(r) 187 + } 188 + b, err := zoekt.NewIndexBuilder(r) 184 189 if err != nil { 185 190 t.Fatal(err) 186 191 } ··· 242 247 } 243 248 244 249 func TestVacuum(t *testing.T) { 245 - fn := createCompoundShard(t) 250 + tmpDir := t.TempDir() 251 + fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4}) 246 252 247 253 err := zoekt.SetTombstone(fn, 2) 248 254 if err != nil { ··· 259 265 t.Fatal(err) 260 266 } 261 267 262 - dir := filepath.Dir(fn) 263 - d, err := os.Open(dir) 264 - if err != nil { 265 - t.Fatal(err) 266 - } 267 - shards, err := d.Readdirnames(-1) 268 + shards, err := filepath.Glob(tmpDir + "/compound-*") 268 269 if err != nil { 269 270 t.Fatal(err) 270 271 } ··· 273 274 t.Fatalf("expected 1 shard, but instead got %d", len(shards)) 274 275 } 275 276 276 - repos, _, err := zoekt.ReadMetadataPath(filepath.Join(dir, shards[0])) 277 + repos, _, err := zoekt.ReadMetadataPath(shards[0]) 277 278 if err != nil { 278 279 t.Fatal(err) 279 280 } ··· 289 290 } 290 291 } 
291 292 292 - // createCompoundShard returns a path to a compound shard containing repos 293 - // repo0..repo3 294 - func createCompoundShard(t *testing.T) string { 295 - t.Helper() 293 + // Create 2 compound shards, each of which contains the same tombstoned repo but 294 + // from different commit dates. 295 + func TestGetTombstonedRepos(t *testing.T) { 296 + setLastCommitDate := func(lastCommitDate time.Time) func(repository *zoekt.Repository) { 297 + return func(repository *zoekt.Repository) { 298 + repository.LatestCommitDate = lastCommitDate 299 + } 300 + } 296 301 297 302 dir := t.TempDir() 303 + var repoID uint32 = 2 304 + csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour))) 305 + zoekt.SetTombstone(csOld, repoID) 298 306 299 - repoNames := []string{"repo1", "repo2", "repo3", "repo4"} 307 + now := time.Now() 308 + csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now)) 309 + zoekt.SetTombstone(csNew, repoID) 310 + 311 + // Check that getTombstonedRepos returns the compound shard containing the 312 + // tombstoned repo with id repoID with the latest commit. 
313 + got := getTombstonedRepos(dir) 314 + 315 + if len(got) != 1 { 316 + t.Fatalf("want 1 shard, got %d shards", len(got)) 317 + } 318 + 319 + v, ok := got[repoID] 320 + if !ok || v.Path != csNew { 321 + t.Fatalf("want %s, got %s", csNew, v.Path) 322 + } 323 + } 324 + 325 + // HAVE 326 + // ---- 327 + // index/ 328 + // CS 1 329 + // r1, tombstoned, old 330 + // r2, tombstoned, old 331 + // r3, tombstoned, old 332 + // CS 2 333 + // r1, tombstoned, recent 334 + // r2, tombstoned, recent 335 + // r4, tombstoned, recent 336 + // SS 1 337 + // r1, now 338 + // .trash/ 339 + // SS 3 340 + // r3, now 341 + // SS 5 342 + // r5, now 343 + // 344 + // TO BE INDEXED 345 + // ------------- 346 + // repos r1, r2, r3, r4, r5 347 + // 348 + // WANT 349 + // ---- 350 + // index/ 351 + // CS 1 352 + // r1, tombstoned, old 353 + // r2, tombstoned, old 354 + // r3, tombstoned, old 355 + // CS 2 356 + // r1, tombstoned, recent 357 + // r2, recent 358 + // r4, recent 359 + // SS 1 360 + // r1, now 361 + // SS 3 362 + // r3, now 363 + // SS 5 364 + // r5, now 365 + // .trash/ --> empty 366 + // 367 + func TestCleanupCompoundShards(t *testing.T) { 368 + dir := t.TempDir() 369 + 370 + // timestamps 371 + now := time.Now() 372 + recent := now.Add(-1 * time.Hour) 373 + old := now.Add(-2 * time.Hour) 374 + 375 + cs1 := createCompoundShard(t, dir, []uint32{1, 2, 3}, func(in *zoekt.Repository) { 376 + in.LatestCommitDate = old 377 + }) 378 + zoekt.SetTombstone(cs1, 1) 379 + zoekt.SetTombstone(cs1, 2) 380 + zoekt.SetTombstone(cs1, 3) 381 + 382 + cs2 := createCompoundShard(t, dir, []uint32{1, 2, 4}, func(in *zoekt.Repository) { 383 + in.LatestCommitDate = recent 384 + }) 385 + zoekt.SetTombstone(cs2, 1) 386 + zoekt.SetTombstone(cs2, 2) 387 + zoekt.SetTombstone(cs2, 4) 388 + 389 + createTestShard(t, "repo1", 1, filepath.Join(dir, "repo1.zoekt"), func(in *zoekt.Repository) { 390 + in.LatestCommitDate = now 391 + }) 392 + createTestShard(t, "repo3", 3, filepath.Join(dir, ".trash", 
"repo3.zoekt"), func(in *zoekt.Repository) { 393 + in.LatestCommitDate = now 394 + }) 395 + createTestShard(t, "repo5", 5, filepath.Join(dir, ".trash", "repo5.zoekt"), func(in *zoekt.Repository) { 396 + in.LatestCommitDate = now 397 + }) 398 + 399 + // want indexed 400 + repos := []uint32{1, 2, 3, 4, 5} 401 + 402 + cleanup(dir, repos, now, true) 403 + 404 + index := getShards(dir) 405 + trash := getShards(filepath.Join(dir, ".trash")) 406 + 407 + if len(trash) != 0 { 408 + t.Fatalf("expected empty trash, got %+v", trash) 409 + } 410 + 411 + wantIndex := map[uint32][]shard{ 412 + 1: []shard{{ 413 + RepoID: 1, 414 + RepoName: "repo1", 415 + Path: filepath.Join(dir, "repo1.zoekt"), 416 + }}, 417 + 2: []shard{{ 418 + RepoID: 2, 419 + RepoName: "repo2", 420 + Path: cs2, 421 + }}, 422 + 3: []shard{{ 423 + RepoID: 3, 424 + RepoName: "repo3", 425 + Path: filepath.Join(dir, "repo3.zoekt"), 426 + }}, 427 + 4: []shard{{ 428 + RepoID: 4, 429 + RepoName: "repo4", 430 + Path: cs2, 431 + }}, 432 + 5: []shard{{ 433 + RepoID: 5, 434 + RepoName: "repo5", 435 + Path: filepath.Join(dir, "repo5.zoekt"), 436 + }}, 437 + } 438 + 439 + if d := cmp.Diff(wantIndex, index, cmpopts.IgnoreFields(shard{}, "ModTime")); d != "" { 440 + t.Fatalf("-want, +got: %s", d) 441 + } 442 + } 443 + 444 + // createCompoundShard returns a path to a compound shard containing repos with 445 + // ids. Use optsFns to overwrite fields of zoekt.Repository for all repos. 
446 + func createCompoundShard(t *testing.T, dir string, ids []uint32, optFns ...func(in *zoekt.Repository)) string { 447 + t.Helper() 448 + 300 449 var repoFns []string 301 450 302 - for i, name := range repoNames { 303 - opts := build.Options{ 304 - IndexDir: dir, 305 - RepositoryDescription: zoekt.Repository{ 306 - ID: uint32(i + 1), 307 - Name: name, 308 - RawConfig: map[string]string{ 309 - "public": "1", 310 - }, 451 + for _, id := range ids { 452 + repo := zoekt.Repository{ 453 + ID: id, 454 + Name: fmt.Sprintf("repo%d", id), 455 + RawConfig: map[string]string{ 456 + "public": "1", 311 457 }, 458 + } 459 + for _, optsFn := range optFns { 460 + optsFn(&repo) 461 + } 462 + 463 + opts := build.Options{ 464 + IndexDir: dir, 465 + RepositoryDescription: repo, 312 466 } 313 467 opts.SetDefaults() 314 468 b, err := build.NewBuilder(opts) ··· 326 480 } 327 481 328 482 // create a compound shard. 329 - dir = t.TempDir() 330 483 fn, err := merge(dir, repoFns) 331 484 if err != nil { 332 485 t.Fatal(err) 486 + } 487 + for _, old := range repoFns { 488 + os.Remove(old) 333 489 } 334 490 return fn 335 491 }
+10 -1
tombstones.go
··· 20 20 21 21 // SetTombstone idempotently sets a tombstone for repoName in .meta. 22 22 func SetTombstone(shardPath string, repoID uint32) error { 23 + return setTombstone(shardPath, repoID, true) 24 + } 25 + 26 + // UnsetTombstone idempotently removes a tombstone for repoName in .meta. 27 + func UnsetTombstone(shardPath string, repoID uint32) error { 28 + return setTombstone(shardPath, repoID, false) 29 + } 30 + 31 + func setTombstone(shardPath string, repoID uint32, tombstone bool) error { 23 32 var repos []*Repository 24 33 var err error 25 34 ··· 34 43 35 44 for _, repo := range repos { 36 45 if repo.ID == repoID { 37 - repo.Tombstone = true 46 + repo.Tombstone = tombstone 38 47 } 39 48 } 40 49
+17 -28
tombstones_test.go
··· 21 21 dir := t.TempDir() 22 22 ghostShard := filepath.Join(dir, "test.zoekt") 23 23 24 - if err := SetTombstone(ghostShard, 2); err != nil { 25 - t.Fatal(err) 24 + isAlive := func(alive []bool) { 25 + t.Helper() 26 + blob := readMeta(ghostShard) 27 + ghostRepos := []*Repository{} 28 + if err := json.Unmarshal(blob, &ghostRepos); err != nil { 29 + t.Fatal(err) 30 + } 31 + for i, repo := range ghostRepos { 32 + if repo.Tombstone == alive[i] { 33 + t.Fatalf("r%d: want %t, got %t\n", i+1, alive[i], repo.Tombstone) 34 + } 35 + } 26 36 } 27 37 28 - blob := readMeta(ghostShard) 29 - gotRepos := []*Repository{} 30 - if err := json.Unmarshal(blob, &gotRepos); err != nil { 38 + if err := SetTombstone(ghostShard, 2); err != nil { 31 39 t.Fatal(err) 32 40 } 33 - 34 - if gotRepos[0].Tombstone { 35 - t.Fatal("r1 should have been alive") 36 - } 37 - if !gotRepos[1].Tombstone { 38 - t.Fatal("r2 should have been dead") 39 - } 40 - if gotRepos[2].Tombstone { 41 - t.Fatal("r3 should have been alive") 42 - } 41 + isAlive([]bool{true, false, true}) 43 42 44 43 if err := SetTombstone(ghostShard, 1); err != nil { 45 44 t.Fatal(err) 46 45 } 46 + isAlive([]bool{false, false, true}) 47 47 48 - blob = readMeta(ghostShard) 49 - gotRepos = nil 50 - if err := json.Unmarshal(blob, &gotRepos); err != nil { 48 + if err := UnsetTombstone(ghostShard, 2); err != nil { 51 49 t.Fatal(err) 52 50 } 53 - 54 - if !gotRepos[0].Tombstone { 55 - t.Fatal("r1 should have been dead") 56 - } 57 - if !gotRepos[1].Tombstone { 58 - t.Fatal("r2 should have been dead") 59 - } 60 - if gotRepos[2].Tombstone { 61 - t.Fatal("r3 should have been alive") 62 - } 51 + isAlive([]bool{false, true, true}) 63 52 } 64 53 65 54 func mkRepos(repoNames ...string) []*Repository {