fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

indexserver: assert and check for ownership on index directory (#347)

We had a customer misconfigure a zoekt setup such that there were
multiple replicas owning the same directory. This led to hard-to-debug
bugs. Instead we will now write a file (owner.txt) which contains the
identity of the owner and proactively look for changes in ownership.

Test Plan: go test and the following manual test plan

$ go run ./cmd/zoekt-sourcegraph-indexserver -sourcegraph_url ~/src -debug
# hit ctrl-c
# now I expect it to have my computer's hostname "habitat"
$ cat ~/.zoekt/owner.txt
DO NOT EDIT! generated by zoekt-sourcegraph-indexserver.
This file records the identity of the owner of this zoekt index directory.
If it changes zoekt-sourcegraph-indexserver will exit with a non-zero exit code.
This is to prevent multiple owners/writers.

hostname=habitat

# Now I will run with an overridden hostname. I expect a warning and a new hostname
$ go run ./cmd/zoekt-sourcegraph-indexserver -hostname=foobarbaz -sourcegraph_url ~/src -debug
2022/05/19 16:43:10 WARN: detected a change in ownership at startup. You can ignore this if you only have one zoekt replica: detected a change of ownership of /home/keegan/.zoekt/owner.txt. In multiple replica setups this can lead to un-needed rebalancing or bugs if there are multiple writers: owner="habitat" current="foobarbaz"

# Now I mutate ~/.zoekt/owner.txt to have a different hostname and I expect
the program to crash
2022/05/19 16:47:47 detected a change of ownership of /home/keegan/.zoekt/owner.txt. In multiple replica setups this can lead to un-needed rebalancing or bugs if there are multiple writers: owner="boom" current="foobarbaz"
exit status 1

+158
+6
cmd/zoekt-sourcegraph-indexserver/main.go
··· 868 868 }() 869 869 } 870 870 871 + oc := &ownerChecker{ 872 + Path: filepath.Join(conf.index, "owner.txt"), 873 + Hostname: conf.hostname, 874 + } 875 + go oc.Run() 876 + 871 877 s.Run() 872 878 return nil 873 879 }
+103
cmd/zoekt-sourcegraph-indexserver/owner.go
··· 1 + package main 2 + 3 + import ( 4 + "bytes" 5 + "errors" 6 + "fmt" 7 + "io/fs" 8 + "log" 9 + "os" 10 + "time" 11 + ) 12 + 13 + type ownerChangeError struct { 14 + Path string 15 + Owner, Current string 16 + } 17 + 18 + func (e *ownerChangeError) Error() string { 19 + return fmt.Sprintf("detected a change of ownership of %s. In multiple replica setups this can lead to un-needed rebalancing or bugs if there are multiple writers: owner=%q current=%q", e.Path, e.Owner, e.Current) 20 + } 21 + 22 + // ownerChecker can write and check the owner file for a index directory. It 23 + // is used for detecting when multiple zoekts are writing to a directory, or 24 + // when the ownership is changing too often. 25 + // 26 + // The motivation for this is a person can misconfigure zoekt such that 27 + // multiple indexservers write to the same directory. This will lead to index 28 + // thrashing and hard to debug errors. Alternatively if the stable identity 29 + // (hostname) changes, this can lead to Sourcegraph's repo <-> owner hash 30 + // changing which means unnecessary rebalancing. 31 + type ownerChecker struct { 32 + Path string 33 + Hostname string 34 + } 35 + 36 + // Run will regularly init then regularly check if we are owner. If an error 37 + // is detected it will exit the current process with exit code 1. This method 38 + // blocks. 39 + func (o *ownerChecker) Run() { 40 + if err := o.Init(); err != nil { 41 + log.Fatal(err) 42 + } 43 + for { 44 + time.Sleep(5 * time.Second) 45 + if err := o.Check(); err != nil { 46 + log.Fatal(err) 47 + } 48 + } 49 + } 50 + 51 + func (o *ownerChecker) Init() error { 52 + var ownerErr *ownerChangeError 53 + if err := o.Check(); errors.Is(err, fs.ErrNotExist) { 54 + // do nothing, first run so we just write out the file 55 + } else if errors.As(err, &ownerErr) { 56 + debug.Printf("WARN: detected a change in ownership at startup. 
You can ignore this if you only have one zoekt replica: %s", err) 57 + } else if err != nil { 58 + return err 59 + } 60 + 61 + content := []byte(fmt.Sprintf(`DO NOT EDIT! generated by zoekt-sourcegraph-indexserver. 62 + This file records the identity of the owner of this zoekt index directory. 63 + If it changes, zoekt-sourcegraph-indexserver will exit with a non-zero exit code. 64 + This is to prevent multiple owners/writers. 65 + 66 + hostname=%s 67 + `, o.Hostname)) 68 + 69 + // Always write out since we may update the comment 70 + if err := os.WriteFile(o.Path, content, 0600); err != nil { 71 + return fmt.Errorf("failed to write owner file %s: %w", o.Path, err) 72 + } 73 + 74 + return nil 75 + } 76 + 77 + func (o *ownerChecker) Check() error { 78 + if b, err := os.ReadFile(o.Path); err != nil { 79 + return fmt.Errorf("failed to read in owner file %s: %w", o.Path, err) 80 + } else if owner := bestEffortParseOwner(b); o.Hostname != owner { 81 + return &ownerChangeError{ 82 + Path: o.Path, 83 + Owner: owner, 84 + Current: o.Hostname, 85 + } 86 + } 87 + return nil 88 + } 89 + 90 + func bestEffortParseOwner(b []byte) string { 91 + prefix := []byte("hostname=") 92 + from := bytes.Index(b, prefix) 93 + if from < 0 { 94 + return "UNKNOWN" 95 + } 96 + 97 + b = b[from+len(prefix):] 98 + if to := bytes.IndexByte(b, '\n'); to > 0 { 99 + b = b[:to] 100 + } 101 + 102 + return string(bytes.TrimSpace(b)) 103 + }
+49
cmd/zoekt-sourcegraph-indexserver/owner_test.go
··· 1 + package main 2 + 3 + import ( 4 + "os" 5 + "path/filepath" 6 + "testing" 7 + ) 8 + 9 + func TestOwner(t *testing.T) { 10 + path := filepath.Join(t.TempDir(), "owner.txt") 11 + 12 + alice := ownerChecker{ 13 + Path: path, 14 + Hostname: "alice", 15 + } 16 + bob := ownerChecker{ 17 + Path: path, 18 + Hostname: "bob", 19 + } 20 + 21 + assertSuccess := func(err error) { 22 + t.Helper() 23 + if err != nil { 24 + t.Fatal(err) 25 + } 26 + } 27 + assertFailed := func(err error) { 28 + t.Helper() 29 + if err == nil { 30 + t.Fatal("expected failure") 31 + } 32 + } 33 + 34 + assertSuccess(alice.Init()) // empty dir so success 35 + assertSuccess(alice.Check()) // alice took ownership above 36 + assertSuccess(bob.Init()) // bob is now the owner. Only debug logs about change of ownership. 37 + assertFailed(alice.Check()) // alice is not the owner anymore 38 + assertSuccess(bob.Check()) // bob is still the owner 39 + 40 + // Test what happens if someone corrupts the file 41 + if err := os.WriteFile(path, []byte("!corrupt"), 0600); err != nil { 42 + t.Fatal(err) 43 + } 44 + assertFailed(alice.Check()) // corrupt so fail 45 + assertFailed(bob.Check()) // corrupt so fail 46 + assertSuccess(bob.Init()) // bob ovewrites corruption 47 + assertSuccess(bob.Check()) // bob is the owner 48 + assertFailed(alice.Check()) // alice is not the owner 49 + }