fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

search: use structured logging for crashed shards (#1028)

We recently had some crashes in production while experimenting with
ZOEKT_RE2_THRESHOLD_BYTES and found it hard to consume the stack traces
since they were over multiple lines.

+28 -2
+23 -2
search/shards.go
··· 30 30 31 31 "github.com/prometheus/client_golang/prometheus" 32 32 "github.com/prometheus/client_golang/prometheus/promauto" 33 + sglog "github.com/sourcegraph/log" 33 34 "go.uber.org/atomic" 34 35 "golang.org/x/sync/semaphore" 35 36 ··· 41 42 ) 42 43 43 44 var ( 45 + shardRecoveryLogger = sync.OnceValue(func() sglog.Logger { 46 + return sglog.Scoped("searchShards") 47 + }) 48 + 44 49 metricShardsLoaded = promauto.NewGauge(prometheus.GaugeOpts{ 45 50 Name: "zoekt_shards_loaded", 46 51 Help: "The number of shards currently loaded", ··· 927 932 } 928 933 } 929 934 935 + func logShardCrash(operation string, s zoekt.Searcher, recovered any, stack []byte) { 936 + fields := []sglog.Field{ 937 + sglog.String("operation", operation), 938 + sglog.String("shard", s.String()), 939 + sglog.String("stacktrace", string(stack)), 940 + } 941 + 942 + if err, ok := recovered.(error); ok { 943 + fields = append(fields, sglog.Error(err)) 944 + } else { 945 + fields = append(fields, sglog.String("panic", fmt.Sprint(recovered))) 946 + } 947 + 948 + shardRecoveryLogger().Error("crashed shard", fields...) 949 + } 950 + 930 951 func searchOneShard(ctx context.Context, s zoekt.Searcher, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { 931 952 metricSearchShardRunning.Inc() 932 953 defer func() { 933 954 metricSearchShardRunning.Dec() 934 955 if e := recover(); e != nil { 935 - log.Printf("[ERROR] crashed shard: %s: %#v, %s", s, e, debug.Stack()) 956 + logShardCrash("search", s, e, debug.Stack()) 936 957 937 958 if sr == nil { 938 959 sr = &zoekt.SearchResult{} ··· 954 975 defer func() { 955 976 metricListShardRunning.Dec() 956 977 if r := recover(); r != nil { 957 - log.Printf("[ERROR] crashed shard: %s: %s, %s", s.String(), r, debug.Stack()) 978 + logShardCrash("list", s, r, debug.Stack()) 958 979 sink <- shardListResult{ 959 980 &zoekt.RepoList{Crashes: 1}, nil, 960 981 }
+5
search/shards_test.go
··· 36 36 "github.com/google/go-cmp/cmp" 37 37 "github.com/google/go-cmp/cmp/cmpopts" 38 38 "github.com/grafana/regexp" 39 + sglog "github.com/sourcegraph/log" 39 40 40 41 "github.com/sourcegraph/zoekt/index" 41 42 ··· 74 75 oldOut := log.Writer() 75 76 log.SetOutput(out) 76 77 defer log.SetOutput(oldOut) 78 + 79 + oldShardRecoveryLogger := shardRecoveryLogger 80 + shardRecoveryLogger = sglog.NoOp 81 + defer func() { shardRecoveryLogger = oldShardRecoveryLogger }() 77 82 78 83 ss := newShardedSearcher(2) 79 84 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})