fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

indexserver: truncate failure messages in status updates (#1070)

Truncate failure messages to 12 KiB before sending index status updates to Sourcegraph. This limit is modeled after Kubernetes, which similarly truncates termination messages on the assumption that complete failure details are available in container logs.

+74 -1
+52 -1
cmd/zoekt-sourcegraph-indexserver/main.go
··· 44 44 "sync" 45 45 "text/tabwriter" 46 46 "time" 47 + "unicode/utf8" 47 48 48 49 grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" 49 50 "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/retry" ··· 254 255 // time." famous last words. A client was indexing a monorepo with 42 255 256 // cores... 5m was not enough. 256 257 const noOutputTimeout = 30 * time.Minute 258 + 259 + const ( 260 + maxFailureMessageBytes = 12 * 1024 261 + failureMessageTruncationMarker = "\n\n[Error message truncated]\n\n" 262 + ) 257 263 258 264 func (s *Server) loggedRun(tr trace.Trace, cmd *exec.Cmd) (err error) { 259 265 out := &synchronizedBuffer{} ··· 707 713 var indexTimeUnix int64 708 714 if indexErr != nil { 709 715 state = configv1.UpdateIndexStatusRequest_Repository_STATE_FAILURE 710 - failureMessage = indexErr.Error() 716 + failureMessage = truncateFailureMessageForSourcegraph(indexErr.Error()) 711 717 712 718 // On failure, metadata may not exist yet. Include index time if we can, 713 719 // but do not block reporting the failure status. ··· 739 745 } 740 746 741 747 return nil 748 + } 749 + 750 + func truncateFailureMessageForSourcegraph(s string) string { 751 + if len(s) <= maxFailureMessageBytes { 752 + return s 753 + } 754 + 755 + budget := maxFailureMessageBytes - len(failureMessageTruncationMarker) 756 + headBytes := budget / 2 757 + tailBytes := budget - headBytes 758 + 759 + return utf8PrefixBytes(s, headBytes) + failureMessageTruncationMarker + utf8SuffixBytes(s, tailBytes) 760 + } 761 + 762 + func utf8PrefixBytes(s string, maxBytes int) string { 763 + if len(s) <= maxBytes { 764 + return s 765 + } 766 + 767 + if maxBytes <= 0 { 768 + return "" 769 + } 770 + 771 + for maxBytes > 0 && !utf8.RuneStart(s[maxBytes]) { 772 + maxBytes-- 773 + } 774 + 775 + return s[:maxBytes] 776 + } 777 + 778 + func utf8SuffixBytes(s string, maxBytes int) string { 779 + if len(s) <= maxBytes { 780 + return s 781 + } 782 + 783 + if maxBytes <= 0 { 784 + return "" 785 + } 786 + 787 + start := len(s) - maxBytes 788 + for start < len(s) && !utf8.RuneStart(s[start]) { 789 + start++ 790 + } 791 + 792 + return s[start:] 742 793 } 743 794 744 795 func sglogBranches(key string, branches []zoekt.RepositoryBranch) sglog.Field {
+22
cmd/zoekt-sourcegraph-indexserver/main_test.go
··· 12 12 "slices" 13 13 "strings" 14 14 "testing" 15 + "unicode/utf8" 15 16 16 17 "github.com/google/go-cmp/cmp" 17 18 sglog "github.com/sourcegraph/log" ··· 55 56 s := &Server{} 56 57 _, err := s.index(context.Background(), &indexArgs{}) 57 58 require.ErrorIs(t, err, tenant.ErrMissingTenant) 59 + } 60 + 61 + func TestTruncateFailureMessageForSourcegraph(t *testing.T) { 62 + t.Run("preserves short message", func(t *testing.T) { 63 + require.Equal(t, "boom", truncateFailureMessageForSourcegraph("boom")) 64 + }) 65 + 66 + t.Run("truncates oversized utf8 message", func(t *testing.T) { 67 + input := strings.Repeat("é", maxFailureMessageBytes) + "tail" 68 + 69 + got := truncateFailureMessageForSourcegraph(input) 70 + parts := strings.Split(got, failureMessageTruncationMarker) 71 + 72 + require.Len(t, parts, 2) 73 + require.LessOrEqual(t, len(got), maxFailureMessageBytes) 74 + require.True(t, utf8.ValidString(got)) 75 + require.NotEmpty(t, parts[0]) 76 + require.True(t, strings.HasPrefix(input, parts[0])) 77 + require.Contains(t, parts[1], "tail") 78 + require.Less(t, len(parts[0])+len(parts[1]), len(input)) 79 + }) 58 80 } 59 81 60 82 func TestServer_parallelism(t *testing.T) {