fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Custom marshaller for RepoList (#498)

Currently taking up 90% of object allocations for sg. This commit
reduces that by 90%. The next step we can take is instead of
returning a map we use a slice.


name old time/op new time/op delta
RepoList_Encode-8 166µs ± 4% 84µs ± 3% -49.12% (p=0.000 n=10+10)
RepoList_Decode-8 519µs ± 2% 148µs ± 1% -71.46% (p=0.000 n=10+10)

name old bytes new bytes delta
RepoList_Encode-8 57.8kB ± 0% 51.1kB ± 0% -11.66% (p=0.000 n=10+10)

name old alloc/op new alloc/op delta
RepoList_Encode-8 4.06kB ± 0% 57.34kB ± 0% +1311.02% (p=0.000 n=10+10)
RepoList_Decode-8 316kB ± 0% 259kB ± 0% -17.96% (p=0.000 n=10+7)

name old allocs/op new allocs/op delta
RepoList_Encode-8 1.00k ± 0% 0.00k ± 0% -99.90% (p=0.000 n=10+10)
RepoList_Decode-8 6.59k ± 0% 0.59k ± 0% -91.09% (p=0.000 n=10+10)

Test Plan: go test

Co-authored-by: William Bezuidenhout <william.bezuidenhout@sourcegraph.com>
Co-authored-by: Camden Cheek <camden@ccheek.com>

+419 -18
+49 -4
api.go
··· 754 754 Branches []RepositoryBranch 755 755 } 756 756 757 + type ReposMap map[uint32]MinimalRepoListEntry 758 + 759 + // MarshalBinary implements a specialized encoder for ReposMap. 760 + func (q *ReposMap) MarshalBinary() ([]byte, error) { 761 + return reposMapEncode(*q) 762 + } 763 + 764 + // UnmarshalBinary implements a specialized decoder for ReposMap. 765 + func (q *ReposMap) UnmarshalBinary(b []byte) error { 766 + var err error 767 + (*q), err = reposMapDecode(b) 768 + return err 769 + } 770 + 757 771 // RepoList holds a set of Repository metadata. 758 772 type RepoList struct { 759 - // Full response to a List request. Returned when ListOptions.Minimal is false. 773 + // Returned when ListOptions.Field is RepoListFieldRepos. 760 774 Repos []*RepoListEntry 761 775 762 - Crashes int 776 + // Returned when ListOptions.Field is RepoListFieldMinimal. 777 + // 778 + // Deprecated: use ReposMap. 779 + Minimal map[uint32]*MinimalRepoListEntry 780 + 781 + // ReposMap is set when ListOptions.Field is RepoListFieldReposMap. 782 + ReposMap ReposMap 763 783 764 - // Minimal response to a List request. Returned when ListOptions.Minimal is true. 765 - Minimal map[uint32]*MinimalRepoListEntry 784 + Crashes int 766 785 767 786 // Stats response to a List request. 768 787 // This is the aggregate RepoStats of all repos matching the input query. ··· 781 800 String() string 782 801 } 783 802 803 + type RepoListField int 804 + 805 + const ( 806 + RepoListFieldRepos RepoListField = 0 807 + RepoListFieldMinimal = 1 808 + RepoListFieldReposMap = 2 809 + ) 810 + 784 811 type ListOptions struct { 785 812 // Return only Minimal data per repo that Sourcegraph frontend needs. 813 + // 814 + // Deprecated: use Field 786 815 Minimal bool 816 + 817 + // Field decides which field to populate in RepoList response. 818 + Field RepoListField 819 + } 820 + 821 + func (o *ListOptions) GetField() (RepoListField, error) { 822 + if o == nil { 823 + return RepoListFieldRepos, nil 824 + } 825 + if o.Field < 0 || o.Field > RepoListFieldReposMap { 826 + return 0, fmt.Errorf("unknown RepoListField %d", o.Field) 827 + } 828 + if o.Minimal == true { 829 + return RepoListFieldMinimal, nil 830 + } 831 + return o.Field, nil 787 832 } 788 833 789 834 func (o *ListOptions) String() string {
+28 -8
eval.go
··· 609 609 610 610 var l RepoList 611 611 612 - minimal := opts != nil && opts.Minimal 613 - if minimal { 614 - l.Minimal = make(map[uint32]*MinimalRepoListEntry, len(d.repoListEntry)) 615 - } else { 612 + field, err := opts.GetField() 613 + if err != nil { 614 + return nil, err 615 + } 616 + switch field { 617 + case RepoListFieldRepos: 616 618 l.Repos = make([]*RepoListEntry, 0, len(d.repoListEntry)) 619 + case RepoListFieldMinimal: 620 + l.Minimal = make(map[uint32]*MinimalRepoListEntry, len(d.repoListEntry)) 621 + case RepoListFieldReposMap: 622 + l.ReposMap = make(ReposMap, len(d.repoListEntry)) 617 623 } 618 624 619 625 for i := range d.repoListEntry { ··· 626 632 } 627 633 628 634 l.Stats.Add(&rle.Stats) 629 - if id := rle.Repository.ID; id != 0 && minimal { 630 - l.Minimal[id] = &MinimalRepoListEntry{ 635 + 636 + // Backwards compat for when ID is missing 637 + if rle.Repository.ID == 0 { 638 + l.Repos = append(l.Repos, rle) 639 + continue 640 + } 641 + 642 + switch field { 643 + case RepoListFieldRepos: 644 + l.Repos = append(l.Repos, rle) 645 + case RepoListFieldMinimal: 646 + l.Minimal[rle.Repository.ID] = &MinimalRepoListEntry{ 631 647 HasSymbols: rle.Repository.HasSymbols, 632 648 Branches: rle.Repository.Branches, 633 649 } 634 - } else { 635 - l.Repos = append(l.Repos, rle) 650 + case RepoListFieldReposMap: 651 + l.ReposMap[rle.Repository.ID] = MinimalRepoListEntry{ 652 + HasSymbols: rle.Repository.HasSymbols, 653 + Branches: rle.Repository.Branches, 654 + } 636 655 } 656 + 637 657 } 638 658 639 659 return &l, nil
+181
marshal.go
··· 1 + package zoekt 2 + 3 + import ( 4 + "bytes" 5 + "encoding/binary" 6 + "fmt" 7 + "unsafe" 8 + ) 9 + 10 + // Wire-format of map[uint32]MinimalRepoListEntry is pretty straightforward: 11 + // 12 + // byte(1) version 13 + // uvarint(len(minimal)) 14 + // uvarint(sum(len(entry.Branches) for entry in minimal)) 15 + // for repoID, entry in minimal: 16 + // uvarint(repoID) 17 + // byte(entry.HasSymbols) 18 + // uvarint(len(entry.Branches)) 19 + // for b in entry.Branches: 20 + // str(b.Name) 21 + // str(b.Version) 22 + 23 + // reposMapEncode implements an efficient encoder for ReposMap. 24 + func reposMapEncode(minimal ReposMap) ([]byte, error) { 25 + if minimal == nil { 26 + return nil, nil 27 + } 28 + 29 + var b bytes.Buffer 30 + var enc [binary.MaxVarintLen64]byte 31 + varint := func(n int) { 32 + m := binary.PutUvarint(enc[:], uint64(n)) 33 + b.Write(enc[:m]) 34 + } 35 + str := func(s string) { 36 + varint(len(s)) 37 + b.WriteString(s) 38 + } 39 + strSize := func(s string) int { 40 + return binary.PutUvarint(enc[:], uint64(len(s))) + len(s) 41 + } 42 + 43 + // We calculate this up front so when decoding we only need to allocate the 44 + // underlying array once. 45 + allBranchesLen := 0 46 + for _, entry := range minimal { 47 + allBranchesLen += len(entry.Branches) 48 + } 49 + 50 + // Calculate size 51 + size := 1 // version 52 + size += binary.PutUvarint(enc[:], uint64(len(minimal))) 53 + size += binary.PutUvarint(enc[:], uint64(allBranchesLen)) 54 + for repoID, entry := range minimal { 55 + size += binary.PutUvarint(enc[:], uint64(repoID)) 56 + size += 1 // HasSymbols 57 + size += binary.PutUvarint(enc[:], uint64(len(entry.Branches))) 58 + for _, b := range entry.Branches { 59 + size += strSize(b.Name) 60 + size += strSize(b.Version) 61 + } 62 + } 63 + b.Grow(size) 64 + 65 + // Version 66 + b.WriteByte(1) 67 + 68 + // Length 69 + varint(len(minimal)) 70 + 71 + varint(allBranchesLen) 72 + 73 + for repoID, entry := range minimal { 74 + varint(int(repoID)) 75 + 76 + hasSymbols := byte(1) 77 + if !entry.HasSymbols { 78 + hasSymbols = 0 79 + } 80 + b.WriteByte(hasSymbols) 81 + 82 + varint(len(entry.Branches)) 83 + for _, b := range entry.Branches { 84 + str(b.Name) 85 + str(b.Version) 86 + } 87 + } 88 + 89 + return b.Bytes(), nil 90 + } 91 + 92 + // reposMapDecode implements an efficient decoder for map[string]struct{}. 93 + func reposMapDecode(b []byte) (ReposMap, error) { 94 + // nil input 95 + if len(b) == 0 { 96 + return nil, nil 97 + } 98 + 99 + // binaryReader returns strings pointing into b to avoid allocations. We 100 + // don't own b, so we create a copy of it. 101 + r := binaryReader{ 102 + typ: "ReposMap", 103 + b: append([]byte{}, b...), 104 + } 105 + 106 + // Version 107 + if v := r.byt(); v != 1 { 108 + return nil, fmt.Errorf("unsupported stringSet encoding version %d", v) 109 + } 110 + 111 + // Length 112 + l := r.uvarint() 113 + m := make(map[uint32]MinimalRepoListEntry, l) 114 + 115 + // Pre-allocate slice for all branches 116 + allBranchesLen := r.uvarint() 117 + allBranches := make([]RepositoryBranch, 0, allBranchesLen) 118 + 119 + for i := 0; i < l; i++ { 120 + repoID := r.uvarint() 121 + hasSymbols := r.byt() == 1 122 + lb := r.uvarint() 123 + for i := 0; i < lb; i++ { 124 + allBranches = append(allBranches, RepositoryBranch{ 125 + Name: r.str(), 126 + Version: r.str(), 127 + }) 128 + } 129 + branches := allBranches[len(allBranches)-lb:] 130 + m[uint32(repoID)] = MinimalRepoListEntry{ 131 + HasSymbols: hasSymbols, 132 + Branches: branches, 133 + } 134 + } 135 + 136 + return m, r.err 137 + } 138 + 139 + type binaryReader struct { 140 + typ string 141 + b []byte 142 + err error 143 + } 144 + 145 + func (b *binaryReader) uvarint() int { 146 + x, n := binary.Uvarint(b.b) 147 + if n < 0 { 148 + b.b = nil 149 + b.err = fmt.Errorf("malformed %s", b.typ) 150 + return 0 151 + } 152 + b.b = b.b[n:] 153 + return int(x) 154 + } 155 + 156 + func (b *binaryReader) str() string { 157 + l := b.uvarint() 158 + if l > len(b.b) { 159 + b.b = nil 160 + b.err = fmt.Errorf("malformed %s", b.typ) 161 + return "" 162 + } 163 + s := b2s(b.b[:l]) 164 + b.b = b.b[l:] 165 + return s 166 + } 167 + 168 + func (b *binaryReader) byt() byte { 169 + if len(b.b) < 1 { 170 + b.b = nil 171 + b.err = fmt.Errorf("malformed %s", b.typ) 172 + return 0 173 + } 174 + x := b.b[0] 175 + b.b = b.b[1:] 176 + return x 177 + } 178 + 179 + func b2s(b []byte) string { 180 + return *(*string)(unsafe.Pointer(&b)) 181 + }
+94
marshal_test.go
··· 1 + package zoekt 2 + 3 + import ( 4 + "bytes" 5 + "encoding/gob" 6 + "testing" 7 + 8 + "github.com/google/go-cmp/cmp" 9 + ) 10 + 11 + func BenchmarkRepoList_Encode(b *testing.B) { 12 + set := genRepoList(1000) 13 + 14 + // do one write to amortize away the cost of gob registration 15 + w := &countWriter{} 16 + enc := gob.NewEncoder(w) 17 + if err := enc.Encode(set); err != nil { 18 + b.Fatal(err) 19 + } 20 + 21 + b.ResetTimer() 22 + b.ReportAllocs() 23 + 24 + b.ReportMetric(float64(w.n), "bytes") 25 + 26 + for n := 0; n < b.N; n++ { 27 + if err := enc.Encode(set); err != nil { 28 + b.Fatal(err) 29 + } 30 + } 31 + } 32 + 33 + func BenchmarkRepoList_Decode(b *testing.B) { 34 + set := genRepoList(1000) 35 + 36 + var buf bytes.Buffer 37 + if err := gob.NewEncoder(&buf).Encode(set); err != nil { 38 + b.Fatal(err) 39 + } 40 + 41 + b.ResetTimer() 42 + b.ReportAllocs() 43 + 44 + for n := 0; n < b.N; n++ { 45 + // We need to include gob.NewDecoder cost to avoid measuring encoding. 46 + var repoBranches RepoList 47 + if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&repoBranches); err != nil { 48 + b.Fatal(err) 49 + } 50 + } 51 + } 52 + 53 + func TestRepoList_Marshal(t *testing.T) { 54 + for i := range []int{0, 1, 10, 100} { 55 + want := genRepoList(i) 56 + 57 + var buf bytes.Buffer 58 + if err := gob.NewEncoder(&buf).Encode(want); err != nil { 59 + t.Fatal(err) 60 + } 61 + 62 + var got RepoList 63 + if err := gob.NewDecoder(bytes.NewReader(buf.Bytes())).Decode(&got); err != nil { 64 + t.Fatal(err) 65 + } 66 + 67 + if diff := cmp.Diff(want, &got); diff != "" { 68 + t.Fatalf("mismatch for reposmap size %d (-want +got):\n%s", i, diff) 69 + } 70 + } 71 + } 72 + 73 + func genRepoList(size int) *RepoList { 74 + m := make(ReposMap, size) 75 + for i := 0; i < size; i++ { 76 + m[uint32(i)] = MinimalRepoListEntry{ 77 + HasSymbols: true, 78 + Branches: []RepositoryBranch{{ 79 + Name: "HEAD", 80 + Version: "c301e5c82b6e1632dce5c39902691c359559852e", 81 + }}, 82 + } 83 + } 84 + return &RepoList{ReposMap: m} 85 + } 86 + 87 + type countWriter struct { 88 + n int 89 + } 90 + 91 + func (w *countWriter) Write(b []byte) (int, error) { 92 + w.n += len(b) 93 + return len(b), nil 94 + }
+4 -2
rpc/rpc_test.go
··· 7 7 "reflect" 8 8 "testing" 9 9 10 + "github.com/google/go-cmp/cmp" 11 + "github.com/google/go-cmp/cmp/cmpopts" 10 12 "github.com/sourcegraph/zoekt" 11 13 "github.com/sourcegraph/zoekt/internal/mockSearcher" 12 14 "github.com/sourcegraph/zoekt/query" ··· 61 63 if err != nil { 62 64 t.Fatal(err) 63 65 } 64 - if !reflect.DeepEqual(l, mock.RepoList) { 65 - t.Fatalf("got %+v, want %+v", l, mock.RepoList) 66 + if d := cmp.Diff(mock.RepoList, l, cmpopts.IgnoreUnexported(zoekt.Repository{})); d != "" { 67 + t.Fatalf("unexpected RepoList (-want, +got):\n%s", d) 66 68 } 67 69 68 70 // Test closing a client we never dial.
+8 -4
shards/shards.go
··· 919 919 } 920 920 921 921 agg := zoekt.RepoList{ 922 - Crashes: stillLoadingCrashes, 923 - Minimal: map[uint32]*zoekt.MinimalRepoListEntry{}, 922 + Crashes: stillLoadingCrashes, 923 + Minimal: map[uint32]*zoekt.MinimalRepoListEntry{}, 924 + ReposMap: zoekt.ReposMap{}, 924 925 } 925 926 926 927 uniq := map[string]*zoekt.RepoListEntry{} ··· 950 951 agg.Minimal[id] = r 951 952 } 952 953 } 954 + 955 + for id, r := range r.rl.ReposMap { 956 + agg.ReposMap[id] = r 957 + } 953 958 } 954 959 955 960 agg.Repos = make([]*zoekt.RepoListEntry, 0, len(uniq)) ··· 957 962 agg.Repos = append(agg.Repos, r) 958 963 } 959 964 960 - isMinimal := opts != nil && opts.Minimal 961 - if isAll && !isMinimal { 965 + if isAll && len(agg.Repos) > 0 { 962 966 reportListAllMetrics(agg.Repos) 963 967 } 964 968
+55
shards/shards_test.go
··· 462 462 Stats: aggStats, 463 463 }, 464 464 }, 465 + { 466 + name: "field=repos", 467 + opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 468 + want: &zoekt.RepoList{ 469 + Repos: []*zoekt.RepoListEntry{ 470 + { 471 + Repository: *repos[0], 472 + Stats: stats, 473 + }, 474 + { 475 + Repository: *repos[1], 476 + Stats: stats, 477 + }, 478 + }, 479 + Stats: aggStats, 480 + }, 481 + }, 482 + { 483 + name: "field=minimal", 484 + opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldMinimal}, 485 + want: &zoekt.RepoList{ 486 + Repos: []*zoekt.RepoListEntry{ 487 + { 488 + Repository: *repos[1], 489 + Stats: stats, 490 + }, 491 + }, 492 + Minimal: map[uint32]*zoekt.MinimalRepoListEntry{ 493 + repos[0].ID: { 494 + HasSymbols: repos[0].HasSymbols, 495 + Branches: repos[0].Branches, 496 + }, 497 + }, 498 + Stats: aggStats, 499 + }, 500 + }, 501 + { 502 + name: "field=reposmap", 503 + opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 504 + want: &zoekt.RepoList{ 505 + Repos: []*zoekt.RepoListEntry{ 506 + { 507 + Repository: *repos[1], 508 + Stats: stats, 509 + }, 510 + }, 511 + ReposMap: zoekt.ReposMap{ 512 + repos[0].ID: { 513 + HasSymbols: repos[0].HasSymbols, 514 + Branches: repos[0].Branches, 515 + }, 516 + }, 517 + Stats: aggStats, 518 + }, 519 + }, 465 520 } { 466 521 tc := tc 467 522 t.Run(tc.name, func(t *testing.T) {