fork of https://github.com/sourcegraph/zoekt
1package query
2
3import (
4 "bytes"
5 "encoding/binary"
6 "errors"
7 "fmt"
8 "io"
9 "unsafe"
10
11 "slices"
12
13 "github.com/RoaringBitmap/roaring"
14)
15
16func branchesReposEncode(brs []BranchRepos) ([]byte, error) {
17 var b bytes.Buffer
18 var enc [binary.MaxVarintLen64]byte
19 varint := func(n uint64) {
20 m := binary.PutUvarint(enc[:], n)
21 b.Write(enc[:m])
22 }
23 str := func(s string) {
24 varint(uint64(len(s)))
25 b.WriteString(s)
26 }
27 strSize := func(s string) uint64 {
28 return uint64(binary.PutUvarint(enc[:], uint64(len(s))) + len(s))
29 }
30
31 // Calculate size
32 size := uint64(1) // version
33 size += uint64(binary.PutUvarint(enc[:], uint64(len(brs))))
34 for _, br := range brs {
35 size += strSize(br.Branch)
36 idsSize := br.Repos.GetSerializedSizeInBytes()
37 size += uint64(binary.PutUvarint(enc[:], idsSize))
38 size += idsSize
39 }
40
41 b.Grow(int(size))
42
43 // Version
44 b.WriteByte(1)
45
46 // Length
47 varint(uint64(len(brs)))
48
49 for _, br := range brs {
50 str(br.Branch)
51 l := br.Repos.GetSerializedSizeInBytes()
52 varint(l)
53
54 n, err := br.Repos.WriteTo(&b)
55 if err != nil {
56 return nil, err
57 }
58
59 if uint64(n) != l {
60 return nil, io.ErrShortWrite
61 }
62 }
63
64 return b.Bytes(), nil
65}
66
67func branchesReposDecode(b []byte) ([]BranchRepos, error) {
68 // binaryReader returns strings pointing into b to avoid allocations. We
69 // don't own b, so we create a copy of it.
70 r := binaryReader{b: append(make([]byte, 0, len(b)), b...)}
71
72 // Version
73 if v := r.byt(); v != 1 {
74 return nil, fmt.Errorf("unsupported BranchRepos encoding version %d", v)
75 }
76
77 l := r.uvarint() // Length
78 brs := make([]BranchRepos, l)
79
80 for i := range l {
81 brs[i].Branch = r.str()
82 brs[i].Repos = r.bitmap()
83 }
84
85 return brs, r.err
86}
87
88// We implement a custom binary marshaller for a set of file names. See commit
89// 6c893ff323647b0419fac46ee462532401bf3283 for context on this code.
90// Additionally this code is based on that commit.
91//
92// Wire-format of map[string]struct{} is pretty straightforward:
93//
94// byte(1) version
95// uvarint(len(map))
96// for k in map:
97// uvarint(len(k))
98// bytes(k)
99//
100// The above format gives about the same size encoding as gob does. However,
101// gob doesn't have a specialization for map[string]struct{} so we get to
102// avoid a lot of intermediate allocations.
103//
104// The above adds up to a huge improvement, worth the extra complexity:
105//
106// name old time/op new time/op delta
107// FileNameSet_Encode-8 91.2µs ± 2% 36.8µs ± 1% -59.69% (p=0.000 n=10+9)
108// FileNameSet_Decode-8 143µs ± 1% 54µs ± 1% -61.96% (p=0.000 n=8+9)
109//
110// name old bytes new bytes delta
111// FileNameSet_Encode-8 12.1kB ± 0% 11.1kB ± 0% -8.63% (p=0.000 n=10+10)
112//
113// name old alloc/op new alloc/op delta
114// FileNameSet_Encode-8 16.0kB ± 0% 12.3kB ± 0% -23.20% (p=0.000 n=10+10)
115// FileNameSet_Decode-8 76.7kB ± 0% 72.3kB ± 0% -5.77% (p=0.000 n=10+10)
116//
117// name old allocs/op new allocs/op delta
118// FileNameSet_Encode-8 1.00k ± 0% 0.00k ± 0% -99.90% (p=0.000 n=10+10)
119// FileNameSet_Decode-8 1.20k ± 0% 0.18k ± 0% -85.27% (p=0.000 n=10+10)
120
121// stringSetEncode implements an efficient encoder for map[string]struct{}.
122func stringSetEncode(set map[string]struct{}) ([]byte, error) {
123 var b bytes.Buffer
124 var enc [binary.MaxVarintLen64]byte
125 varint := func(n int) {
126 m := binary.PutUvarint(enc[:], uint64(n))
127 b.Write(enc[:m])
128 }
129 str := func(s string) {
130 varint(len(s))
131 b.WriteString(s)
132 }
133 strSize := func(s string) int {
134 return binary.PutUvarint(enc[:], uint64(len(s))) + len(s)
135 }
136
137 // Calculate size
138 size := 1 // version
139 size += binary.PutUvarint(enc[:], uint64(len(set)))
140 for k := range set {
141 size += strSize(k)
142 }
143 b.Grow(size)
144
145 // Version
146 b.WriteByte(1)
147
148 // Length
149 varint(len(set))
150
151 for k := range set {
152 str(k)
153 }
154
155 return b.Bytes(), nil
156}
157
158// stringSetDecode implements an efficient decoder for map[string]struct{}.
159func stringSetDecode(b []byte) (map[string]struct{}, error) {
160 // binaryReader returns strings pointing into b to avoid allocations. We
161 // don't own b, so we create a copy of it.
162 r := binaryReader{b: slices.Clone(b)}
163
164 // Version
165 if v := r.byt(); v != 1 {
166 return nil, fmt.Errorf("unsupported stringSet encoding version %d", v)
167 }
168
169 // Length
170 l := r.uvarint()
171 set := make(map[string]struct{}, l)
172
173 for range l {
174 set[r.str()] = struct{}{}
175 }
176
177 return set, r.err
178}
179
180type binaryReader struct {
181 b []byte
182 err error
183}
184
185func (b *binaryReader) uvarint() int {
186 x, n := binary.Uvarint(b.b)
187 if n < 0 {
188 b.b = nil
189 b.err = errors.New("malformed RepoBranches")
190 return 0
191 }
192 b.b = b.b[n:]
193 return int(x)
194}
195
196func (b *binaryReader) str() string {
197 l := b.uvarint()
198 if l > len(b.b) {
199 b.b = nil
200 b.err = errors.New("malformed RepoBranches")
201 return ""
202 }
203 s := b2s(b.b[:l])
204 b.b = b.b[l:]
205 return s
206}
207
208func (b *binaryReader) bitmap() *roaring.Bitmap {
209 l := b.uvarint()
210 if l > len(b.b) {
211 b.b = nil
212 b.err = errors.New("malformed BranchRepos")
213 return nil
214 }
215 r := roaring.New()
216 _, b.err = r.FromBuffer(b.b[:l])
217 b.b = b.b[l:]
218 return r
219}
220
221func (b *binaryReader) byt() byte {
222 if len(b.b) < 1 {
223 b.b = nil
224 b.err = errors.New("malformed RepoBranches")
225 return 0
226 }
227 x := b.b[0]
228 b.b = b.b[1:]
229 return x
230}
231
232func b2s(b []byte) string {
233 return *(*string)(unsafe.Pointer(&b))
234}