fork of https://github.com/sourcegraph/zoekt
1package query
2
3import (
4 "bytes"
5 "encoding/binary"
6 "errors"
7 "fmt"
8 "io"
9 "unsafe"
10
11 "github.com/RoaringBitmap/roaring"
12)
13
14func branchesReposEncode(brs []BranchRepos) ([]byte, error) {
15 var b bytes.Buffer
16 var enc [binary.MaxVarintLen64]byte
17 varint := func(n uint64) {
18 m := binary.PutUvarint(enc[:], n)
19 b.Write(enc[:m])
20 }
21 str := func(s string) {
22 varint(uint64(len(s)))
23 b.WriteString(s)
24 }
25 strSize := func(s string) uint64 {
26 return uint64(binary.PutUvarint(enc[:], uint64(len(s))) + len(s))
27 }
28
29 // Calculate size
30 size := uint64(1) // version
31 size += uint64(binary.PutUvarint(enc[:], uint64(len(brs))))
32 for _, br := range brs {
33 size += strSize(br.Branch)
34 idsSize := br.Repos.GetSerializedSizeInBytes()
35 size += uint64(binary.PutUvarint(enc[:], idsSize))
36 size += idsSize
37 }
38
39 b.Grow(int(size))
40
41 // Version
42 b.WriteByte(1)
43
44 // Length
45 varint(uint64(len(brs)))
46
47 for _, br := range brs {
48 str(br.Branch)
49 l := br.Repos.GetSerializedSizeInBytes()
50 varint(l)
51
52 n, err := br.Repos.WriteTo(&b)
53 if err != nil {
54 return nil, err
55 }
56
57 if uint64(n) != l {
58 return nil, io.ErrShortWrite
59 }
60 }
61
62 return b.Bytes(), nil
63}
64
65func branchesReposDecode(b []byte) ([]BranchRepos, error) {
66 // binaryReader returns strings pointing into b to avoid allocations. We
67 // don't own b, so we create a copy of it.
68 r := binaryReader{b: append(make([]byte, 0, len(b)), b...)}
69
70 // Version
71 if v := r.byt(); v != 1 {
72 return nil, fmt.Errorf("unsupported BranchRepos encoding version %d", v)
73 }
74
75 l := r.uvarint() // Length
76 brs := make([]BranchRepos, l)
77
78 for i := 0; i < l; i++ {
79 brs[i].Branch = r.str()
80 brs[i].Repos = r.bitmap()
81 }
82
83 return brs, r.err
84}
85
86// We implement a custom binary marshaller for a set of file names. See commit
87// 6c893ff323647b0419fac46ee462532401bf3283 for context on this code.
88// Additionally this code is based on that commit.
89//
90// Wire-format of map[string]struct{} is pretty straightforward:
91//
92// byte(1) version
93// uvarint(len(map))
94// for k in map:
95// uvarint(len(k))
96// bytes(k)
97//
98// The above format gives about the same size encoding as gob does. However,
99// gob doesn't have a specialization for map[string]struct{} so we get to
100// avoid a lot of intermediate allocations.
101//
102// The above adds up to a huge improvement, worth the extra complexity:
103//
104// name old time/op new time/op delta
105// FileNameSet_Encode-8 91.2µs ± 2% 36.8µs ± 1% -59.69% (p=0.000 n=10+9)
106// FileNameSet_Decode-8 143µs ± 1% 54µs ± 1% -61.96% (p=0.000 n=8+9)
107//
108// name old bytes new bytes delta
109// FileNameSet_Encode-8 12.1kB ± 0% 11.1kB ± 0% -8.63% (p=0.000 n=10+10)
110//
111// name old alloc/op new alloc/op delta
112// FileNameSet_Encode-8 16.0kB ± 0% 12.3kB ± 0% -23.20% (p=0.000 n=10+10)
113// FileNameSet_Decode-8 76.7kB ± 0% 72.3kB ± 0% -5.77% (p=0.000 n=10+10)
114//
115// name old allocs/op new allocs/op delta
116// FileNameSet_Encode-8 1.00k ± 0% 0.00k ± 0% -99.90% (p=0.000 n=10+10)
117// FileNameSet_Decode-8 1.20k ± 0% 0.18k ± 0% -85.27% (p=0.000 n=10+10)
118
119// stringSetEncode implements an efficient encoder for map[string]struct{}.
120func stringSetEncode(set map[string]struct{}) ([]byte, error) {
121 var b bytes.Buffer
122 var enc [binary.MaxVarintLen64]byte
123 varint := func(n int) {
124 m := binary.PutUvarint(enc[:], uint64(n))
125 b.Write(enc[:m])
126 }
127 str := func(s string) {
128 varint(len(s))
129 b.WriteString(s)
130 }
131 strSize := func(s string) int {
132 return binary.PutUvarint(enc[:], uint64(len(s))) + len(s)
133 }
134
135 // Calculate size
136 size := 1 // version
137 size += binary.PutUvarint(enc[:], uint64(len(set)))
138 for k := range set {
139 size += strSize(k)
140 }
141 b.Grow(size)
142
143 // Version
144 b.WriteByte(1)
145
146 // Length
147 varint(len(set))
148
149 for k := range set {
150 str(k)
151 }
152
153 return b.Bytes(), nil
154}
155
156// stringSetDecode implements an efficient decoder for map[string]struct{}.
157func stringSetDecode(b []byte) (map[string]struct{}, error) {
158 // binaryReader returns strings pointing into b to avoid allocations. We
159 // don't own b, so we create a copy of it.
160 r := binaryReader{b: append([]byte{}, b...)}
161
162 // Version
163 if v := r.byt(); v != 1 {
164 return nil, fmt.Errorf("unsupported stringSet encoding version %d", v)
165 }
166
167 // Length
168 l := r.uvarint()
169 set := make(map[string]struct{}, l)
170
171 for i := 0; i < l; i++ {
172 set[r.str()] = struct{}{}
173 }
174
175 return set, r.err
176}
177
178type binaryReader struct {
179 b []byte
180 err error
181}
182
183func (b *binaryReader) uvarint() int {
184 x, n := binary.Uvarint(b.b)
185 if n < 0 {
186 b.b = nil
187 b.err = errors.New("malformed RepoBranches")
188 return 0
189 }
190 b.b = b.b[n:]
191 return int(x)
192}
193
194func (b *binaryReader) str() string {
195 l := b.uvarint()
196 if l > len(b.b) {
197 b.b = nil
198 b.err = errors.New("malformed RepoBranches")
199 return ""
200 }
201 s := b2s(b.b[:l])
202 b.b = b.b[l:]
203 return s
204}
205
206func (b *binaryReader) bitmap() *roaring.Bitmap {
207 l := b.uvarint()
208 if l > len(b.b) {
209 b.b = nil
210 b.err = errors.New("malformed BranchRepos")
211 return nil
212 }
213 r := roaring.New()
214 _, b.err = r.FromBuffer(b.b[:l])
215 b.b = b.b[l:]
216 return r
217}
218
219func (b *binaryReader) byt() byte {
220 if len(b.b) < 1 {
221 b.b = nil
222 b.err = errors.New("malformed RepoBranches")
223 return 0
224 }
225 x := b.b[0]
226 b.b = b.b[1:]
227 return x
228}
229
230func b2s(b []byte) string {
231 return *(*string)(unsafe.Pointer(&b))
232}