fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "encoding/binary"
20 "encoding/gob"
21 "fmt"
22 "io"
23 "log"
24)
25
// Reference log.Println so the "log" import counts as used even when no
// logging call exists in this file (presumably kept for ad-hoc debug
// prints — confirm before removing).
var _ = log.Println
27
// writer wraps an io.Writer, remembering the first error it encounters and
// the number of bytes written so far.
type writer struct {
	err error     // first error returned by w; once set, every later Write fails with it
	w   io.Writer // underlying destination
	off uint32    // number of bytes the underlying writer has accepted
}

// Write forwards b to the underlying writer. If an earlier write failed,
// nothing is written and that earlier error is returned. Otherwise the
// offset advances by the byte count the underlying writer reports, and any
// error it returns is remembered.
func (w *writer) Write(b []byte) (int, error) {
	if w.err != nil {
		return 0, w.err
	}

	written, err := w.w.Write(b)
	w.err = err
	w.off += uint32(written)
	return written, err
}

// Off returns the number of bytes written so far.
func (w *writer) Off() uint32 {
	return w.off
}

// B writes a single byte.
func (w *writer) B(b byte) {
	w.Write([]byte{b})
}

// U32 writes n as 4 big-endian bytes.
func (w *writer) U32(n uint32) {
	buf := make([]byte, 4)
	binary.BigEndian.PutUint32(buf, n)
	w.Write(buf)
}

// U64 writes n as 8 big-endian bytes.
func (w *writer) U64(n uint64) {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, n)
	w.Write(buf)
}

// Varint writes n in unsigned varint encoding.
func (w *writer) Varint(n uint32) {
	// A uint32 never needs more than MaxVarintLen32 bytes.
	buf := make([]byte, binary.MaxVarintLen32)
	used := binary.PutUvarint(buf, uint64(n))
	w.Write(buf[:used])
}

// String writes the byte length of s as a varint, followed by the bytes of s.
func (w *writer) String(s string) {
	w.Varint(uint32(len(s)))
	w.Write([]byte(s))
}
76
// encodeRanks serializes ranks to w. If every entry of ranks is empty (or
// ranks itself is empty) nothing is written and nil is returned. Otherwise a
// one-byte encoding tag is written, followed by the gob encoding of ranks.
// Any error from w or from the gob encoder is returned as-is.
func encodeRanks(w io.Writer, ranks [][]float64) error {
	anyRank := false
	for i := range ranks {
		if len(ranks[i]) != 0 {
			anyRank = true
			break
		}
	}
	if !anyRank {
		return nil
	}

	// The leading byte names the encoding, so the format can change later
	// without losing backward compatibility. 0 = gob-encoding.
	if _, err := w.Write([]byte{0}); err != nil {
		return err
	}

	enc := gob.NewEncoder(w)
	return enc.Encode(ranks)
}
99
// decodeRanks decodes blob into *ranks.
//
// An empty blob is valid: nil is returned and *ranks is left untouched.
// Otherwise the first byte of blob selects the encoding of the remaining
// bytes:
//
//	0: gob encoding of a [][]float64
//
// It returns the gob decoder's error unchanged when decoding fails, and a
// descriptive error when the encoding byte is not recognized.
func decodeRanks(blob []byte, ranks *[][]float64) error {
	if len(blob) == 0 {
		return nil
	}

	switch encoding := blob[0]; encoding {
	case 0: // gob-encoding
		return gob.NewDecoder(bytes.NewReader(blob[1:])).Decode(ranks)
	default:
		// Error strings must not end with a newline: they are routinely
		// wrapped or embedded in other messages.
		return fmt.Errorf("unknown encoding for ranks: %d", encoding)
	}
}
118
119func (s *simpleSection) start(w *writer) {
120 s.off = w.Off()
121}
122
123func (s *simpleSection) end(w *writer) {
124 s.sz = w.Off() - s.off
125}
126
127// section is a range of bytes in the index file.
128type section interface {
129 read(*reader) error
130 // skip advances over the data in the section without reading it.
131 // NOTE: the section will not contain valid data after this call, and it should not be used.
132 skip(*reader) error
133 write(*writer)
134 // kind encodes whether the section is simple or compound, and is used in serialization
135 kind() sectionKind
136}
137
// sectionKind enumerates the flavors of section.
type sectionKind int

// The numeric values are spelled out in the trailing comments because the
// section interface notes that kinds are used in serialization; keep the
// order stable.
const (
	sectionKindSimple       sectionKind = iota // 0
	sectionKindCompound                        // 1
	sectionKindCompoundLazy                    // 2
)
145
146// simpleSection is a simple range of bytes.
147type simpleSection struct {
148 off uint32
149 sz uint32
150}
151
152func (s *simpleSection) kind() sectionKind {
153 return sectionKindSimple
154}
155
156func (s *simpleSection) read(r *reader) error {
157 var err error
158 s.off, err = r.U32()
159 if err != nil {
160 return err
161 }
162 s.sz, err = r.U32()
163 return err
164}
165
166func (s *simpleSection) skip(r *reader) error {
167 var err error
168 _, err = r.U32()
169 if err != nil {
170 return err
171 }
172 _, err = r.U32()
173 return err
174}
175
176func (s *simpleSection) write(w *writer) {
177 w.U32(s.off)
178 w.U32(s.sz)
179}
180
181// compoundSection is a range of bytes containg a list of variable
182// sized items.
183type compoundSection struct {
184 data simpleSection
185
186 offsets []uint32
187 index simpleSection
188}
189
190func (s *compoundSection) kind() sectionKind {
191 return sectionKindCompound
192}
193
194func (s *compoundSection) start(w *writer) {
195 s.data.start(w)
196}
197
198func (s *compoundSection) end(w *writer) {
199 s.data.end(w)
200 s.index.start(w)
201 for _, o := range s.offsets {
202 w.U32(o)
203 }
204 s.index.end(w)
205}
206
207func (s *compoundSection) addItem(w *writer, item []byte) {
208 s.offsets = append(s.offsets, w.Off())
209 w.Write(item)
210}
211
212func (s *compoundSection) write(w *writer) {
213 s.data.write(w)
214 s.index.write(w)
215}
216
217func (s *compoundSection) read(r *reader) error {
218 if err := s.data.read(r); err != nil {
219 return err
220 }
221 if err := s.index.read(r); err != nil {
222 return err
223 }
224 var err error
225 s.offsets, err = readSectionU32(r.r, s.index)
226 return err
227}
228
229func (s *compoundSection) skip(r *reader) error {
230 if err := s.data.skip(r); err != nil {
231 return err
232 }
233 if err := s.index.read(r); err != nil {
234 return err
235 }
236
237 _, err := r.r.Read(s.index.off, s.index.sz)
238 return err
239}
240
241// relativeIndex returns the relative offsets of the items (first
242// element is 0), plus a final marking the end of the last item.
243func (s *compoundSection) relativeIndex() []uint32 {
244 ri := make([]uint32, 0, len(s.offsets)+1)
245 for _, o := range s.offsets {
246 ri = append(ri, o-s.offsets[0])
247 }
248 if len(s.offsets) > 0 {
249 ri = append(ri, s.data.sz)
250 }
251 return ri
252}
253
254type lazyCompoundSection struct {
255 compoundSection
256}
257
258func (s *lazyCompoundSection) kind() sectionKind {
259 return sectionKindCompoundLazy
260}
261
262func (s *lazyCompoundSection) read(r *reader) error {
263 // We do the same thing compoundSection.read does, except we don't read the
264 // offsets.
265 if err := s.data.read(r); err != nil {
266 return err
267 }
268 return s.index.read(r)
269}